Copy random files from the file tree

I have the same problem as described here, but now I am trying to solve it with Python because it is more suitable for the task.

I started with this:

import os
import shutil
import random
import glob


# Work-in-progress script (Python 2): look at every entry directly under
# root_dir, count what it contains and decide how much of it should be copied
# to output_dir. The copy steps themselves are still missing (see the
# placeholder comments inside the loop).
root_dir = '/home/leonardo/Desktop/python_script/rfe'
output_dir = '/home/leonardo/Desktop/python_script/output_folder'
# Threshold: folders with more than `ref` entries get only a 20% share copied.
ref = 200

# Names of all entries directly under root_dir.
folders_root_dir = os.listdir(root_dir)
print folders_root_dir

count = len(folders_root_dir)
print  count

for i in xrange(count):
    # Assumes every entry under root_dir is itself a directory; the
    # os.listdir() call below fails otherwise -- TODO confirm.
    folder_inside = root_dir + '/' + folders_root_dir[i]
    print folder_inside
    # Counts every entry of the folder, sub-directories included.
    number_files_folder_inside = len(os.listdir(folder_inside))
    print  number_files_folder_inside

    if number_files_folder_inside > ref:
        # 20% of the entry count.
        # NOTE(review): round() returns a float in Python 2; wrap it in int()
        # before using it as a number of files.
        ref_copy = round(0.2*number_files_folder_inside)
        print ref_copy
        # TODO: here I have to copy 20% of the files in this folder to the output folder
    else:
        # TODO: here I have to copy all files from the folder to the output_dir
        # NOTE(review): this branch has no statement yet, so the script does
        # not parse until the copy logic is added.

      

I tried to use os.walk()

but I am new to Python, and selecting the files while the function walks the tree turned out to be very complicated.

+3


source to share


5 answers


You will need to import these modules:

import os
import shutil
import random

      

You can get all files in a directory like this:



# Names of the entries in `dir` for which os.path.isfile() is true, so
# sub-directories are left out. `dir` is not defined in this snippet: it is
# expected to hold the path of the directory to scan (note that the name
# shadows the builtin dir()).
files = [file for file in os.listdir(dir) if os.path.isfile(os.path.join(dir, file))]

      

Then use a conditional statement:

# Expects `files` (file names), `dir` (their directory) and `outputdir`
# (destination directory) to be defined by the surrounding code.
if len(files) < 200:
    # Small folder: copy every file.
    selected = files
else:
    # Upper bound on the number of random files you'd like to select.
    random_amount = 1000
    # random.sample() never returns the same file twice; the count is clamped
    # because it raises ValueError when asked for more items than exist.
    selected = random.sample(files, min(random_amount, len(files)))

for name in selected:
    # shutil.copy() accepts a directory as destination and keeps the file
    # name; shutil.copyfile() would need the full target file name instead.
    shutil.copy(os.path.join(dir, name), outputdir)

      

+2


source


import os
import shutil
import random

# Walk root_dir and copy files from every folder into output_dir: folders
# holding more than `ref` files contribute a random 20% of them, all other
# folders contribute every file.
root_dir = '/home/leonardo/Desktop/python_script/qar'
output_dir = '/home/leonardo/Desktop/python_script/output_folder'
ref = 1

for root, dirs, files in os.walk(root_dir):
    # Count only the files os.walk() reports for this folder, so that
    # sub-directories neither inflate the count nor get drawn as "files".
    number_of_files = len(files)
    if number_of_files > ref:
        ref_copy = int(round(0.2 * number_of_files))
        # random.sample() draws without replacement: exactly ref_copy
        # distinct files are selected.
        chosen = random.sample(files, ref_copy)
    else:
        chosen = files
    for name in chosen:
        file_to_copy = os.path.join(root, name)
        # Skip entries that are not files after all (e.g. broken symlinks).
        if os.path.isfile(file_to_copy):
            shutil.copy(file_to_copy, output_dir)
            # Parenthesised single-argument print behaves the same on
            # Python 2 and Python 3.
            print(file_to_copy)
print('Finished !')

      



This is what the final code looks like. Thanks for the help, guys! Greetings!

+1


source


Maybe something like this (untested):

    import os
    import shutil

    # Maximum number of files taken from each directory.
    THRESHOLD = 200
    # Placeholder paths; forward slashes avoid the invalid "\h" escape.
    root_dir = "/home..."
    output_dir = "/home....."

    for top, dirs, nondirs in os.walk(root_dir):
        # Takes the first THRESHOLD file names as listed by os.walk();
        # this is not a random selection.
        for name in nondirs[:THRESHOLD]:
            path = os.path.join(top, name)
            destination = os.path.join(output_dir, name)
            # Copy rather than os.rename(): rename would move the file out of
            # the source tree and fails when source and destination are on
            # different filesystems.
            shutil.copy(path, destination)

      

0


source


import random
import shutil
import os

# Copy files from every folder below rootdir into outdir: folders with more
# than `ref` files contribute a random 20% of them, the rest contribute all.
rootdir = '/home/leonardo/Desktop/python_script/qar'
outdir = '/home/leonardo/Desktop/python_script/output_folder'

ref = 200

# rootdir itself plus every directory below it.
dirs = [walk_entry[0] for walk_entry in os.walk(rootdir)]

# Maps each directory to the names of the files directly inside it:
# {folder: [file1, file2], folder2: [file2, file4]}
dirsAndFiles = {}
for folder in dirs:
    names = []
    for entry in os.listdir(folder):
        if os.path.isfile(os.path.join(folder, entry)):
            names.append(entry)
    dirsAndFiles[folder] = names

for folder in dirsAndFiles:
    names = dirsAndFiles[folder]
    if len(names) <= ref:
        # Small folder: copy all files.
        for name in names:
            shutil.copy(os.path.join(folder, name), outdir)
        continue

    # Large folder: copy 20% of the files. Each pick is removed from the
    # list so it cannot be drawn a second time.
    remaining = int(0.2 * len(names))
    while remaining > 0:
        pick = random.choice(names)
        names.remove(pick)
        shutil.copy(os.path.join(folder, pick), outdir)
        remaining -= 1

      

0


source


A more compact solution (also note that copyfile doesn't do its job as expected unless the target filename is also specified):

import os
import shutil
import random

def get_file_list(input_dir):
    """Return the names of the entries in *input_dir* that are files.

    Entries for which ``os.path.isfile`` is false (such as sub-directories)
    are left out. The names are returned without the directory prefix.
    """
    names = []
    for entry in os.listdir(input_dir):
        if os.path.isfile(os.path.join(input_dir, entry)):
            names.append(entry)
    return names

def get_random_files(file_list, N):
    """Return up to *N* distinct entries of *file_list*, chosen at random.

    When *N* is larger than the number of entries, all of them are returned
    (in random order) instead of letting ``random.sample`` raise ValueError.
    A negative *N* still raises ValueError.
    """
    return random.sample(file_list, min(N, len(file_list)))

def copy_files(random_files, input_dir, output_dir):
    """Copy every file named in *random_files* from *input_dir* to *output_dir*.

    *random_files* holds bare file names; each is joined onto *input_dir* to
    build the source path and handed to ``shutil.copy``.
    """
    sources = (os.path.join(input_dir, name) for name in random_files)
    for source in sources:
        shutil.copy(source, output_dir)

def main(input_dir, output_dir, N):
    """Copy a random selection of files from *input_dir* into *output_dir*.

    Lists the candidates with get_file_list(), lets get_random_files() pick
    among them using *N*, and passes the picks to copy_files().
    """
    candidates = get_file_list(input_dir)
    copy_files(get_random_files(candidates, N), input_dir, output_dir)

      

0


source







All Articles