Demo entry 6682267



Submitted by anonymous on Dec 08, 2017 at 01:23
Language: Python. Code size: 1.7 kB.

import hashlib
import os
from collections import defaultdict

def compute_hash(file_path):
    # Hash the file contents in chunks so large files are not read into memory at once.
    hasher = hashlib.sha256()
    with open(file_path, 'rb') as f:
        for chunk in iter(lambda: f.read(65536), b''):
            hasher.update(chunk)
    return hasher.hexdigest()

def get_duplicate_files(root_dir):
    hash_to_paths = defaultdict(list)
    # Pass fresh bookkeeping containers instead of relying on mutable default arguments.
    get_duplicate_files_helper(root_dir, hash_to_paths, set(), defaultdict(list))
    return [paths for paths in hash_to_paths.values() if len(paths) > 1]

def get_duplicate_files_helper(root_dir, hash_to_paths, computed_paths, size_to_paths):
    for entry in os.listdir(root_dir):
        entry_path = os.path.join(root_dir, entry)
        if os.path.isdir(entry_path):
            # Recurse into subdirectories, sharing the same bookkeeping containers.
            get_duplicate_files_helper(entry_path, hash_to_paths, computed_paths, size_to_paths)
        else:
            file_path = entry_path
            size = os.path.getsize(file_path)

            # If we have previously encountered any files of the same size,
            # we go through them one at a time, compute their hashes if necessary
            # and append them to @hash_to_paths. This action in and of itself will
            # cause file paths with equal hashes to be grouped together by hash
            # in @hash_to_paths.
            if size in size_to_paths:
                hash_ = compute_hash(file_path)
                hash_to_paths[hash_].append(file_path)
                computed_paths.add(file_path)
                for same_size_path in size_to_paths[size]:
                    if same_size_path not in computed_paths:
                        same_path_hash = compute_hash(same_size_path)
                        hash_to_paths[same_path_hash].append(same_size_path)
                        computed_paths.add(same_size_path)

            # In any case, we need to remember the size of the current file.
            size_to_paths[size].append(file_path)
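
A minimal usage sketch, assuming the functions above; the directory path is a hypothetical placeholder:

if __name__ == '__main__':
    # Print each group of file paths whose contents hash identically (hypothetical directory).
    for duplicate_group in get_duplicate_files('/path/to/scan'):
        print(duplicate_group)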

