import sys
import os
import hashlib


# Generator that reads a file in small chunks so large files
# never have to be loaded into memory all at once.
def file_reader(file_object, size=1024):
    """Yield successive size-byte chunks (1 KB by default) from file_object."""
    while True:
        file_piece = file_object.read(size)
        if not file_piece:
            return
        yield file_piece


# Iterate through the given paths for all files, adding each file
# and its hash to the dictionary "hashes" after comparing
# each new pair against the pairs already in the dictionary.
def check_for_duplicates(paths, hash_func=hashlib.sha1):
    # Create an empty dictionary called "hashes".
    hashes = {}
    for path in paths:
        for dirpath, dirnames, filenames in os.walk(path):
            for filename in filenames:
                # Identify each file by its whole pathname.
                full_path = os.path.join(dirpath, filename)
                hash_return = hash_func()
                # Use file_reader to break the file into pieces and read them;
                # "with" guarantees the file handle is closed afterwards.
                with open(full_path, 'rb') as file_object:
                    for file_piece in file_reader(file_object):
                        # Feed each piece to the hash function.
                        hash_return.update(file_piece)
                # Compare the hash digest and file size to items in the
                # dictionary "hashes".
                file_id = (hash_return.digest(), os.path.getsize(full_path))
                duplicate = hashes.get(file_id)
                # If a duplicate hash is found, print the pair;
                # otherwise add it as another entry in the dictionary.
                if duplicate:
                    print("Duplicate found: %s and %s" % (full_path, duplicate))
                else:
                    hashes[file_id] = full_path


if __name__ == '__main__':
    if sys.argv[1:]:
        check_for_duplicates(sys.argv[1:])
    else:
        print("Please pass the paths to check as arguments to the script")
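A minimal usage sketch, assuming the script is saved as find_duplicates.py (the filename and directory paths here are hypothetical):

    python find_duplicates.py /home/user/photos /mnt/backup

Because files are compared by the hash of their contents rather than by name, the script catches duplicates even when the copies have been renamed; pairing the digest with the file size in file_id adds a cheap extra guard against hash collisions.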