.. link: http://dgplug.org/summertraining/2013/posts/kaviskhandelwal-dup_images-20130731.html .. description: .. tags: .. date: 2013/07/31 09:00:32 .. title: kaviskhandelwal dup_images 20130731 .. slug: kaviskhandelwal-dup_images-20130731 =============== duplicate_image =============== The assignment was to find all the duplicate images from the given direcories as commandline arguments. duplicate_image find the dulicate image files in a given directory Code ==== .. code:: python #!/usr/bin/env python """ duplicate_image.py is a python program to search duplicate images in a given directory """ import os , sys import hashlib from PIL import Image import types """ Package check NoneType EXIF information """ file_list = [] def MD5(location, file): """ Function to get MD5 code of Image file having no EXIF information """ os.chdir(location) with open(file, "r") as picture: fileContents = picture.read() md5 = hashlib.md5() md5.update(fileContents) hash = md5.hexdigest() return hash def exif(location, file): """ Function to get EXIF information of Imagefile """ res = {} os.chdir(location) res = Image.open(file)._getexif() if isinstance( Image.open(file)._getexif(), types.NoneType): return MD5(location, file) else : return res def search(path): """ Function to get a list of all 'jpg' and 'jpeg' image file """ filelist = [] for r,d,f in os.walk(path): for files in f: if (files.endswith(".jpg") or files.endwith(".JPG")) : os.path.join(r,files) file = [r, files] filelist.append(file) return filelist def check_for_duplicate(): """ Function to check the duplicate images in a directory """ global file_list for n in file_list: info = exif(n[0], n[1]) for x in file_list: info2 = exif(x[0], x[1]) if(info == info2): if ((n[1] == x[1] and n[0] != x[0]) or (n[1] != x[1])): print "%s path -> %s is duplicate of %s path -> %s " %(n[1],n[0], x[1],x[0]) if __name__=='__main__': file_list = search(sys.argv[1]) """ List of all 'jpg' and ' jpeg' images """ check_for_duplicate() RUN ==== $ ./duplicate_image.py /home/user searches for all duplicate file in a directory '/home/user'