"""This module contains code from
Think Python by Allen B. Downey
http://thinkpython.com
Copyright 2012 Allen B. Downey
License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
"""
import os
def walk(dirname):
    """Collects the paths of every file under dirname, recursively.

    dirname: string name of the directory to search

    Returns: list of string paths, each one built by joining dirname
    with the names found below it
    """
    found = []
    for entry in os.listdir(dirname):
        full_path = os.path.join(dirname, entry)
        if not os.path.isfile(full_path):
            # Anything that is not a regular file is treated as a
            # directory and searched in turn.
            found += walk(full_path)
            continue
        found.append(full_path)
    return found
def compute_checksum(filename):
    """Computes the MD5 checksum of the contents of a file.

    The digest is computed in-process with hashlib instead of running
    md5sum through a shell, so filenames containing spaces or shell
    metacharacters are handled safely and no external program is needed.

    filename: string

    Returns (res, stat): res is one line in md5sum's output format (the
    hex digest, two spaces, the filename, and a trailing newline); stat
    is always None.

    Raises IOError/OSError if the file cannot be opened or read.
    """
    import hashlib  # local import so the file's import block is untouched

    digest = hashlib.md5()
    with open(filename, 'rb') as fp:
        # Read fixed-size chunks so large files are never loaded whole.
        for chunk in iter(lambda: fp.read(65536), b''):
            digest.update(chunk)
    return '%s  %s\n' % (digest.hexdigest(), filename), None
def check_diff(name1, name2):
    """Computes the difference between the contents of two files.

    name1, name2: string filenames

    Returns whatever pipe returns for the diff command: a pair
    (res, stat) holding the output of diff and the subprocess status.
    """
    try:
        from shlex import quote
    except ImportError:
        from pipes import quote  # Python 2 keeps quote() in pipes

    # The command is run by a shell, so each filename is quoted; otherwise
    # spaces or shell metacharacters in a name would split or alter it.
    cmd = 'diff %s %s' % (quote(name1), quote(name2))
    return pipe(cmd)
def pipe(cmd):
    """Runs a command in a subprocess.

    cmd: string Unix command

    Returns (res, stat), the output of the subprocess and the exit status.
    stat is whatever closing the pipe reports: None when the command
    succeeded, otherwise a value describing the failure.

    A failing command is reported through stat rather than by raising,
    because some commands (diff, for example) use a non-zero exit status
    for a normal result.
    """
    fp = os.popen(cmd)
    try:
        res = fp.read()
    finally:
        # Always close the pipe, even if reading fails, so the subprocess
        # is reaped and its status collected.
        stat = fp.close()
    return res, stat
def compute_checksums(dirname, suffix):
    """Computes checksums for all files with the given suffix.

    dirname: string name of directory to search
    suffix: string suffix to match

    Returns: map from checksum to list of files with that checksum

    Raises ValueError if the checksum output for a file is empty.
    """
    d = {}
    for name in walk(dirname):
        if not name.endswith(suffix):
            continue
        res, _ = compute_checksum(name)
        # The checksum is the first whitespace-delimited field. Splitting
        # at most once keeps filenames that contain spaces from breaking
        # the parse.
        fields = res.split(None, 1)
        if not fields:
            raise ValueError('no checksum output for %r' % name)
        d.setdefault(fields[0], []).append(name)
    return d
def check_pairs(names):
    """Checks whether any in a list of files differs from the others.

    names: list of string filenames

    Returns: True if no file differs from the others (which includes a
    list with fewer than two names), False as soon as a difference is
    found.
    """
    if not names:
        return True
    # Having identical contents is transitive, so comparing every file
    # with the first one is enough; there is no need to diff every pair.
    reference = names[0]
    for other in names[1:]:
        if other == reference:
            # Same path listed twice: nothing to compare.
            continue
        res, _ = check_diff(reference, other)
        if res:
            return False
    return True
def print_duplicates(d):
    """Checks for duplicate files.

    Reports any files with the same checksum and checks whether they
    are, in fact, identical.

    d: map from checksum to list of files with that checksum
    """
    # Only the lists of names are needed; the checksum keys are not printed.
    for names in d.values():
        if len(names) < 2:
            continue
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print('The following files have the same checksum:')
        for name in names:
            print(name)
        if check_pairs(names):
            print('And they are identical.')
if __name__ == '__main__':
    # Script entry point: look for duplicate .py files under the current
    # directory and report them.
    print_duplicates(compute_checksums(dirname='.', suffix='.py'))
更多文章、技術交流、商務合作、聯繫博主
微信掃碼或搜索:z360901061

微信掃一掃加我為好友
QQ號聯繫: 360901061
您的支持是博主寫作最大的動力,如果您喜歡我的文章,感覺我的文章對您有幫助,請用微信掃描下面二維碼支持博主2元、5元、10元、20元等您想捐的金額吧,狠狠點擊下面給點支持吧,站長非常感激您!手機微信長按不能支付解決辦法:請將微信支付二維碼保存到相冊,切換到微信,然后點擊微信右上角掃一掃功能,選擇支付二維碼完成支付。
【本文對您有幫助就好】元
