思路:
- 源文件或文件夹与目标不同名的文件有哪些?
- 源文件或文件夹与目标同名的情况:
a)文件同名,比较文件内容是否相同?
b)文件夹同名,比较共有文件的内容
c)文件同名,但是内容不同
程序1:找出两个文件目录中不同的部分
'''
比较两个目录里文件是否一致?
1.要列出2个文件夹中所有的文件;
2.相互做比较;
3.把比较的结果以报告的形式呈现
'''
import os,sys
def reportdiff(unique1, unique2, dir1, dir2):
    """
    Print a report of the names that appear in only one of two directories.

    unique1 holds the names found only in dir1 and unique2 the names found
    only in dir2. When both are empty a single "identical" line is printed;
    otherwise each non-empty list is printed under a heading naming its
    directory.
    """
    if not unique1 and not unique2:
        print("Directory lists are identical")
        return

    # Each directory's entries are printed with its own dotted prefix.
    sections = (
        (unique1, dir1, '....'),
        (unique2, dir2, '.........'),
    )
    for names, directory, bullet in sections:
        if not names:
            continue
        print('Files unique to:', directory)
        for name in names:
            print(bullet, name)
def difference(seq1, seq2):
    """
    Return the items of seq1 that do not occur in seq2.

    The result is a new list that keeps seq1's order (and any duplicates).
    """
    only_in_first = []
    for candidate in seq1:
        if candidate in seq2:
            continue
        only_in_first.append(candidate)
    return only_in_first
def comparedirs(dir1, dir2, files1=None, files2=None):
    """
    Compare the entry names of two directories and print a difference report.

    Args:
        dir1: path of the first directory.
        dir2: path of the second directory.
        files1: optional pre-computed name list for dir1; when None the
            names are read with os.listdir(dir1).
        files2: optional pre-computed name list for dir2; when None the
            names are read with os.listdir(dir2).

    Returns:
        True when neither directory contains a name the other lacks,
        False otherwise. Only names are compared, not file contents.
    """
    print('Comparing...', dir1, 'to....', dir2)
    files1 = os.listdir(dir1) if files1 is None else files1
    files2 = os.listdir(dir2) if files2 is None else files2
    unique1 = difference(files1, files2)
    unique2 = difference(files2, files1)
    reportdiff(unique1, unique2, dir1, dir2)
    # Negating the tuple (unique1, unique2) would always give False because
    # a two-element tuple is truthy; test the lists themselves instead.
    return not (unique1 or unique2)
def getarg():
    """
    Read the two directory paths from the command line.

    Returns:
        A (dir1, dir2) tuple taken from sys.argv[1:].

    Exits the process with status 1, after printing a usage line, when the
    command line does not carry exactly two arguments.
    """
    try:
        dir1, dir2 = sys.argv[1:]
    except ValueError:
        # Unpacking raises ValueError for too few or too many arguments;
        # catching only that keeps KeyboardInterrupt and the like intact.
        print("Usage: dirdiff.py dir1 dir2")
        sys.exit(1)
    else:
        return (dir1, dir2)
if __name__ == '__main__':
    # Script entry point: take the two directories from the command line
    # and report the names that are unique to each.
    comparedirs(*getarg())
程序2: 增加文件内容的比较
import os,dirdiff
blocksize = 1024*1024
def intersect(seq1, seq2):
    """
    Return the items of seq1 that also occur in seq2.

    The result is a new list that keeps seq1's order (and any duplicates).
    """
    return list(filter(lambda entry: entry in seq2, seq1))
def comparetrees(dir1, dir2, diffs, verbose=False):
    """
    Recursively compare two directory trees by entry names and file contents.

    Files are read in binary mode so that no Unicode decoding or newline
    translation takes place; the trees may hold both binary and text files.
    NOTE: on some platforms undecodable file names may require passing
    bytes paths to os.listdir.

    Args:
        dir1: path of the first directory.
        dir2: path of the second directory.
        diffs: list that receives one message string per difference found;
            it is mutated in place and shared across the recursion.
        verbose: when true, also print a line for every matching file.

    Returns:
        None. Results are reported through `diffs` and printed output.
    """
    print("--" * 20)
    names1 = os.listdir(dir1)
    names2 = os.listdir(dir2)
    if not dirdiff.comparedirs(dir1, dir2, names1, names2):
        diffs.append('unique file at %s - %s ' % (dir1, dir2))
    print('Comparing contents')
    common = intersect(names1, names2)
    # Names still in `missed` after both passes are neither file/file nor
    # dir/dir pairs.
    missed = common[:]

    # Compare the contents of files present in both directories.
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isfile(path1) and os.path.isfile(path2):
            missed.remove(name)
            # `with` closes both handles on every exit path; otherwise
            # two descriptors stay open for each compared pair.
            with open(path1, 'rb') as file1, open(path2, 'rb') as file2:
                while True:
                    bytes1 = file1.read(blocksize)
                    bytes2 = file2.read(blocksize)
                    if (not bytes1) and (not bytes2):
                        # Both files hit end-of-file together: same content.
                        if verbose:
                            print(name, 'matches')
                        break
                    if bytes1 != bytes2:
                        diffs.append('files differ at %s --- %s ' % (path1, path2))
                        print(name, 'DIFFERS')
                        break

    # Recurse into subdirectories present in both directories.
    for name in common:
        path1 = os.path.join(dir1, name)
        path2 = os.path.join(dir2, name)
        if os.path.isdir(path1) and os.path.isdir(path2):
            missed.remove(name)
            comparetrees(path1, path2, diffs, verbose)

    # Same name on both sides, but not a file/file or dir/dir pair
    # (for example a file in one tree and a directory in the other).
    for name in missed:
        diffs.append('files missed at %s ---%s:%s' % (dir1, dir2, name))
        print(name, 'DIFFERS')
if __name__ == '__main__':
    # Script entry point: compare the two trees named on the command line
    # verbosely, then print a summary of every difference collected.
    dir1, dir2 = dirdiff.getarg()
    diffs = []
    comparetrees(dir1, dir2, diffs, True)
    print('=' * 40)
    if diffs:
        print('Diffs found: ', len(diffs))
        for entry in diffs:
            print('-', entry)
    else:
        print('No diffs found.')