#/usr/bin/python
"""
Author:fangself.com.cn
Date:20180808
Func:校验两个文件夹及其内容是否完全一致
"""
BYTES=64 #每次读文件缓存多少字节,默认64字节,根据当前机器执行任务的情况适当调整
THREADS=4 #设置为cpu的个数,使性能达到最佳
RESULTS_FILE="./mdf_hex_res.md" #保存结果的文件
import hashlib
import os
import threading
import sys
def is_same_file(filename):
mdf=hashlib.md5()
f = open(filename,"r",encoding="utf-8")
f.seek(0)
chunk = f.read(1024*BYTES)
while chunk:
mdf.update(chunk.encode("utf-8"))
chunk=f.read(1024*BYTES)
f.close()
return mdf.hexdigest()
class WalkThread(threading.Thread):
def __init__(self,absp,arg):
super(WalkThread,self).__init__()
self.path=os.path.join(absp,arg)
def run(self):
with open(RESULTS_FILE,"a+",encoding="utf-8") as f:
f.write(is_same_file(self.path))
pass
pass
def check_dir_ok(dir):
if dir.isdigit():return False
if not os.path.isdir(dir):return False
if not os.path.exists(dir):return False
return True
pass
if __name__=="__main__":
print(str(sys.argv[1]))
if not check_dir_ok(sys.argv[1]): print("Sorry it is not valid ")
flag=0
for i in os.walk(sys.argv[1]):
if flag==0:flag=1;continue
for j in i[len(i)-1]:
print(type(j))
thr= WalkThread(i[0],j)
thr.start()