# chardet 是第三方库（并非 Python 3 内置库），需先执行 pip install chardet；用于检测字节数据的编码
#!/usr/bin/python3
# Guess the character encoding of a file's raw bytes with chardet.
# NOTE: chardet is a third-party package (pip install chardet), not stdlib.
import chardet

# Open in binary mode: chardet.detect() is given the undecoded bytes.
# The with-block closes the handle even if read() raises.
with open("file", "rb") as fh:
    dat = fh.read()

print(chardet.detect(dat))
# Example output recorded for a GB2312-encoded file:
# {'encoding': 'GB2312', 'confidence': 0.99, 'language': 'Chinese'}
#!/usr/bin/python3
# Show the interpreter's default string encoding.
import sys

# In Python 3 sys.getdefaultencoding() is always 'utf-8', on Windows and
# Linux alike. The value that differs per platform (e.g. GBK/cp936 on a
# Simplified-Chinese Windows, usually UTF-8 on Linux) is the locale's
# preferred encoding, i.e. locale.getpreferredencoding(), which is what
# open() falls back to when no encoding= argument is given.
default_encoding = sys.getdefaultencoding()
print(default_encoding)
#!/usr/bin/python3
# Demonstrates str <-> bytes conversion under several encodings.
# Requires Python 3: bytes(str, encoding=...) does not exist in Python 2.

a = bytes('hello', encoding="ascii")
# b'hello' -- the leading b marks a bytes object.

# ASCII cannot represent the two Chinese characters, so this raises.
# The error is caught so the rest of the demonstration still runs.
try:
    a = bytes("hello中文", encoding='ascii')
except UnicodeEncodeError as err:
    # 'ascii' codec can't encode characters in position 5-6:
    # ordinal not in range(128)
    print(err)

a = bytes("hello中文", encoding="utf-8")
# b'hello\xe4\xb8\xad\xe6\x96\x87'
# Equivalent to 'hello中文'.encode(encoding='utf-8'):
print('hello中文'.encode(encoding='utf-8') == a)  # True

b = bytes("hello中文", encoding="gbk")
# b'hello\xd6\xd0\xce\xc4'
c = bytes("hello中文", encoding="unicode-escape")
# b'hello\\u4e2d\\u6587'

# Same text, different encodings -> different byte sequences.
print(a == b)  # False

print(a.decode(encoding="utf-8"))  # 'hello中文'

# Decoding GBK bytes as UTF-8 fails; caught for the same reason as above.
try:
    b.decode(encoding="utf-8")
except UnicodeDecodeError as err:
    # 'utf-8' codec can't decode byte 0xd6 in position 5:
    # invalid continuation byte
    print(err)

print(b.decode(encoding="gbk"))  # 'hello中文'

# Decoding each byte string with its own codec yields the same text.
print(a.decode(encoding="utf-8") == b.decode(encoding="gbk"))  # True

# Other encodings that can represent Chinese text.
# gb18030: compatible with Chinese, Japanese and Korean characters.
c = bytes("hello中文", encoding="gb18030")
d = bytes("hello中文", encoding="gb2312")
# big5: Traditional Chinese, as used in Taiwan.
e = bytes("hello中文", encoding="big5")