案例 https://worldjpn.grips.ac.jp/documents/indices/pm/3.html
# encoding.py
from w3lib import encoding
import chardet
import chardet.charsetprober
# Keep a reference to w3lib's original function so the wrapper below can
# delegate to it before falling back to chardet.
_html_body_declared_encoding = encoding.html_body_declared_encoding
def html_body_declared_encoding(html_body_str):
    """Return the encoding declared in an HTML body, guessing it if absent.

    Delegates to the saved w3lib implementation first. When that finds no
    declaration, falls back to chardet's detection and accepts the guess
    only if its confidence is above 0.2.

    Args:
        html_body_str: The HTML body to inspect. The chardet fallback is
            only attempted for bytes-like input.

    Returns:
        The encoding name, or None when nothing was declared and chardet
        produced no sufficiently confident guess.
    """
    declared = _html_body_declared_encoding(html_body_str)
    if declared:
        return declared
    # chardet.detect() only accepts bytes/bytearray and raises TypeError for
    # str, so skip detection for text input instead of crashing.
    if not isinstance(html_body_str, (bytes, bytearray)):
        return None
    guess = chardet.detect(html_body_str)
    if guess and guess["confidence"] > 0.2:
        return guess["encoding"]
    return None
# Monkey-patch w3lib: replace its function with the wrapper defined above so
# that lookups of encoding.html_body_declared_encoding get the chardet fallback.
encoding.html_body_declared_encoding = html_body_declared_encoding
在 spider 同级目录的 __init__.py 中引入 encoding
import encoding as _
实测 注释掉import
WeChat00f51309f1634ada7dba2387a195a12b.png
使用补丁
WeChat022872536e6ea6a152ff72e96a2ff954.png
网友评论