美文网首页
采集wipo

采集wipo

作者: 是东东 | 来源:发表于2021-12-13 00:57 被阅读0次

    wipo_js.py

    import json
    import os
    
    
    class GetWipoJS(object):
        """Build and run the JavaScript that encodes a query-state object.

        The state dict is embedded in a script together with an ``LZString``
        implementation; the script prints
        ``LZString.compressToBase64(state)`` and that printed text is what
        :meth:`get_qz` returns.  Running the script requires a ``node``
        executable on ``PATH``.
        """

        # Sample token shipped with the original script; used when the caller
        # does not supply its own ``qi``.
        DEFAULT_QI = "0-1OOf/PeepQuegNdjyPRWQP8uO1YSPDMK7DnyHlO6O/8="
        # Script file written to the current working directory and run by node.
        JS_FILE = 'wipo_js.js'

        def _getCurrentState(self, qi=None, queue=1):
            """Return the state dict that gets compressed.

            :param qi: value for the ``qi`` field; ``None`` selects
                :attr:`DEFAULT_QI`.
            :param queue: value for the ``queue`` field (callers in this
                project pass the page number).
            :return: a new dict on every call.
            """
            return {
                "type": "brand",
                "la": "en",
                "qi": self.DEFAULT_QI if qi is None else qi,
                "queue": queue,
                # NOTE(review): meaning of this constant is not visible here.
                "_": "11569",
            }

        def get_js_str(self, qi=None, queue=1):
            """Return the full JavaScript source for the given state.

            :param qi: forwarded to :meth:`_getCurrentState`.
            :param queue: forwarded to :meth:`_getCurrentState`.
            :return: ``var cc = <state>;`` followed by the LZString code and a
                ``console.log`` of the compressed, base64-encoded state.
            """
            # json.dumps (rather than the dict's repr) guarantees a literal that
            # JavaScript parses identically, whatever characters the values hold.
            js_str1 = 'var cc = %s;' % json.dumps(self._getCurrentState(qi, queue))
            js_str2 = """
            var LZString = {
                _keyStr: "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=",
                _f: String.fromCharCode,
                compressToBase64: function(cc) {
                    var c = JSON.stringify(cc);
                    if (c == null) {
                        return ""
                    }
                    var a = "";
                    var k, h, f, j, g, e, d;
                    var b = 0;
                    c = LZString.compress(c);
                    while (b < c.length * 2) {
                        if (b % 2 == 0) {
                            k = c.charCodeAt(b / 2) >> 8;
                            h = c.charCodeAt(b / 2) & 255;
                            if (b / 2 + 1 < c.length) {
                                f = c.charCodeAt(b / 2 + 1) >> 8
                            } else {
                                f = NaN
                            }
                        } else {
                            k = c.charCodeAt((b - 1) / 2) & 255;
                            if ((b + 1) / 2 < c.length) {
                                h = c.charCodeAt((b + 1) / 2) >> 8;
                                f = c.charCodeAt((b + 1) / 2) & 255
                            } else {
                                h = f = NaN
                            }
                        }
                        b += 3;
                        j = k >> 2;
                        g = ((k & 3) << 4) | (h >> 4);
                        e = ((h & 15) << 2) | (f >> 6);
                        d = f & 63;
                        if (isNaN(h)) {
                            e = d = 64
                        } else {
                            if (isNaN(f)) {
                                d = 64
                            }
                        }
                        a = a + LZString._keyStr.charAt(j) + LZString._keyStr.charAt(g) + LZString._keyStr.charAt(e) + LZString._keyStr.charAt(d)
                    }
                    return a
                },
                compress: function(e) {
                    if (e == null) {
                        return ""
                    }
                    var h, l, n = {}, m = {}, o = "", c = "", r = "", d = 2, g = 3, b = 2, q = "", a = 0, j = 0, p, k = LZString._f;
                    for (p = 0; p < e.length; p += 1) {
                        o = e.charAt(p);
                        if (!Object.prototype.hasOwnProperty.call(n, o)) {
                            n[o] = g++;
                            m[o] = true
                        }
                        c = r + o;
                        if (Object.prototype.hasOwnProperty.call(n, c)) {
                            r = c
                        } else {
                            if (Object.prototype.hasOwnProperty.call(m, r)) {
                                if (r.charCodeAt(0) < 256) {
                                    for (h = 0; h < b; h++) {
                                        a = (a << 1);
                                        if (j == 15) {
                                            j = 0;
                                            q += k(a);
                                            a = 0
                                        } else {
                                            j++
                                        }
                                    }
                                    l = r.charCodeAt(0);
                                    for (h = 0; h < 8; h++) {
                                        a = (a << 1) | (l & 1);
                                        if (j == 15) {
                                            j = 0;
                                            q += k(a);
                                            a = 0
                                        } else {
                                            j++
                                        }
                                        l = l >> 1
                                    }
                                } else {
                                    l = 1;
                                    for (h = 0; h < b; h++) {
                                        a = (a << 1) | l;
                                        if (j == 15) {
                                            j = 0;
                                            q += k(a);
                                            a = 0
                                        } else {
                                            j++
                                        }
                                        l = 0
                                    }
                                    l = r.charCodeAt(0);
                                    for (h = 0; h < 16; h++) {
                                        a = (a << 1) | (l & 1);
                                        if (j == 15) {
                                            j = 0;
                                            q += k(a);
                                            a = 0
                                        } else {
                                            j++
                                        }
                                        l = l >> 1
                                    }
                                }
                                d--;
                                if (d == 0) {
                                    d = Math.pow(2, b);
                                    b++
                                }
                                delete m[r]
                            } else {
                                l = n[r];
                                for (h = 0; h < b; h++) {
                                    a = (a << 1) | (l & 1);
                                    if (j == 15) {
                                        j = 0;
                                        q += k(a);
                                        a = 0
                                    } else {
                                        j++
                                    }
                                    l = l >> 1
                                }
                            }
                            d--;
                            if (d == 0) {
                                d = Math.pow(2, b);
                                b++
                            }
                            n[c] = g++;
                            r = String(o)
                        }
                    }
                    if (r !== "") {
                        if (Object.prototype.hasOwnProperty.call(m, r)) {
                            if (r.charCodeAt(0) < 256) {
                                for (h = 0; h < b; h++) {
                                    a = (a << 1);
                                    if (j == 15) {
                                        j = 0;
                                        q += k(a);
                                        a = 0
                                    } else {
                                        j++
                                    }
                                }
                                l = r.charCodeAt(0);
                                for (h = 0; h < 8; h++) {
                                    a = (a << 1) | (l & 1);
                                    if (j == 15) {
                                        j = 0;
                                        q += k(a);
                                        a = 0
                                    } else {
                                        j++
                                    }
                                    l = l >> 1
                                }
                            } else {
                                l = 1;
                                for (h = 0; h < b; h++) {
                                    a = (a << 1) | l;
                                    if (j == 15) {
                                        j = 0;
                                        q += k(a);
                                        a = 0
                                    } else {
                                        j++
                                    }
                                    l = 0
                                }
                                l = r.charCodeAt(0);
                                for (h = 0; h < 16; h++) {
                                    a = (a << 1) | (l & 1);
                                    if (j == 15) {
                                        j = 0;
                                        q += k(a);
                                        a = 0
                                    } else {
                                        j++
                                    }
                                    l = l >> 1
                                }
                            }
                            d--;
                            if (d == 0) {
                                d = Math.pow(2, b);
                                b++
                            }
                            delete m[r]
                        } else {
                            l = n[r];
                            for (h = 0; h < b; h++) {
                                a = (a << 1) | (l & 1);
                                if (j == 15) {
                                    j = 0;
                                    q += k(a);
                                    a = 0
                                } else {
                                    j++
                                }
                                l = l >> 1
                            }
                        }
                        d--;
                        if (d == 0) {
                            d = Math.pow(2, b);
                            b++
                        }
                    }
                    l = 2;
                    for (h = 0; h < b; h++) {
                        a = (a << 1) | (l & 1);
                        if (j == 15) {
                            j = 0;
                            q += k(a);
                            a = 0
                        } else {
                            j++
                        }
                        l = l >> 1
                    }
                    while (true) {
                        a = (a << 1);
                        if (j == 15) {
                            q += k(a);
                            break
                        } else {
                            j++
                        }
                    }
                    return q
                },
                };
                console.log(LZString.compressToBase64(cc));
            """
            return js_str1 + js_str2

        def get_qz(self, qi=None, queue=1):
            """Write the script for this state, run it with node, return its output.

            :param qi: forwarded to :meth:`get_js_str`.
            :param queue: forwarded to :meth:`get_js_str`.
            :return: the text node printed, with line breaks removed.
            :raises RuntimeError: if node printed nothing (for example node is
                not installed or the script failed), because an empty payload
                is useless to the caller.
            """
            with open(self.JS_FILE, 'w', encoding='utf-8') as ww:
                ww.write(self.get_js_str(qi=qi, queue=queue))
            # The command contains only the fixed file name, never caller input.
            with os.popen('node %s' % self.JS_FILE) as pipe:
                output = pipe.read()
            res = output.replace('\r', '').replace('\n', '')
            if not res:
                raise RuntimeError(
                    'node produced no output for %s; is node installed?' % self.JS_FILE
                )
            return res

        def start(self):
            """Run the script for the default sample state, print and return the result."""
            res = self.get_qz()
            print(res)
            return res
    
    
    if __name__ == '__main__':
        # Script entry point: generate the JS file, run it and print the output.
        GetWipoJS().start()
    
    

    wipo_spider.py

    import requests
    import re
    import time
    from tools import get_ua
    from wipo_js import GetWipoJS
    
    def replaces(x):
        """Format *x* as text, drop every newline, then trim surrounding whitespace."""
        text = f'{x}'
        without_newlines = text.replace('\n', '')
        return without_newlines.strip()
    
    
    class MailRuPC(object):
        """Page through search results served by ``www3.wipo.int/branddb``.

        On construction the landing page is fetched to obtain the ``qk`` token.
        Each page request posts a ``qz`` payload produced by
        ``self.get_wipo_js.get_qz`` to ``select.jsp``, and the ``qi`` value from
        each response is passed on to the next request.
        """

        # (output key, key in the response document) pairs, in output order.
        _FIELD_MAP = (
            ('Brand', 'BRAND'),
            ('Source', 'SOURCE'),
            ('Status', 'STATUS'),
            ('Relevance', 'score'),
            ('Origin', 'OO'),
            ('Holder', 'HOL'),
            ('HolderCountry', 'HOLC'),
            ('Number', 'ID'),
            ('AppDate', 'AD'),
            ('Imageclass', 'IMGC'),
            ('NiceCl', 'NC'),
            ('Image', 'IMG'),  # e.g. 08/47/M11580847-th.jpg
        )

        def __init__(self):
            """Fetch the ``qk`` token (network call) and create the JS helper."""
            self.qk = self.get_qk()
            self.get_wipo_js = GetWipoJS()

        def get_cookies(self):
            """Placeholder: not implemented, returns ``None``."""
            pass

        def get_headers(self):
            """Return the request headers used for the ``select.jsp`` POST."""
            headers = {
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'Referer': 'https://www3.wipo.int/branddb/en/',
                'Origin': 'https://www3.wipo.int',
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': get_ua(),
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            }
            return headers

        def get_qk(self):
            """Scrape the ``qk`` token from the landing page.

            :return: the second ``qk = "..."`` match when exactly two matches
                exist and that one is 44 characters long; otherwise ``''``.
            """
            headers = {
                'User-Agent': get_ua(),
            }
            # A timeout keeps a stalled connection from hanging the constructor.
            response = requests.get(
                'https://www3.wipo.int/branddb/en/', headers=headers, timeout=60
            )
            qis = re.findall('qk = "(.*?)"', response.text)
            if len(qis) == 2 and len(qis[1]) == 44:
                return qis[1]
            return ''

        @staticmethod
        def _charset_from_content_type(content_type):
            """Return the charset named in a Content-Type value, or ``None``."""
            if not content_type:
                return None
            found = re.search(r'charset=([^;\s]+)', content_type, re.IGNORECASE)
            if found is None:
                return None
            return found.group(1).strip('"\'') or None

        def get_with_proxy(self, dd, timeout=60):
            """POST the request described by *dd* and decode the JSON reply.

            :param dd: dict with ``target_url``, ``headers`` and ``data`` keys.
            :param timeout: seconds passed to ``requests.post``.
            :return: ``(status_code, json_obj)``; ``json_obj`` is ``None`` when
                the body is not valid JSON.
            """
            response = requests.post(
                url=dd.get('target_url'),
                headers=dd.get('headers'),
                data=dd.get('data'),
                timeout=timeout,
            )
            # Only override the encoding when the server actually declared one;
            # a missing Content-Type header must not raise.
            charset = self._charset_from_content_type(
                response.headers.get('Content-Type')
            )
            if charset:
                response.encoding = charset
            try:
                json_obj = response.json()
            except ValueError:
                # Error pages and other non-JSON bodies are reported as "no data".
                json_obj = None
            return response.status_code, json_obj

        def parse_all(self, json_obj):
            """Extract result rows and the ``qi`` value from a decoded response.

            :param json_obj: decoded JSON body (expected to be a dict).
            :return: ``(result, qi)`` where ``result`` is a list of dicts keyed
                by the output names in ``_FIELD_MAP`` and ``qi`` is the
                response's ``qi`` value, or ``''`` when it is absent.
            """
            result = []
            qi = ''
            try:
                qi = json_obj.get('qi') or ''
                response_part = json_obj.get('response') or {}
                # A response without "docs" yields no rows rather than an error.
                for detail in response_part.get('docs') or []:
                    result.append(
                        {out_key: detail.get(src_key)
                         for out_key, src_key in self._FIELD_MAP}
                    )
            except (AttributeError, TypeError) as e:
                # Payload did not have the expected dict/list shape.
                msg = 'func parse_all error:%s' % repr(e)
                print(msg)
            return result, qi

        def control(self, qi, page):
            """Request one page of results.

            :param qi: token sent with this request.
            :param page: page number, passed to ``get_qz`` as ``queue``.
            :return: ``(result, qi)``; always a 2-tuple so callers can unpack
                it.  When the reply carries no ``qi`` the incoming one is
                returned unchanged.
            """
            target_url = 'https://www3.wipo.int/branddb/jsp/select.jsp'
            qz = self.get_wipo_js.get_qz(qi=qi, queue=page)
            dd = {
                'target_url': target_url,
                'headers': self.get_headers(),
                'data': {'qz': qz},
            }
            code, json_obj = self.get_with_proxy(dd)
            if not json_obj:
                print(f'no json for page:{page} status:{code}')
                return [], qi
            print(json_obj)
            result, next_qi = self.parse_all(json_obj)
            print(result)
            print(f'qi:{next_qi}')
            return result, next_qi or qi

        def start(self, pages=2, delay=10):
            """Crawl *pages* pages and return every parsed row.

            :param pages: number of pages to request, starting at page 1.
            :param delay: seconds to sleep between consecutive page requests.
            :return: list of row dicts from all pages (empty when no ``qk``
                token could be obtained).
            """
            print(f'qk:{self.qk}')
            if not self.qk:
                # Without the token the initial qi would be the bare prefix '0-'.
                print('qk not found; nothing to crawl')
                return []
            qi = '0-' + self.qk
            all_results = []
            for page in range(1, pages + 1):
                print(f'page:{page}')
                result, qi = self.control(qi, page)
                all_results.extend(result)
                if page < pages:
                    # Pause only between requests, not after the last one.
                    time.sleep(delay)
            return all_results
    
    
    if __name__ == '__main__':
        # Script entry point: the constructor fetches the qk token, then
        # start() requests the pages.
        spider = MailRuPC()
        result = spider.start()
    

    相关文章

      网友评论

          本文标题:采集wipo

          本文链接:https://www.haomeiwen.com/subject/weadfrtx.html