美文网首页 > python源码大全 > 汉字GBK与Unicode
Python代码库之解析unicode部首

Python代码库之解析unicode部首

作者: iCloudEnd | 来源:发表于2019-08-12 22:08 被阅读2次
    import sqlite3
    import json
    import os
    
    '''
    from cjk import *
    dbname='cjk_kangxi_bushou.db'
    filepath='Unihan12/a.txt'
    paraList=u2para(filepath)
    mdx2db(dbname,paraList)
    '''
    
    def u2para(filepath):
        """Parse a semicolon-delimited text file into rows ready for insertion.

        Blank lines and lines whose first non-whitespace character is ``#``
        are skipped. Every remaining line is split on ``;`` and its first
        three fields (stripped) are taken as ``cjk_id``, ``cjk_code`` and
        ``cjk_uni``; ``cjk_uni`` is additionally converted to a character
        with ``u2w``.

        Args:
            filepath: Path of the text file to read. It is opened with the
                platform default text encoding.

        Returns:
            A list of ``(cjk_id, cjk_code, cjk_uni, mkey)`` tuples, one per
            data line, in file order.

        Raises:
            IndexError: If a data line has fewer than three ``;``-separated
                fields.
            ValueError: Propagated from ``u2w`` when the third field cannot
                be converted.
        """
        total_lines = 0
        paralist = []

        # The context manager guarantees the handle is closed even when a
        # malformed line makes parsing raise part-way through the file.
        with open(filepath) as handle:
            for raw_line in handle:
                total_lines += 1
                text = raw_line.replace('\n', '').strip()
                if not text or text[0] == '#':
                    continue

                fields = text.split(';')
                cjk_id = fields[0].strip()
                cjk_code = fields[1].strip()
                cjk_uni = fields[2].strip()
                paralist.append((cjk_id, cjk_code, cjk_uni, u2w(cjk_uni)))

        # Diagnostic counts: all lines read, data lines kept, rows produced.
        # Every kept line yields exactly one row, so the last two are equal.
        print('filelist', total_lines)
        print('realist', len(paralist))
        print('paralist', len(paralist))
        return paralist
    
    
    def u2w(cstr):
        """Return the character whose code point is the hex string *cstr*.

        *cstr* is given without a prefix (e.g. ``'4E00'``); ``0x`` is
        prepended before it is parsed as a base-16 integer.
        """
        hex_literal = '0x' + cstr
        code_point = int(hex_literal, 16)
        return chr(code_point)
    
    def mdx2db(dbname,paraList):
        """Rebuild the SQLite database *dbname* and load *paraList* into it.

        Any existing file at *dbname* is deleted first, the
        ``cjk_kangxi_bushou`` table is created through ``createdb``, and all
        rows are then inserted and committed in a single transaction.

        Args:
            dbname: Path of the SQLite database file to (re)create.
            paraList: Iterable of ``(cjk_id, cjk_code, cjk_uni, mkey)``
                tuples, one per row to insert.
        """
        # Start from an empty file so createdb() does not hit an existing table.
        if os.path.isfile(dbname):
            os.remove(dbname)

        createdb(dbname)

        import sqlite3
        insert_sql = '''INSERT INTO cjk_kangxi_bushou(cjk_id,cjk_code,cjk_uni ,mkey) VALUES (?,?,?,?)'''

        conn = sqlite3.connect(dbname)
        try:
            cursor = conn.cursor()
            # Parameterized bulk insert: one call instead of a Python-level loop.
            cursor.executemany(insert_sql, paraList)
            conn.commit()
        finally:
            # Close the connection even if an insert fails, so the database
            # file is not left open (uncommitted rows are discarded).
            conn.close()
    
    
    def  createdb(dbname):
        """Create the ``cjk_kangxi_bushou`` table in the SQLite file *dbname*.

        The table has an auto-incrementing integer primary key ``ID`` and the
        text columns ``cjk_id`` (NOT NULL), ``cjk_code``, ``cjk_uni`` and
        ``mkey``.

        Args:
            dbname: Path of the SQLite database; it is created if missing.

        Returns:
            The string ``'ok'`` once the table has been created and committed.

        Raises:
            sqlite3.OperationalError: If the table already exists.
        """
        conn = sqlite3.connect(dbname)
        try:
            c = conn.cursor()

            c.execute('''CREATE TABLE cjk_kangxi_bushou
            (ID INTEGER PRIMARY KEY  AUTOINCREMENT,
            cjk_id          TEXT    NOT NULL,
            cjk_code          TEXT ,
            cjk_uni         TEXT  ,
            mkey            TEXT
    
             );''')
            conn.commit()
        finally:
            # Always release the connection, including when CREATE TABLE fails.
            conn.close()
        return 'ok'
    
    

    更多精彩代码请关注我的专栏

    相关文章

      网友评论

        本文标题:Python代码库之解析unicode部首

        本文链接:https://www.haomeiwen.com/subject/dnnpjctx.html