美文网首页
中文数字转换为阿拉伯数字

中文数字转换为阿拉伯数字

作者: momo1023 | 来源:发表于2019-11-20 15:54 被阅读0次
    import regex as re
    
    def number_translator(target):
        """Replace Chinese-numeral numbers inside ``target`` with Arabic digits.

        Text that is not part of a number is left untouched.  Examples::

            "一千两百,六百零五"  -> "1200,605"
            "两万零六百五"       -> "20650"   (trailing digit counts the next unit)
            "两百一十四"         -> "214"
            "两百十四"           -> "214"
            "一六零加一五八"     -> "160加158" (digit-by-digit spelling)
            "星期天" / "周末"    -> "星期7" / "周7"

        The passes run in a fixed order: colloquial shorthand ("三万五"),
        single digits, weekday names, then the units 十, 百, 千, 万 and 亿 from
        smallest to largest, so that each larger unit can absorb the digits
        already produced by the smaller ones.

        The patterns only use fixed-width lookbehinds, so they behave the same
        with the standard ``re`` module and with the ``regex`` package.

        :param target: string that may contain Chinese numerals
        :return: the string with the recognised numbers rewritten in digits
        """
        digit_values = {
            u'零': 0, u'一': 1, u'二': 2, u'两': 2, u'三': 3, u'四': 4,
            u'五': 5, u'六': 6, u'七': 7, u'八': 8, u'九': 9,
        }
        nonzero = u"[一二两三四五六七八九123456789]"

        def digit(ch):
            # ASCII digits are accepted alongside the Chinese numerals.
            return digit_values[ch] if ch in digit_values else int(ch)

        def expand_shorthand(text, unit_char, unit_value, blockers):
            # "X<unit>Y" where Y silently counts the next lower unit, e.g.
            # "三万五" == 35000.  ``blockers`` are the unit characters that,
            # when they follow Y, show that Y belongs to an explicit lower unit
            # ("三万五千"), in which case the later passes handle it instead.
            pattern = re.compile(
                u"(%s)%s(%s)(?!%s)" % (nonzero, unit_char, nonzero, blockers))

            def repl(m):
                high = digit(m.group(1)) * unit_value
                low = digit(m.group(2)) * (unit_value // 10)
                return str(high + low)

            return pattern.sub(repl, text)

        def fold_unit(text, pattern_src, unit_char, unit_value):
            # "<digits><unit><digits?>"  ->  digits * unit_value + trailing digits
            def repl(m):
                head, _, tail = m.group().partition(unit_char)
                return str(int(head) * unit_value + (int(tail) if tail else 0))

            return re.sub(pattern_src, repl, text)

        def fold_tens(m):
            head, _, tail = m.group().partition(u"十")
            tens = int(head) if head else 0
            if tens == 0:
                # A bare "十" (or "0十") means a single ten.
                tens = 1
            return str(tens * 10 + (int(tail) if tail else 0))

        # Every pass uses one callback-based ``sub`` so that each match is
        # replaced by the value computed from that very match; adjacent numbers
        # such as "十二十三" therefore stay intact ("1213").

        # 1. Colloquial shorthand, largest unit first.
        target = expand_shorthand(target, u"万", 10000, u"[千百十]")
        target = expand_shorthand(target, u"千", 1000, u"[百十]")
        target = expand_shorthand(target, u"百", 100, u"十")

        # 2. Remaining single numerals become single digits.
        target = re.sub(u"[零一二两三四五六七八九]",
                        lambda m: str(digit(m.group())), target)

        # 3. Weekday names: only directly after 周 / 星期, so that ordinary
        #    words such as "今天天气" are not touched.
        target = re.sub(u"(?:(?<=周)|(?<=星期))[天日末]", u"7", target)

        # 4. Tens; not directly after 周 / 星期.
        target = re.sub(u"(?<!周)(?<!星期)0?[0-9]?十[0-9]?", fold_tens, target)

        # 5. Larger units, smallest first, each absorbing the digits that the
        #    previous passes left to its right.
        target = fold_unit(target, u"0?[1-9]百[0-9]{0,2}", u"百", 100)
        target = fold_unit(target, u"0?[1-9]千[0-9]{0,3}", u"千", 1000)
        target = fold_unit(target, u"[0-9]+万[0-9]{0,4}", u"万", 10000)
        target = fold_unit(target, u"[0-9]+亿[0-9]{0,8}", u"亿", 100000000)

        return target
    

    例子:

    # Demo input mixing large numbers, shorthand, plain counts and weekday names.
    target = '二十亿一百万一千零两百,二千零三十三,三个星期.三天,三十日,星期天,周末, 十万亿'
    translated = number_translator(target)
    print(translated)
    

    输出:

    2001001200,2033,3个星期.3天,30日,星期7,周7, 10000000000000
    

    相关文章

      网友评论

          本文标题:中文数字转换为阿拉伯数字

          本文链接:https://www.haomeiwen.com/subject/rlioictx.html