美文网首页
辽经干3

辽经干3

作者: __method__ | 来源:发表于2021-04-07 11:31 被阅读0次

    set and string

    # Literal syntax of the built-in containers:
    #   set   {x, y}
    #   dict  {key: value, ...}
    #   tuple ()
    #   list  []
    # A set is unordered and never holds duplicates.
    a = {"1", "2", "3", 4, 4, 4}
    print(a)
    print(type(a))
    nameset = ["张三", "李四", "李四", "李四", "李四"]
    print(len(nameset))
    print(nameset)
    # Converting the list to a set drops the repeated names.
    unique_names = set(nameset)
    print(unique_names)
    print(len(unique_names))
    # Sets cannot be indexed; a[0] raises
    # TypeError: 'set' object is not subscriptable
    #
    # string
    # Anything enclosed in single or double quotes is a str.
    # A string is immutable once it has been created.
    name = "$liao, ning jingji"
    print(len(name))
    print(name.split())
    # replace() returns a new string; `name` itself is untouched until rebound.
    replaced = name.replace(",", ";")
    print(replaced)
    print(name)
    name = replaced
    print(name)
    # join() turns a list of strings into one string.
    ls = ["I", "am", "a", "good", "student"]
    str1, str2 = "-".join(ls), " ".join(ls)
    print(str1)
    print(str2)
    
    

    file io

    # Writing to and reading from a text file.
    # Writing: mode='w' opens the file for writing.
    # The with-statement closes the file on exit, so the text is flushed
    # to disk before the file is opened again for reading.
    s = "辽宁省沈阳市沈北新区"
    with open('hello.txt', mode='w', encoding='utf-8') as f:
        f.write(s)
    # Reading: mode='r' opens the file for reading.
    with open('hello.txt', mode='r', encoding='utf-8') as f:
        data = f.read()
    print(data)
    

    jieba

    # Chinese word segmentation.
    # The jieba package does the segmentation directly.
    # Install it with: pip install jieba
    import jieba
    seg = "我来自北京清华大学"
    words = jieba.lcut(seg)
    print(words)
    

    sort

    counts = {
        '第一回': 1,
        '桃园': 19,
        '豪杰': 22,
        '结义': 14,
        '黄巾': 40,
    }
    # Sorting: turn the dict into a list of (word, count) tuples and
    # order it by the second element of each tuple, largest first.
    ls = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    print(ls)
    

    analysis

    import jieba
    # Read the whole text; the with-statement closes the file afterwards.
    with open('threekingdom.txt', mode='r', encoding='utf-8') as f:
        data = f.read()
    print(len(data))
    word_list = jieba.lcut(data)
    print(len(word_list))
    # Word-frequency count; tokens of length 1 or less are skipped.
    counts = {}
    for word in word_list:
        if len(word) > 1:
            counts[word] = counts.get(word, 0) + 1
    # Fold alternative tokens into a single entry per name.
    # .get(..., 0) keeps this from raising KeyError when a token does not
    # occur in the text; an entry is only written when the total is non-zero.
    aliases = {
        "孔明": ("孔明曰",),
        "关公": ("云长",),
        "刘备": ("玄德曰", "玄德"),
    }
    for canonical, others in aliases.items():
        total = counts.get(canonical, 0) + sum(counts.get(o, 0) for o in others)
        if total:
            counts[canonical] = total
    stop_words = {"将军", "却说", "丞相", "孔明曰", "二人", "不可", "荆州", "不能",
                  "如此", "商议", "如何", "主公", "军士", "军马", "左右", "玄德曰",
                  "玄德", "云长", "次日", "引兵", "大喜", "东吴", "于是", "今日",
                  "不敢", "天下", "魏兵", "陛下", "都督"}
    # pop() with a default tolerates stop words that never appeared.
    for word in stop_words:
        counts.pop(word, None)
    # Print the ten most frequent remaining words, highest count first.
    ls = list(counts.items())
    ls.sort(key=lambda item: item[1], reverse=True)
    for name, num in ls[:10]:
        print(name, num)
    

    相关文章

      网友评论

          本文标题:辽经干3

          本文链接:https://www.haomeiwen.com/subject/nraykltx.html