美文网首页000python
python字符串操作,删除、替换、拼接等

python字符串操作,删除、替换、拼接等

作者: 慧琴如翌 | 来源:发表于2018-04-26 02:15 被阅读1937次

    一、字符串删除、字符串替换

    1.删除字符串首尾的多余字符,strip(),lstrip(),rstrip()

    # 删除字符串中多余字符
    def string_remove():
        """Show how ``str.strip`` trims unwanted characters from both ends."""
        str1 = ' abc     \n'
        # Without an argument, strip() removes leading and trailing whitespace.
        print(str1.strip())   # abc

        str2 = '----abcdf++++'
        # The argument is a set of characters to strip, not a literal prefix/suffix.
        print(str2.strip('-+'))  # abcdf
    
    

    2.删除固定位置的字符串 ,切片+拼接的方式

    # 删除字符串中多余字符
    def string_remove2():
        """Drop the character at a known position by slicing around it."""
        str1 = 'abc*wrt'   # the '*' sits at index 3
        # Join everything before index 3 with everything after it.
        print(str1[:3] + str1[4:])   # abcwrt
    

    3.字符串replace方法或者正则,删除任意位置的字符

    # 删除字符串中多余字符
    def string_remove3():
        """Remove characters anywhere in a string with ``replace`` or ``re.sub``."""
        import re

        # One fixed substring: str.replace is enough.
        str1 = '\nabc\nwrt22\n'
        print(str1.replace('\n', ''))   # abcwrt22

        # Several different characters: a regex character class handles them in one pass.
        str2 = '\nabc\nwrt22\t666\t'
        print(re.sub(r'[\n\t]', '', str2))   # abcwrt22666

        # Every run of digits.
        str3 = 'abc123jm456n3fF'
        without_digits = re.sub(r'\d+', '', str3)
        print(without_digits)          # abcjmnfF
        print(without_digits.lower())  # abcjmnff
    
    

    4.字符串translate() 方法,同时删除多种不同字符

    
    # string有一个translate方法,第一个参数是映射的关系,第二个参数是删除操作
    def translate_pra():
        """Map and delete characters with ``str.translate``.

        The translate API differs between interpreters, so the table and the
        deletion arguments are built per version:
        Python 3 uses ``str.maketrans(from, to, delete)``; Python 2 uses
        ``string.maketrans(from, to)`` plus a separate ``deletechars`` argument.
        """
        import string

        s = 'abc123456xyz'
        s2 = 'abc\rdefff\nkl\t'

        if hasattr(str, 'maketrans'):
            # Python 3: the third maketrans() argument lists characters to delete.
            mapping = str.maketrans('abc', 'xyz')
            delete_args = (str.maketrans('', '', '\r\n\t'),)
        else:
            # Python 2: a table of None means "no mapping, only delete".
            mapping = string.maketrans('abc', 'xyz')
            delete_args = (None, '\r\n\t')

        print(s.translate(mapping))        # xyz123456xyz
        print(s2.translate(*delete_args))  # abcdefffkl
    

    二、将多个小字符串拼接成大字符串

    1. 迭代列表,连续使用“+”形式拼接每一个字符串

    # 字符串拼接
    
    def string_add():
        """Concatenate strings with ``+`` and with ``+=`` in a loop."""
        str1 = 'abc'
        str2 = '456'
        print(str1 + str2)  # abc456

        l1 = ['a1', 'a2', 'a3']
        s = ''
        for i in l1:
            # Each += may build a new temporary string; this loop is kept only
            # to illustrate the approach. str.join() is the recommended way.
            s += i
        print(s)  # a1a2a3
    

    2. 使用str.join(),更加快速的拼接列表中的所有字符

    def string_join():
        """Join the items of a list into one string with ``str.join``."""
        # The string that join() is called on becomes the separator.
        print(';'.join(['aa', 'bb', 'cc']))  # aa;bb;cc
        print(''.join(['aa', 'bb', 'cc']))  # aabbcc

        l1 = ['a1', 'a2', 'a3']
        print(''.join(l1))  # a1a2a3

        # join() only accepts strings, so non-string items are converted first.
        l2 = ['a1', 123, 'b1', 456]
        print(''.join([str(x) for x in l2]))  # list comprehension: a1123b1456

        # A generator expression gives the same result without first building
        # a separate list of converted items.
        print(''.join(str(x) for x in l2))  # a1123b1456
    

    三、判断字符串是否以某字符为结尾

    import os

    # endswith() accepts a tuple of suffixes, so one call checks every extension.
    result = [name for name in os.listdir('.') if name.endswith(('.aa', '.py'))]

    print(os.getcwd())  # /Users/xxxxxx/files/pyhon/selfPractise/test
    print(result)    # ['__init__.py', 'test1_basic.py']
    

    四、取出文本文件中符合条件的行(以字母开头的行)

    # Keep only the lines of a text file that start with a letter
    def csv_test(src='1.txt', dst='2.txt'):
        """Copy every line of *src* that starts with a letter into *dst*.

        Each line is stripped of surrounding whitespace before the check and
        written back followed by a single newline. Blank lines are skipped.

        :param src: path of the text file to read (defaults to ``'1.txt'``)
        :param dst: path of the text file to write (defaults to ``'2.txt'``)
        """
        # The with-statement closes both files, so no explicit close() is needed.
        with open(src, 'r') as f1, open(dst, 'w') as f2:
            for line in f1:  # e.g. 'aaaaafds\n'
                a = line.strip()  # e.g. 'aaaaafds'
                # a[:1] is '' for a blank line (where a[0] would raise
                # IndexError), and ''.isalpha() is False.
                if a[:1].isalpha():
                    f2.write(a + '\n')
    
    #
    # def csv_test2():
    #     csvFile = open('11.txt','r')
    #     reader = csv.reader(csvFile)
    #
    #     for item in reader:
    #         print item
    
    
    csv_test()
    

    1.txt文件中内容:
    aaaaafds
    1112333
    45667
    tttttt

    2.txt文件中内容:
    aaaaafds
    tttttt

    其他代码演示:

    #coding=utf-8
    
    # from dir1.test2 import test2_function
    import csv
    import string
    import sys
    import os
    from locale import normalize
    from operator import itemgetter
    
    # The reload()/setdefaultencoding() pair is a Python 2-only workaround;
    # neither name is available on Python 3, so it is guarded by version.
    if sys.version_info[0] == 2:
        reload(sys)
        sys.setdefaultencoding('utf-8')
    # sys.path entries must be directories, so register the folder that
    # contains this script rather than the script file itself.
    BASE_DIR = os.path.dirname(os.path.abspath(__file__))
    sys.path.append(BASE_DIR)
    # print 'base_dir'
    # print 'basedir:',BASE_DIR
    
    # def  test1_function(n):
    #     print 'in function test1_function:',n
    
    
    
    list1 = ['b', 'c', 'd', 'b', 'c', 'a', 'a']
    
    
    
    # 创建字典的方式
    def dict1():
        """Create a dictionary with a literal and print it."""
        x = {"a": "1", "b": "2"}
        print(x)    # {'a': '1', 'b': '2'}
    
    
    def dict2():
        """Show the argument shapes accepted by the ``dict()`` constructor."""
        # Keyword arguments: every keyword name becomes a string key.
        print(dict(a='1', b='2'))  # {'a': '1', 'b': '2'}
        print(dict(a=1, b=2))  # {'a': 1, 'b': 2}
        # dict('a'=1, 'b'=2) is a syntax error: keyword names cannot be quoted.

        # A tuple whose items are two-element lists or tuples (key, value).
        print(dict(([1, 3], [2, 4])))  # {1: 3, 2: 4}
        print(dict((['a', 3], ['b', 4], ['c', 3])))  # a -> 3, b -> 4, c -> 3
        print(dict((('a', 3), ('b', 4))))  # {'a': 3, 'b': 4}
        print(dict((('a', 3), ['b', 4])))  # {'a': 3, 'b': 4}

        # A list whose items are two-element lists or tuples (key, value).
        print(dict([['a', 3], ['b', 4]]))  # {'a': 3, 'b': 4}
        print(dict([('a', 3), ['b', 4]]))  # {'a': 3, 'b': 4}
        print(dict([('a', 3), ('b', 4)]))  # {'a': 3, 'b': 4}
    # dict2()
    
    def dict3():
        """Build dictionaries from a list of keys with ``dict.fromkeys``."""
        # Without a second argument every key maps to None.
        print(dict.fromkeys(['a', 'b']))  # {'a': None, 'b': None}
        # The second argument is the value shared by all keys.
        print(dict.fromkeys(['a', 'b'], 1))  # {'a': 1, 'b': 1}
    
    
    # 字典的更新函数
    def dict_update():
        """Merge one dictionary into another with ``dict.update``."""
        # Named 'target' rather than 'dict' so the builtin dict is not shadowed.
        target = {'a': 1, 'b': 2}
        dict1 = {'sex': 'male', 'age': 28}
        target.update(dict1)
        print(target)  # now also holds the 'sex' and 'age' entries
        print(dict1)   # the argument itself is left unchanged
    
    # dict_update()
    
    # zip的用法
    
    def zip1():
        """Pair up the items of two sequences with ``zip``."""
        a = [1, 2, 3]
        b = [4, 5, 6]
        # list() makes the pairs visible on Python 3, where zip() is lazy;
        # on Python 2 it is a harmless copy of the list zip() already returns.
        zipped = list(zip(a, b))
        print(zipped)    # [(1, 4), (2, 5), (3, 6)]

        # Tuples work the same way as lists.
        c = (1, 2, 3)
        d = (4, 5, 6)
        zipped2 = list(zip(c, d))
        print(zipped2)  # [(1, 4), (2, 5), (3, 6)]
    
    # zip的结果可以作为入参生成字典
    def zip2():
        """Build a dictionary from two parallel lists via ``zip``."""
        a = [1, 2, 3]
        b = [4, 5, 6]
        zipped = zip(a, b)    # pairs (1, 4), (2, 5), (3, 6)
        # dict() accepts any iterable of (key, value) pairs.
        dict1 = dict(zipped)  # {1: 4, 2: 5, 3: 6}
        print(dict1)
    # zip2()
    
    
    
    # 字符串、list、tuple转换
    def transf():
        """Convert between strings, lists and tuples, and split on several delimiters."""
        import re

        s1 = 'python'

        # string -> list of characters
        l1 = list(s1)   # ['p', 'y', 't', 'h', 'o', 'n']
        print(l1)

        # list -> string
        s2 = ''.join(l1)
        print('%s %s' % (s2, type(s2)))  # "python" followed by the str type

        # list -> tuple
        t1 = tuple(l1)
        print(t1)  # ('p', 'y', 't', 'h', 'o', 'n')

        # string -> list of whitespace-separated words
        print(s1.split())   # ['python']
        s3 = 'a b c'
        print(s3.split())  # ['a', 'b', 'c']

        # join the strings of a list
        l2 = ['i', 'love']
        joined = ''.join(l2)
        print('%s %s' % (joined, type(joined)))    # "ilove" followed by the str type

        a = 'Beautiful, is; better*than\nugly'
        # Four delimiters: ","  "; "  "*"  and newline. A raw string keeps the
        # backslash in \* for the regex engine instead of the string parser.
        x = re.split(r',|; |\*|\n', a)
        print(x)   # ['Beautiful', ' is', 'better', 'than', 'ugly']
    
    # transf()
    
    # sort和sorted
    
    def sorted1():
        """Sort with ``sorted`` using the ``key`` and ``reverse`` arguments."""
        words = "This is a test string from Andrew".split()

        # Order numbers by absolute value.
        print(sorted([36, 5, -12, 9, -21], key=abs))  # [5, 9, -12, -21, 36]

        # Case-insensitive ordering: compare the lower-cased form of each word.
        print(sorted(words, key=str.lower))  # ['a', 'Andrew', 'from', 'is', 'string', 'test', 'This']
        # Default ordering: upper-case ASCII letters compare lower than
        # lower-case ones, so capitalised words come first.
        print(sorted(words))  # ['Andrew', 'This', 'a', 'from', 'is', 'string', 'test']

        # reverse=True flips the resulting order.
        print(sorted(words, key=str.lower, reverse=True))  # ['This', 'test', 'string', 'is', 'from', 'Andrew', 'a']
    
    
    
    # list排序;list中包含元组的排序;
    def sorted2():
        """Sort a list of strings and a list of (name, score) tuples in several ways."""
        from operator import itemgetter

        words = ['bob', 'about', 'Zoo', 'Credit']
        # First the default ordering, then a case-insensitive one.
        for word_key in (None, str.lower):
            print(sorted(words, key=word_key))

        students = [('Bob', 75), ('Adam', 88), ('Bart', 66), ('Lisa', 88)]
        by_name = itemgetter(0)
        by_score = itemgetter(1)
        by_score_then_name = itemgetter(1, 0)

        # Order by field 0 (the name).
        print(sorted(students, key=by_name))
        # itemgetter(1) and the lambda below are equivalent key functions.
        print(sorted(students, key=by_score))
        print(sorted(students, key=lambda record: record[1]))

        # Order by score first and name second, ascending then descending.
        print(sorted(students, key=by_score_then_name))
        print(sorted(students, key=by_score_then_name, reverse=True))
    
    # sorted2()
    
    # 按照含有子串个数多少排序
    def sort3():
        """Sort strings by how many times a substring occurs in each."""
        from operator import methodcaller
        messages = ['critical!!!', 'hurry!', 'standby', 'immediate!!']
        # methodcaller('count', '!') builds a key that calls item.count('!').
        print(sorted(messages, key=methodcaller('count', '!')))   # ['standby', 'hurry!', 'immediate!!', 'critical!!!']
    
    
    # 按照升序和降序排序
    def sort4():
        """Sort (name, grade, age) tuples by age, ascending and then descending."""
        student_tuples = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]

        def by_age(student):
            """Return the age field (index 2) of a student tuple."""
            return student[2]

        print(sorted(student_tuples, key=by_age))  # [('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]
        print(sorted(student_tuples, key=by_age, reverse=True))  # [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    ########################################################
    
    # python中数据类型的排序
    # 首先是字符串的排序
    
    def string_ordered1():
        """Reverse a string with a slice whose step is -1."""
        # Named 'text' rather than 'str' so the builtin str is not shadowed.
        text = 'i love python'
        flipped = text[::-1]
        print('%s %s' % (flipped, type(flipped)))  # "nohtyp evol i" followed by the str type
    
    
    def string_ordered2():
        """Sort the characters of a string via an intermediate list."""
        s = 'i love python'
        l = list(s)
        l.sort()  # in-place sort; the two spaces end up first
        s = "".join(l)
        print(s)
    
    # 这种方法还没看太懂,先记录下
    def string_ordered3():
        """Sort the characters of a string in a single expression."""
        s = "string"

        # sorted() returns the ordered characters as a new list, which is what
        # the former lambda/tuple construct around list.sort() was emulating.
        s = "".join(sorted(s))
        print(s)  # ginrst
    
    
    
    # string_ordered2()
    
    
    # dict的排序
    
    def sortedDictValues1(adict):
        """Return the values of *adict* ordered by their keys.

        :param adict: dictionary whose keys can be compared with each other
        :return: list of values, in ascending key order
        """
        # sorted() accepts the items view of Python 3 as well as the list of
        # Python 2; keys are unique, so the values never decide the order.
        items = sorted(adict.items())
        return [value for key, value in items]
    
    def sortedDictValues2(adict):
        """Return the values of *adict* ordered by their keys.

        :param adict: dictionary whose keys can be compared with each other
        :return: list of values, in ascending key order
        """
        # Iterating a dict yields its keys; sorted() works on both the list
        # (Python 2) and the view (Python 3) without an in-place sort().
        return [adict[key] for key in sorted(adict)]
    
    def out1():
        """Print a small dictionary ordered by key in three different ways."""
        dict1 = {'b': 2, 'a': 1}
        # Values only, in key order.
        print(sortedDictValues1(dict1))   # [1, 2]
        # (key, value) pairs in key order; the lambda and itemgetter(0) are
        # equivalent key functions.
        print(sorted(dict1.items(), key=lambda d: d[0]))   # [('a', 1), ('b', 2)]
        print(sorted(dict1.items(), key=itemgetter(0)))  # [('a', 1), ('b', 2)]
    
    # out1()
    
    # 字典按顺序输出
    def dict_sorted3():
        """Rebuild a dictionary by inserting its entries in ascending key order."""
        dict1 = {'b': 2, 'a': 1}
        dict2 = {}
        # NOTE: a plain dict only guarantees to keep this insertion order on
        # Python 3.7+; older interpreters need collections.OrderedDict for that.
        for key in sorted(dict1):
            dict2[key] = dict1[key]
        print(dict2)
    
    # dict_sorted3()
    
    
    # 冒泡排序
    def bubble(bubbleList):
        """Sort *bubbleList* in place with bubble sort, then print it.

        :param bubbleList: mutable sequence of mutually comparable items
        """
        listLength = len(bubbleList)
        while listLength > 1:
            swapped = False
            for i in range(listLength - 1):
                if bubbleList[i] > bubbleList[i + 1]:
                    # Tuple swap works for any item type; the add/subtract
                    # swap fails on strings and loses precision on floats.
                    bubbleList[i], bubbleList[i + 1] = bubbleList[i + 1], bubbleList[i]
                    swapped = True
            if not swapped:
                # A pass without swaps means the list is already ordered.
                break
            # The largest remaining item has bubbled to the end of the range.
            listLength -= 1
        print(bubbleList)
    
    
    def bubble2(bubbleList):
        """Bubble-sort *bubbleList* in place and print the result.

        :param bubbleList: mutable sequence of mutually comparable items
        """
        # Every pass covers one element fewer: the tail is already in place.
        for unsorted_len in range(len(bubbleList), 0, -1):
            for idx in range(unsorted_len - 1):
                left = bubbleList[idx]
                right = bubbleList[idx + 1]
                if left > right:
                    bubbleList[idx] = right
                    bubbleList[idx + 1] = left
        print(bubbleList)
    
    # bubbleList = [2, 8, 4, 7, 5, 9, 0]
    # bubble(bubbleList)
    
    
    # Recursive factorial
    def recursion(n):
        """Return the factorial of *n*, computed recursively.

        :param n: non-negative integer
        :return: ``n!`` (1 for both 0 and 1)
        :raises ValueError: if *n* is negative
        """
        if n < 0:
            raise ValueError('factorial is not defined for negative numbers')
        # Covering 0 as well as 1 stops recursion(0) from recursing forever.
        if n <= 1:
            return 1
        return n * recursion(n - 1)
    
    print(recursion(10))
    
    
    
    # Square every number of a list with map

    def f(x):
        """Return the square of *x*."""
        return x * x

    def map1():
        """Apply ``f`` to each number from 1 to 9 and print the squares."""
        # list() materialises the result on Python 3, where map() is lazy.
        r = list(map(f, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
        print(r)  # [1, 4, 9, 16, 25, 36, 49, 64, 81]
    
    
    # 将list中的每个数字转换成字符串,用map
    def map2():
        """Convert each number from 1 to 9 to a string with ``map`` and print them."""
        # list() materialises the result on Python 3, where map() is lazy.
        r = list(map(str, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
        print(r)  # ['1', '2', '3', '4', '5', '6', '7', '8', '9']
    
    def map3():
        """Normalise names to "first letter upper-case, rest lower-case" and print them."""
        L1 = ['adam', 'LISA', 'barT']
        # str.capitalize gives the intended name casing. The module-level
        # `normalize` comes from the locale module and is meant for locale
        # names, not for people's names.
        L2 = [name.capitalize() for name in L1]
        print(L2)  # ['Adam', 'Lisa', 'Bart']
    
    
    # map3()
    
    # reduce用法,将序列转换成整数
    def fn(x, y):
        """Append the digit *y* to the decimal number *x* (``fn(3, 6) == 36``)."""
        return x * 10 + y


    def reduce1():
        """Fold a list of digits into one integer with ``reduce`` and print it."""
        # reduce is a builtin only on Python 2; functools provides it on both.
        from functools import reduce
        r = reduce(fn, [3, 6, 8, 9])
        print('%s %s' % (r, type(r)))  # "3689" followed by the int type
    
    
    # reduce1()
    # print list([1,2,3])
    
    
    def zip1():
        """Pair a list with another list and with a tuple using ``zip``.

        NOTE: this definition replaces the ``zip1`` defined earlier in the file.
        """
        a = [1, 2, 3]
        b = [4, 5, 6]
        # list() makes the pairs visible on Python 3, where zip() is lazy.
        print(list(zip(a, b)))  # [(1, 4), (2, 5), (3, 6)]
        c = (7, 8, 9)
        print(list(zip(a, c)))   # [(1, 7), (2, 8), (3, 9)]
    
    # "Unzip": turn a list of pairs back into the original sequences
    def zip2():
        """Undo a ``zip`` by unpacking its pairs into another ``zip`` call.

        NOTE: this definition replaces the ``zip2`` defined earlier in the file.
        """
        a = [1, 2, 3]
        b = [4, 5, 6]
        zipped = list(zip(a, b))
        # zip(*pairs) regroups all first items together and all second items together.
        print(list(zip(*zipped)))   # [(1, 2, 3), (4, 5, 6)]
    
    
    # zip2()
    
    
    
    # 去除list中的重复元素,且保持原来的顺序不变
    def remove_repeat1(list1):
        """Return the distinct items of *list1* in order of first appearance.

        The intermediate, arbitrarily ordered list of distinct items is printed
        before it is put back into the original order.

        :param list1: list of hashable items
        :return: new list without duplicates
        """
        l2 = list(set(list1))
        print('%s %s' % (l2, type(l2)))

        # Record where each value first occurs once, instead of calling
        # list1.index() (a linear scan) for every item while sorting.
        first_seen = {}
        for position, item in enumerate(list1):
            first_seen.setdefault(item, position)

        l2.sort(key=first_seen.__getitem__)
        return l2
    
    # 去除list中的重复元素,且保持原来的顺序不变
    def remove_repeat2(list1):
        """Return a new list holding the first occurrence of every item of *list1*.

        Membership is tested with ``in`` on the result list, so items do not
        need to be hashable.
        """
        l3 = []
        for item in list1:
            if item in l3:
                continue  # already kept once
            l3.append(item)
        return l3
    
    
    # 去除list中的重复元素,且保持原来的顺序不变
    def remove_repeat3(list1):
        """Return the distinct items of *list1*, keeping first-appearance order.

        The items become keys of an ``OrderedDict``, which drops repeats while
        remembering the order in which keys were first inserted.
        """
        from collections import OrderedDict
        unique = OrderedDict((item, None) for item in list1)
        return list(unique.keys())
    
    
    # 去除list中的重复元素,顺序会变
    def remove_repeat4(list1):
        """Return the distinct items of *list1*; the original order is not kept."""
        unique_items = set(list1)
        return list(unique_items)
    
    
    
    def remove_repeat5(list1):
        """Remove duplicates from *list1* via dictionary keys, keeping first-appearance order.

        Prints the distinct keys (with their container type) and then the
        re-ordered result.

        :param list1: list of hashable items
        :return: new list without duplicates
        """
        b = dict.fromkeys(list1)
        # list() turns the Python 3 keys view into a list, as on Python 2.
        c = list(b.keys())
        print('%s %s' % (c, type(c)))
        # Restore the order in which the items first occur in the input.
        c = sorted(c, key=list1.index)
        print(c)
        # Returned so that callers can use the result and not only see it printed.
        return c
    
    # remove_repeat5(list1)
    
    # 排序
    def sort1():
        """Print (name, grade, age) tuples ordered by age."""
        students = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
        # Index 2 of each tuple is the age.
        print(sorted(students, key=lambda student: student[2]))
    
    # sort1()
    
    
    
    # Drop empty and whitespace-only strings from a list
    def list_space():
        """Filter blank strings out of a sample list and print what is left."""
        list1 = [' ', 'ff', '', 'ss', 'aaa', '']
        # x.strip() is '' (falsy) for empty or whitespace-only strings. Building
        # a new list avoids removing items from a list while iterating over it.
        ls = [x for x in list1 if x.strip()]
        print(ls)  # ['ff', 'ss', 'aaa']
    
    # 将字符串倒序
    def string_reverse(m):
        """Return the items of *m* joined into one string in reverse order.

        :param m: a string, or another sequence of strings
        :return: the reversed string
        """
        # reversed() walks the sequence from its last item to its first.
        return ''.join(reversed(m))
    
    
    
    
    # 字符串的练习
    
    # 字符串大小写转换方法
    def string1():
        """Show ``str.capitalize`` and ``string.capwords``."""
        s = 'fu junmin'
        # capitalize() upper-cases only the first character of the whole string...
        print(s.capitalize())  # Fu junmin
        # ...and returns a new string, leaving the original unchanged.
        print(s)   # fu junmin
        s = 'The quick brown fox jumped over the lazy dog.'
        # capwords() capitalises every whitespace-separated word.
        print(string.capwords(s))    # The Quick Brown Fox Jumped Over The Lazy Dog.
    
    # string1()
    
    def count_find():
        """Compare ``str.count`` and ``str.find``, with and without start/end bounds."""
        s = 'I love python!'
        print(s.count('o'))  # 2
        print(s.find('o'))   # 3 (index of the first match)

        # Missing substring: count() gives 0, find() gives -1.
        print(s.count('9'))   # 0
        print(s.find('9'))  # -1

        # Search from index 5 onwards.
        print(s.count('o', 5))  # 1
        print(s.find('o', 5))   # 11

        # Search only s[5:7], which is 'e ' and holds no 'o'.
        print(s.count('o', 5, 7))  # 0
        print(s.find('o', 5, 7))   # -1
    
    # count_find()
    
    
    # append和extend区别
    def append_extend():
        """Contrast ``list.append`` with ``list.extend``.

        append() adds its argument as one single item; extend() adds every
        item of its (iterable) argument.

        :return: None
        """
        list1 = [1, 2]
        list1.append([3, 4])
        print(list1)    # [1, 2, [3, 4]]

        list1.extend([5, 6])
        print(list1)    # [1, 2, [3, 4], 5, 6]

        list1.append('love')
        print(list1)    # [1, 2, [3, 4], 5, 6, 'love']

        # A string is iterable, so extend() adds it character by character.
        list1.extend('python')
        print(list1)    # [1, 2, [3, 4], 5, 6, 'love', 'p', 'y', 't', 'h', 'o', 'n']
    
    # append_extend()
    
    
    def alphabet_sorted():
        """Split a sentence on whitespace and print its tokens in sorted order.

        Punctuation that stands alone between spaces counts as a token too.

        :return: None
        """
        # Named 'text' rather than 'str' so the builtin str is not shadowed.
        text = 'I love python. If you love too , join us !'
        list1 = text.split()
        list1.sort()
        print(list1)
    
    # alphabet_sorted()
    
    # 字符串倒序
    def string_reverse():
        """Reverse the order of the words in a sample sentence and print the result.

        NOTE: this redefinition replaces the one-argument ``string_reverse(m)``
        defined earlier in the file.
        """
        sentence = 'I love python'

        words = sentence.split()
        words.reverse()
        print(words)    # ['python', 'love', 'I']

        str4 = ' '.join(words)
        print(str4)  # python love I
    
    '''
    # 列出目录下的所有文件
    '''
    def list_dir(dn, list_name):
        """Collect the paths of all files below directory *dn* into *list_name*.

        The entry names of every visited directory are printed twice: once as
        a list and once one name per line.

        :param dn: path of the directory to scan
        :param list_name: list that the file paths are appended to
        :return: the same *list_name* object, for convenience
        """
        import os

        entries = os.listdir(dn)
        print(entries)
        print('\n'.join(entries))
        for entry in entries:
            file_path = os.path.join(dn, entry)
            if os.path.isdir(file_path):
                # Descend into the sub-directory, sharing the same accumulator.
                list_dir(file_path, list_name)
            else:
                list_name.append(file_path)
        return list_name
        # with open('result.txt','w') as wf:
        #     wf.write('\n'.join(L))
    
    
    # dn = '/Users/fujunmin/files/python/selfPractise/test/tmp'
    # list_name = []
    #
    # list_dir(dn,list_name)
    # print 'wenjian:',list_name
    
    
    '''
    # 取出csv文件中多个字段值符合条件的
    
    '''
    def csv_test(src='1.txt', dst='2.txt'):
        """Copy every line of *src* that starts with a letter into *dst*.

        Each line is stripped of surrounding whitespace before the check and
        written back followed by a single newline. Blank lines are skipped.

        :param src: path of the text file to read (defaults to ``'1.txt'``)
        :param dst: path of the text file to write (defaults to ``'2.txt'``)
        """
        # The with-statement closes both files, so no explicit close() is needed.
        with open(src, 'r') as f1, open(dst, 'w') as f2:
            for line in f1:  # e.g. 'aaaaafds\n'
                a = line.strip()  # e.g. 'aaaaafds'
                # a[:1] is '' for a blank line (where a[0] would raise
                # IndexError), and ''.isalpha() is False.
                if a[:1].isalpha():
                    f2.write(a + '\n')
    
    #
    # def csv_test2():
    #     csvFile = open('11.txt','r')
    #     reader = csv.reader(csvFile)
    #
    #     for item in reader:
    #         print item
    
    
    # csv_test()
    
    
    '''
    # 删除字符串中多余字符
    
    '''
    def string_remove():
        """Show how ``str.strip`` trims unwanted characters from both ends."""
        str1 = ' abc     \n'
        # Without an argument, strip() removes leading and trailing whitespace.
        print(str1.strip())   # abc

        str2 = '----abcdf++++'
        # The argument is a set of characters to strip, not a literal prefix/suffix.
        print(str2.strip('-+'))  # abcdf
    
    # 删除字符串中多余字符
    def string_remove2():
        """Drop the character at a known position by slicing around it."""
        str1 = 'abc*wrt'   # the '*' sits at index 3
        # Join everything before index 3 with everything after it.
        print(str1[:3] + str1[4:])   # abcwrt
    
    
    # 删除字符串中多余字符
    def string_remove3():
        """Remove characters anywhere in a string with ``replace`` or ``re.sub``."""
        import re

        # One fixed substring: str.replace is enough.
        str1 = '\nabc\nwrt22\n'
        print(str1.replace('\n', ''))   # abcwrt22

        # Several different characters: a regex character class handles them in one pass.
        str2 = '\nabc\nwrt22\t666\t'
        print(re.sub(r'[\n\t]', '', str2))   # abcwrt22666

        # Every run of digits.
        str3 = 'abc123jm456n3fF'
        without_digits = re.sub(r'\d+', '', str3)
        print(without_digits)          # abcjmnfF
        print(without_digits.lower())  # abcjmnff
    
    
    # string有一个translate方法,第一个参数是映射的关系,第二个参数是删除操作
    def translate_pra():
        """Map and delete characters with ``str.translate``.

        The translate API differs between interpreters, so the table and the
        deletion arguments are built per version:
        Python 3 uses ``str.maketrans(from, to, delete)``; Python 2 uses
        ``string.maketrans(from, to)`` plus a separate ``deletechars`` argument.
        """
        import string

        s = 'abc123456xyz'
        s2 = 'abc\rdefff\nkl\t'

        if hasattr(str, 'maketrans'):
            # Python 3: the third maketrans() argument lists characters to delete.
            mapping = str.maketrans('abc', 'xyz')
            delete_args = (str.maketrans('', '', '\r\n\t'),)
        else:
            # Python 2: a table of None means "no mapping, only delete".
            mapping = string.maketrans('abc', 'xyz')
            delete_args = (None, '\r\n\t')

        print(s.translate(mapping))        # xyz123456xyz
        print(s2.translate(*delete_args))  # abcdefffkl
    
    # translate_pra()
    
    
    '''
    字符串拼接
    '''
    
    def string_add():
        """Concatenate strings with ``+`` and with ``+=`` in a loop."""
        str1 = 'abc'
        str2 = '456'
        print(str1 + str2)  # abc456

        l1 = ['a1', 'a2', 'a3']
        s = ''
        for i in l1:
            # Each += may build a new temporary string; this loop is kept only
            # to illustrate the approach. str.join() is the recommended way.
            s += i
        print(s)  # a1a2a3
    
    def string_join():
        """Join the items of a list into one string with ``str.join``."""
        # The string that join() is called on becomes the separator.
        print(';'.join(['aa', 'bb', 'cc']))  # aa;bb;cc
        print(''.join(['aa', 'bb', 'cc']))  # aabbcc

        l1 = ['a1', 'a2', 'a3']
        print(''.join(l1))  # a1a2a3

        # join() only accepts strings, so non-string items are converted first.
        l2 = ['a1', 123, 'b1', 456]
        print(''.join([str(x) for x in l2]))  # list comprehension: a1123b1456

        # A generator expression gives the same result without first building
        # a separate list of converted items.
        print(''.join(str(x) for x in l2))  # a1123b1456
    
    # string_join()
    
    
    
    '''
    string.endwith()用法
    '''
    
    # 判断一个字符串是不是对称字符串
    def symmetrical(str1="mnanm"):
        """Report whether *str1* reads the same forwards and backwards.

        Prints the characters in original and in reversed order, plus a
        message when the string is symmetrical.

        :param str1: string to check (defaults to the sample ``"mnanm"``)
        :return: True if *str1* is symmetrical, otherwise False
        """
        list1 = list(str1)   # e.g. ['m', 'n', 'a', 'n', 'm']

        # Slice to get a reversed *copy*: reversing an alias of list1 in place
        # would reverse list1 too and make the comparison always succeed.
        list2 = list1[::-1]
        print(list1)
        print(list2)

        is_symmetrical = list2 == list1
        if is_symmetrical:
            print('It is a symmetrical string')
        return is_symmetrical
    
    # symmetrical()
    
    if __name__=='__main__':
        # test2_function(3, 4)
        pass
    
        # aa = list_repeat([1, 2, 2, 3,0,0,5,5,4,4])
        # print aa,type(aa)
    
        # list_space()
        # aa  = string_reverse('abcd')
        # print aa,type(aa)
        # b = aa
        # print b
        # list1 = [1,1,3,8,8,4,4]
        # list1 = ['b','c','d','b','c','a','a']
        # # print list_repeat(['a','b','c','a','b','c','c'])
        # print remove_repeat5(list1)
    
    
    
    

    相关文章

      网友评论

        本文标题:python字符串操作,删除、替换、拼接等

        本文链接:https://www.haomeiwen.com/subject/cugulftx.html