在列表、字典、集合中根据条件筛选数据

作者: 慧琴如翌 | 来源:发表于2018-04-26 18:17 被阅读17次

一、列表的过滤

1.传统做法是：

data = [1, 5, -3, -4, 6]
res = []
for x in data:
    # Keep only the non-negative items.
    if x >= 0:
        res.append(x)
# Call form of print: same output on Python 2, and valid on Python 3.
print(res)    # [1, 5, 6]

2.filter过滤

from random import randint
# range() instead of xrange(): identical iteration, and available on Python 3 too.
data = [randint(-3, 3) for _ in range(5)]
print(data)   # e.g. [0, 1, -1, -1, 2]

# filter() passes every item of data to the lambda and keeps those for which it returns True.
# list() makes the result a real list on Python 3 as well (filter is lazy there).
print(list(filter(lambda x: x >= 0, data)))  # e.g. [0, 1, 2]

3. 列表解析：

print([x for x in data if x >= 0])  # list comprehension: keep the non-negative items of data

两种方法哪种好呢？

In [19]: data = [-2,9,3]

In [20]: timeit [x for x in data if x>=0]
The slowest run took 13.69 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 296 ns per loop

In [21]: timeit [x for x in data if x>=0]
1000000 loops, best of 3: 289 ns per loop

总结：一般推荐使用列表解析

二、字典

根据字典的值过滤字典

# d maps the keys 1..20 to random values in 60..100; the goal is to keep the entries above 90.
d = {x: randint(60, 100) for x in range(1, 21)}

print({k: v for k, v in d.items() if v > 90})  # e.g. {1: 100, 3: 97, 6: 98, 9: 100, 12: 95, 13: 93, 17: 100, 18: 91}

三、集合

data = [randint(-3, 3) for _ in range(5)]
s = set(data)    # e.g. {0, 2, 3}

# Set comprehension: keep the members of s that are divisible by 3.
print({x for x in s if x % 3 == 0})

其他代码演示：


list1 = ['b', 'c', 'd', 'b', 'c', 'a', 'a']



# 创建字典的方式
def dict1():
    """Print a dict created with a literal."""
    x = {"a": "1", "b": "2"}
    # Call form of print: same output on Python 2, and valid on Python 3.
    print(x)    # output: {'a': '1', 'b': '2'}


def dict2():
    """Print dicts built by dict() from keyword arguments and from iterables of key/value pairs."""
    # Keyword arguments: every argument name becomes a string key.
    print(dict(a='1', b='2'))  # {'a': '1', 'b': '2'}
    print(dict(a=1, b=2))  # {'a': 1, 'b': 2}
    # dict('a'=1, 'b'=2) is a syntax error: keyword names cannot be quoted.

    # A tuple holding a series of two-item tuples or lists (the two kinds may be mixed).
    print(dict(([1, 3], [2, 4])))  # {1: 3, 2: 4}
    print(dict((['a', 3], ['b', 4], ['c', 3])))  # {'a': 3, 'b': 4, 'c': 3} (key order may differ on Python 2)
    print(dict((('a', 3), ('b', 4))))  # {'a': 3, 'b': 4}
    print(dict((('a', 3), ['b', 4])))  # {'a': 3, 'b': 4}

    # A list holding a series of two-item tuples or lists.
    print(dict([['a', 3], ['b', 4]]))  # {'a': 3, 'b': 4}
    print(dict([('a', 3), ['b', 4]]))  # {'a': 3, 'b': 4}
    print(dict((('a', 3), ('b', 4))))  # {'a': 3, 'b': 4}
# dict2()

def dict3():
    """Print dicts built by dict.fromkeys, first with the default value None, then with 1."""
    keys = ['a', 'b']
    print(dict.fromkeys(keys))  # {'a': None, 'b': None}
    print(dict.fromkeys(keys, 1))  # {'a': 1, 'b': 1}


# 字典的更新函数
def dict_update():
    """Show that dict.update merges another dict into the receiver and leaves the argument untouched."""
    # Local names chosen so that the builtin dict is not shadowed.
    base = {'a': 1, 'b': 2}
    extra = {'sex': 'male', 'age': 28}
    base.update(extra)
    print(base)  # contains a, b, sex and age (key order depends on the Python version)
    print(extra)  # unchanged: only sex and age

# dict_update()

# zip的用法

def zip1():
    """Print the pairs produced by zipping two lists, then two tuples."""
    a = [1, 2, 3]
    b = [4, 5, 6]
    # list() materialises the pairs: zip is lazy on Python 3 and would print as "<zip object ...>".
    zipped = list(zip(a, b))
    print(zipped)    # [(1, 4), (2, 5), (3, 6)]
    c = (1, 2, 3)
    d = (4, 5, 6)
    zipped2 = list(zip(c, d))
    print(zipped2)  # [(1, 4), (2, 5), (3, 6)]

# zip的结果可以作为入参生成字典
def zip2():
    """Print a dict built from zipped keys and values."""
    a = [1, 2, 3]
    b = [4, 5, 6]
    zipped = zip(a, b)    # pairs (1, 4), (2, 5), (3, 6)
    dict1 = dict(zipped)  # {1: 4, 2: 5, 3: 6}
    print(dict1)
# zip2()



# 字符串、list、tuple转换
def transf():
    """Print examples of converting between str, list and tuple, and of splitting strings."""
    import re

    s1 = 'python'

    # str -> list of characters
    l1 = list(s1)   # ['p', 'y', 't', 'h', 'o', 'n']
    print(l1)

    # list -> str
    s2 = ''.join(l1)
    # A single formatted string prints the same on Python 2 and Python 3.
    print('%s %s' % (s2, type(s2)))  # python <type 'str'>  (<class 'str'> on Python 3)

    # list -> tuple
    t1 = tuple(l1)
    print(t1)  # ('p', 'y', 't', 'h', 'o', 'n')

    # str -> list of whitespace-separated words
    print(s1.split())   # ['python']
    s3 = 'a b c'
    print(s3.split())  # ['a', 'b', 'c']

    # Concatenate the strings held in a list.
    l2 = ['i', 'love']
    joined = ''.join(l2)
    print('%s %s' % (joined, type(joined)))    # ilove <type 'str'>

    a = 'Beautiful, is; better*than\nugly'
    # Four separators: ","  "; "  "*"  and newline.
    # Raw string, so "\*" reaches the regex engine without relying on an invalid string escape.
    x = re.split(r',|; |\*|\n', a)
    print(x)   # ['Beautiful', ' is', 'better', 'than', 'ugly']

# transf()

# sort和sorted

def sorted1():
    """Print sorted() results using key functions and reverse ordering."""
    print(sorted([36, 5, -12, 9, -21], key=abs))  # [5, 9, -12, -21, 36]

    words = "This is a test string from Andrew".split()

    # Case-insensitive ordering.
    print(sorted(words, key=str.lower))  # ['a', 'Andrew', 'from', 'is', 'string', 'test', 'This']
    # Case-sensitive ordering: upper-case letters have smaller code points, so they come first.
    print(sorted(words))  # ['Andrew', 'This', 'a', 'from', 'is', 'string', 'test']

    # Descending order only needs reverse=True.
    print(sorted(words, key=str.lower, reverse=True))  # ['This', 'test', 'string', 'is', 'from', 'Andrew', 'a']



# list排序；list中包含元组的排序；
def sorted2():
    """Print a list of words and a list of (name, score) tuples under several sort keys."""
    from operator import itemgetter

    words = ['bob', 'about', 'Zoo', 'Credit']
    # First the default ordering, then the case-insensitive one.
    for key_func in (None, str.lower):
        print(sorted(words, key=key_func))

    scores = [('Bob', 75), ('Adam', 88), ('Bart', 66), ('Lisa', 88)]
    by_name = itemgetter(0)
    by_score = itemgetter(1)
    by_score_then_name = itemgetter(1, 0)

    # Order by field 0 (the name).
    print(sorted(scores, key=by_name))
    # Order by field 1 (the score); itemgetter(1) and the lambda are equivalent.
    print(sorted(scores, key=by_score))
    print(sorted(scores, key=lambda record: record[1]))

    # Order by score first and name second, ascending and then descending.
    for descending in (False, True):
        print(sorted(scores, key=by_score_then_name, reverse=descending))

# sorted2()

# 按照含有子串个数多少排序
def sort3():
    """Print messages ordered by how many '!' characters each one contains."""
    from operator import methodcaller
    messages = ['critical!!!', 'hurry!', 'standby', 'immediate!!']
    # methodcaller('count', '!') calls message.count('!') on every item.
    print(sorted(messages, key=methodcaller('count', '!')))   # ['standby', 'hurry!', 'immediate!!', 'critical!!!']


# 按照升序和降序排序
def sort4():
    """Print student tuples ordered by their third field, ascending and then descending."""
    student_tuples = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    print(sorted(student_tuples, key=lambda student: student[2]))  # [('dave', 'B', 10), ('jane', 'B', 12), ('john', 'A', 15)]
    print(sorted(student_tuples, key=lambda student: student[2], reverse=True))  # [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
########################################################

# python中数据类型的排序
# 首先是字符串的排序

def string_ordered1():
    """Print a string reversed with the [::-1] slice, followed by the type of the result."""
    # Named text rather than str so the builtin is not shadowed.
    text = 'i love python'
    reversed_text = text[::-1]
    # A single formatted string prints the same on Python 2 and Python 3.
    print('%s %s' % (reversed_text, type(reversed_text)))  # nohtyp evol i <type 'str'>


def string_ordered2():
    """Print the characters of a string in ascending order, joined back into one string."""
    s = 'i love python'
    # sorted() returns the characters as a new ordered list; join() turns them back into a str.
    s = "".join(sorted(s))
    print(s)

# 这种方法还没看太懂，先记录下
def string_ordered3():
    """Print the characters of "string" in ascending order."""
    s = "string"

    # The original one-liner built a (None, list) tuple only to sort a list in place and
    # get it back; sorted() yields the same ordered characters directly.
    s = "".join(sorted(s))
    print(s)



# string_ordered2()







# 冒泡排序
def bubble(bubbleList):
    """Sort bubbleList in place in ascending order with bubble sort, then print it.

    :param bubbleList: mutable sequence of mutually comparable items
    :return: None (the list is modified in place)
    """
    listLength = len(bubbleList)
    while listLength > 0:
        # Each pass moves the largest remaining item to the end of the unsorted prefix.
        for i in range(listLength - 1):
            if bubbleList[i] > bubbleList[i + 1]:
                # Tuple swap instead of the add/subtract trick: that trick only works for
                # numbers, raises TypeError for strings and loses precision with floats.
                bubbleList[i], bubbleList[i + 1] = bubbleList[i + 1], bubbleList[i]
        listLength -= 1
    print(bubbleList)


def bubble2(bubbleList):
    """Bubble-sort bubbleList in place (ascending) and print the sorted list."""
    # The unsorted prefix shrinks by one item after every pass.
    for unsorted in range(len(bubbleList), 0, -1):
        for left in range(unsorted - 1):
            right = left + 1
            if bubbleList[left] > bubbleList[right]:
                bubbleList[left], bubbleList[right] = bubbleList[right], bubbleList[left]
    print(bubbleList)

# bubbleList = [2, 8, 4, 7, 5, 9, 0]
# bubble(bubbleList)




# Recursion example: factorial (this is not a sort)
def recursion(n):
    """Return n! (the factorial of n), computed recursively.

    :param n: non-negative integer
    :return: 1 for n of 0 or 1, otherwise n * (n - 1) * ... * 1
    :raises ValueError: if n is negative
    """
    if n < 0:
        # Without this guard the recursion would never reach the base case.
        raise ValueError('recursion() is not defined for negative n')
    if n <= 1:
        # 0! and 1! are both 1.
        return 1
    return n * recursion(n - 1)

print(recursion(10))






# Square every number in a list with map

def f(x):
    """Return x multiplied by itself."""
    squared = x * x
    return squared

def map1():
    """Print the result of applying f to each of the numbers 1..9."""
    # list() materialises the result: map is lazy on Python 3 and would print as "<map object ...>".
    r = list(map(f, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
    print(r)  # [1, 4, 9, 16, 25, 36, 49, 64, 81]


# 将list中的每个数字转换成字符串，用map
def map2():
    """Print the numbers 1..9 converted to strings with map."""
    # list() materialises the result: map is lazy on Python 3 and would print as "<map object ...>".
    r = list(map(str, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
    print(r)  # ['1', '2', '3', '4', '5', '6', '7', '8', '9']

def map3():
    """Print the list obtained by applying normalize to every name in L1."""
    # NOTE(review): normalize is not defined in the visible part of this file; confirm it
    # exists elsewhere, otherwise this call raises NameError.
    L1 = ['adam', 'LISA', 'barT']
    L2 = list(map(normalize, L1))
    print(L2)


# map3()

# reduce用法,将序列转换成整数
def fn(x, y):
    """Return x * 10 + y, i.e. append the digit y to the number x."""
    shifted = x * 10
    return shifted + y


def reduce1():
    """Fold the digits [3, 6, 8, 9] into one number with reduce and fn, then print it and its type."""
    # reduce is a builtin only on Python 2; functools.reduce is available on Python 2.6+ and Python 3.
    from functools import reduce
    r = reduce(fn, [3, 6, 8, 9])
    # A single formatted string prints the same on Python 2 and Python 3.
    print('%s %s' % (r, type(r)))


# reduce1()
# print list([1,2,3])




def zip1():
    """Print the pairs from zipping a list with a list, then a list with a tuple.

    NOTE: this rebinds the name zip1, which is also defined earlier in the file.
    """
    a = [1, 2, 3]
    b = [4, 5, 6]
    # list() materialises the pairs: zip is lazy on Python 3 and would print as "<zip object ...>".
    print(list(zip(a, b)))  # [(1, 4), (2, 5), (3, 6)]
    c = (7, 8, 9)
    print(list(zip(a, c)))   # [(1, 7), (2, 8), (3, 9)]

# 解压缩包
def zip2():
    """Print the result of "unzipping" zipped pairs back into the original sequences.

    NOTE: this rebinds the name zip2, which is also defined earlier in the file.
    """
    a = [1, 2, 3]
    b = [4, 5, 6]
    zipped = zip(a, b)
    # zip(*pairs) transposes the pairs again; list() makes the result printable on Python 3.
    print(list(zip(*zipped)))   # [(1, 2, 3), (4, 5, 6)]


# zip2()







# 去除list中的重复元素，且保持原来的顺序不变
def remove_repeat1(list1):
    """Return the distinct items of list1 in order of first appearance.

    Duplicates are dropped with set() and the original order is then restored by
    sorting on each item's first index in list1. The intermediate (unordered) list
    and its type are printed.

    :param list1: list of hashable items
    :return: new list without duplicates, in first-appearance order
    """
    l2 = list(set(list1))
    # A single formatted string prints the same on Python 2 and Python 3.
    print('%s %s' % (l2, type(l2)))
    l2.sort(key=list1.index)
    return l2

# 去除list中的重复元素，且保持原来的顺序不变
def remove_repeat2(list1):
    """Return the items of list1 without duplicates, keeping first-appearance order."""
    unique = []
    for item in list1:
        if item in unique:
            # Already collected: skip the repeat.
            continue
        unique.append(item)
    return unique


# 去除list中的重复元素，且保持原来的顺序不变
def remove_repeat3(list1):
    """Return the items of list1 without duplicates, keeping first-appearance order."""
    from collections import OrderedDict
    # The keys of an OrderedDict are unique and remember their insertion order,
    # e.g. fromkeys(['a', 'b', 'c', 'a', 'b', 'c', 'c']) keeps a, b, c.
    return [key for key in OrderedDict.fromkeys(list1)]


# 去除list中的重复元素，顺序会变
def remove_repeat4(list1):
    """Return the distinct items of list1 as a list; the original order is not preserved."""
    distinct = set(list1)
    return list(distinct)



def remove_repeat5(list1):
    """Remove duplicates from list1 via dict keys and restore first-appearance order.

    Prints the distinct keys with their type, then the ordered result.

    :param list1: list of hashable items, e.g. [1, 2, 4, 2, 4, 5, 6, 5, 7, 8, 9, 0]
    :return: new list without duplicates, in first-appearance order
        (returned as well as printed, like the other remove_repeat* helpers)
    """
    # list() gives a real list on Python 3, where dict.keys() is only a view.
    unique = list(dict.fromkeys(list1))
    # A single formatted string prints the same on Python 2 and Python 3.
    print('%s %s' % (unique, type(unique)))
    ordered = sorted(unique, key=list1.index)
    print(ordered)
    return ordered

# remove_repeat5(list1)

# 排序
def sort1():
    """Print student tuples ordered by their third field in ascending order."""
    students = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    print(sorted(students, key=lambda student: student[2]))

# sort1()



# Remove empty and whitespace-only strings from a list
def list_space():
    """Print a list with its empty and whitespace-only strings filtered out."""
    list1 = [' ', 'ff', '', 'ss', 'aaa', '']
    # x.strip() is an empty (falsy) string for '' and for whitespace-only items.
    # A new list is built rather than calling list1.remove() while looping over list1,
    # because removing items from a list during iteration skips elements.
    ls = [x for x in list1 if x.strip()]
    print(ls)

# 将字符串倒序
def string_reverse(m):
    """Return the characters of m in reverse order, joined into a single string."""
    # reversed() walks m from its last item to its first.
    return ''.join(reversed(m))




# 字符串的练习

# 字符串大小写转换方法
def string1():
    """Print examples of str.capitalize and string.capwords."""
    # string.capwords is used below, so the module has to be imported
    # (the original raised NameError here).
    import string

    s = 'fu junmin'
    print(s.capitalize())  # Fu junmin: only the first letter of the string is upper-cased
    print(s)   # fu junmin: the original string is unchanged
    s = 'The quick brown fox jumped over the lazy dog.'
    print(string.capwords(s))    # The Quick Brown Fox Jumped Over The Lazy Dog.

# string1()

def count_find():
    """Print str.count and str.find results for several search ranges."""
    s = 'I love python!'
    print(s.count('o'))  # 2
    print(s.find('o'))   # 3

    # A character that does not occur: count gives 0, find gives -1.
    print(s.count('9'))   # 0
    print(s.find('9'))  # -1

    # Search from index 5 to the end.
    print(s.count('o', 5))  # 1
    print(s.find('o', 5))   # 11

    # Search only in s[5:7], which contains no 'o'.
    print(s.count('o', 5, 7))  # 0
    print(s.find('o', 5, 7))   # -1

# count_find()


# append和extend区别
def append_extend():
    '''
    Show the difference between list.append and list.extend:
    append adds its argument as one item, extend adds every item of its argument.
    :return: None
    '''

    list1 = [1, 2]
    list1.append([3, 4])
    print(list1)    # [1, 2, [3, 4]]

    list1.extend([5, 6])   # [1, 2, [3, 4], 5, 6]
    print(list1)

    list1.append('love')  # [1, 2, [3, 4], 5, 6, 'love']
    print(list1)

    # A string is iterable, so extend adds its characters one by one.
    list1.extend('python')   # [1, 2, [3, 4], 5, 6, 'love', 'p', 'y', 't', 'h', 'o', 'n']
    print(list1)

# append_extend()


def alphabet_sorted():
    '''
    Print the whitespace-separated words of a sentence in sorted order.
    :return: None
    '''
    # Named text rather than str so the builtin is not shadowed.
    text = 'I love python. If you love too , join us !'
    list1 = text.split()
    list1.sort()
    print(list1)

# alphabet_sorted()

# 字符串倒序
def string_reverse():
    """Print the words of a sentence in reverse order, as a list and as a joined string.

    NOTE: this rebinds the name string_reverse, which is defined earlier in the file
    with a different signature (string_reverse(m)).
    """
    sentence = 'I love python'

    words = sentence.split()
    words.reverse()
    print(words)    # ['python', 'love', 'I']

    joined = ' '.join(words)
    print(joined)  # python love I



'''
# 列出目录下的所有文件
'''
def list_dir(dn, list_name):
    """Recursively collect the paths of all non-directory entries under dn into list_name.

    The entry names of every visited directory are also printed, first as a list
    and then one name per line.

    :param dn: path of the directory to walk
    :param list_name: list the found paths are appended to (modified in place)
    :return: None
    """
    import os
    L = os.listdir(dn)
    print(L)
    print('\n'.join(L))
    for i in L:
        file_path = os.path.join(dn, i)
        if os.path.isdir(file_path):
            # Descend into sub-directories, sharing the same result list.
            list_dir(file_path, list_name)
        else:
            list_name.append(file_path)


# dn = '/Users/fujunmin/files/python/selfPractise/test/tmp'
# list_name = []
#
# list_dir(dn,list_name)
# print 'wenjian:',list_name



# result = [name for name in os.listdir('.') if name.endswith(('.aa','.py'))]
#
# print os.getcwd()  # /Users/fujunmin/files/python/selfPractise/test
# print result    # ['__init__.py', 'test1_basic.py']



'''
# Copy the lines of a text file that start with a letter into another file

'''
def csv_test(src='1.txt', dst='2.txt'):
    """Copy the lines of src whose first non-blank character is a letter into dst.

    Every line is stripped of surrounding whitespace before the check and is
    written back followed by a single newline.

    :param src: path of the file to read (default '1.txt')
    :param dst: path of the file to write, overwritten if present (default '2.txt')
    :return: None
    """
    # The with statement closes both files, so no explicit close() calls are needed.
    with open(src, 'r') as f1, open(dst, 'w') as f2:
        for line in f1:  # e.g. 'aaaaafds\n'
            a = line.strip()  # e.g. 'aaaaafds'
            # a[:1] is '' for blank lines, so they are skipped instead of raising IndexError.
            if a[:1].isalpha():
                f2.write(a + '\n')

#
# def csv_test2():
#     csvFile = open('11.txt','r')
#     reader = csv.reader(csvFile)
#
#     for item in reader:
#         print item


# csv_test()


'''
# 删除字符串中多余字符

'''
def string_remove():
    """Print examples of str.strip removing unwanted characters from both ends of a string."""
    str1 = ' abc     \n'
    # Without arguments strip() removes leading and trailing whitespace.
    print(str1.strip())   # abc

    str2 = '----abcdf++++'
    # With an argument it removes any of the given characters from both ends.
    print(str2.strip('-+'))  # abcdf

# 删除字符串中多余字符
def string_remove2():
    """Print a string with its '*' character removed by slicing around it."""
    str1 = 'abc*wrt'
    # Locate the character instead of hard-coding its position.
    star = str1.index('*')
    print(str1[:star] + str1[star + 1:])   # abcwrt


# 删除字符串中多余字符
def string_remove3():
    """Print examples of deleting characters anywhere in a string with str.replace and re.sub."""
    import re

    str1 = '\nabc\nwrt22\n'
    # Remove every newline.
    print(str1.replace('\n', ''))   # abcwrt22

    str2 = '\nabc\nwrt22\t666\t'
    # Remove every newline and tab in one pass.
    print(re.sub('[\n\t]', '', str2))   # abcwrt22666

    str3 = 'abc123jm456n3fF'
    # Remove every run of digits.
    print(re.sub(r'([\d]+)', '', str3))  # abcjmnfF
    print(re.sub(r'([\d]+)', '', str3).lower())  # abcjmnff


# string有一个translate方法,第一个参数是映射的关系，第二个参数是删除操作
def translate_pra():
    """Print examples of str.translate: mapping characters, then deleting characters."""
    import string

    s = 'abc123456xyz'
    s2 = 'abc\rdefff\nkl\t'

    if hasattr(str, 'maketrans'):
        # Python 3: str.maketrans builds the table; its third argument lists characters to delete.
        mapped = s.translate(str.maketrans('abc', 'xyz'))
        cleaned = s2.translate(str.maketrans('', '', '\r\n\t'))
    else:
        # Python 2: string.maketrans builds the table; translate's second argument deletes characters.
        mapped = s.translate(string.maketrans('abc', 'xyz'))
        cleaned = s2.translate(None, '\r\n\t')

    print(mapped)  # xyz123456xyz
    print(cleaned)   # abcdefffkl

# translate_pra()


'''
字符串拼接
'''

def string_add():
    """Print strings concatenated with + and with += in a loop."""
    str1 = 'abc'
    str2 = '456'
    print(str1 + str2)  # abc456

    l1 = ['a1', 'a2', 'a3']
    s = ''
    for i in l1:
        # Gives a1a2a3, but each += builds a new temporary string;
        # str.join() is the recommended way to concatenate many pieces.
        s += i
    print(s)

def string_join():
    """Print examples of str.join, including joining a list that contains non-string items."""
    print(';'.join(['aa', 'bb', 'cc']))  # aa;bb;cc
    print(''.join(['aa', 'bb', 'cc']))  # aabbcc

    l1 = ['a1', 'a2', 'a3']
    print(''.join(l1))  # a1a2a3: concatenates every string in the list

    # When the list also holds numbers, convert each item with str() first.
    l2 = ['a1', 123, 'b1', 456]
    print(''.join([str(x) for x in l2]))  # list comprehension: a1123b1456

    # A generator expression avoids building the intermediate list, which saves
    # memory when the input is long.
    print(''.join(str(x) for x in l2))  # a1123b1456

# string_join()



'''
str.endswith()用法
'''









# 判断一个字符串是不是对称字符串
def symmetrical(str1="mnanm"):
    """Report whether str1 reads the same forwards and backwards.

    Prints the characters of str1, the reversed characters and, when both match,
    the message 'It is a symmetrical string'.

    :param str1: string to check (default "mnanm")
    :return: True if str1 is symmetrical, otherwise False
    """
    list1 = list(str1)   # e.g. ['m', 'n', 'a', 'n', 'm']

    # Reverse a copy: "list2 = list1" followed by list2.reverse() would reverse the one
    # shared list, making the comparison below true for every input.
    list2 = list1[::-1]
    print(list1)
    print(list2)
    is_symmetrical = list2 == list1
    if is_symmetrical:
        print('It is a symmetrical string')
    return is_symmetrical

# symmetrical()



# from urllib import urlretrieve
#
# urlretrieve('http://table.finance.yahoo.com/table.csv?s=000001.sz','pingan.csv')


'''
imooc--python进阶课程
'''
#
from random import randint
# data = [randint(-3,3) for _ in xrange(5)]
# print data   # [0, 1, -1, -1, 2]
#
# print filter(lambda x:x>=0,data)  # [0, 1, 2],x接收的是data中的每一项，返回的是x>=0的项到最后的结果中

#

# d = {x:randint(60,100) for x in xrange(1,21)}  # 生成的d是一个字典，现在过滤出来值高于90的项
#
# print {k:v for k,v in d.items() if v>90}  # {1: 100, 3: 97, 6: 98, 9: 100, 12: 95, 13: 93, 17: 100, 18: 91}

# data = [randint(-3,3) for _ in xrange(5)]
# s = set(data)    # s: {0, 2, 3}
#
# print {x for x in s if x%3==0}




def calculate_count():
    """Print six random integers in 0..3, how often each occurs, and the counts above 90."""
    from random import randint
    data = [randint(0, 3) for _ in range(6)]
    print(data)  # e.g. [1, 3, 3, 2, 2, 1]

    # One zero-initialised counter per distinct value that occurs in data.
    c = dict.fromkeys(data, 0)   # e.g. {1: 0, 2: 0, 3: 0}

    for x in data:
        c[x] += 1
    print(c)   # e.g. {1: 2, 2: 2, 3: 2}

    # NOTE(review): a count can never exceed 6 here, so this always prints {}; the
    # threshold looks copied from a score-filtering example. Confirm the intended value.
    print({k: v for k, v in c.items() if v > 90})

def calculate_count2():
    """Print the item frequencies of a list using collections.Counter."""
    from collections import Counter

    data = [1, 2, 2, 2, 3, 3, 5, 6, 6, 6, 6]
    c = Counter(data)
    print(c)   # Counter({6: 4, 2: 3, 3: 2, 1: 1, 5: 1}); Counter is a dict subclass
    print(c[6])  # 4
    print(c.most_common(2))  # [(6, 4), (2, 3)]: the two most frequent items


def calculate_count3(path='2.txt'):
    """Print a text file's content, its word frequencies and its most common word.

    :param path: path of the text file to read (default '2.txt')
    :return: None
    """
    from collections import Counter
    import re

    # The with statement closes the file even if reading fails.
    with open(path, 'r') as fh:
        content = fh.read()
    # A single formatted string prints the same on Python 2 and Python 3.
    print('%s %s' % (content, type(content)))  # e.g. i love python , and you? if you love it too? <type 'str'>

    # findall(r'\w+') yields only real words; splitting on \W+ also produced an empty
    # string whenever the text started or ended with a non-word character.
    c = Counter(re.findall(r'\w+', content))
    print(c)  # e.g. Counter({'love': 2, 'you': 2, 'i': 1, 'python': 1, 'and': 1, 'if': 1, 'it': 1, 'too': 1})
    num = c.most_common(1)
    print(num)  # e.g. [('love', 2)]

# calculate_count3()

在列表、字典、集合中根据条件筛选数据

一、列表的过滤

1.传统做法是：

2.filter过滤

3. 列表解析：

总结：一般推荐使用列表解析

二、字典

根据字典的值过滤字典

三、集合

相关文章

网友评论

延伸阅读

深度阅读

栏目导航

热点阅读