一、列表的过滤
1.传统做法是:
# Traditional approach: walk the list and collect the non-negative items.
data = [1, 5, -3, -4, 6]
res = []
for x in data:
    if x >= 0:
        res.append(x)
print(res)  # [1, 5, 6]
2.filter过滤
from random import randint
# Five random integers drawn from the closed range [-3, 3].
data = [randint(-3, 3) for _ in range(5)]
print(data)  # e.g. [0, 1, -1, -1, 2]
# filter() passes every item of data to the lambda and keeps the items for
# which it returns True; list() materialises the result so it prints as a
# list on Python 3 as well.
print(list(filter(lambda x: x >= 0, data)))  # e.g. [0, 1, 2]
3. 列表解析:
# List comprehension: keep the non-negative items of data.
print([x for x in data if x >= 0])
filter和列表解析这两种方法哪种好呢?
In [19]: data = [-2,9,3]
In [20]: timeit [x for x in data if x>=0]
The slowest run took 13.69 times longer than the fastest. This could mean that an intermediate result is being cached.
1000000 loops, best of 3: 296 ns per loop
In [21]: timeit [x for x in data if x>=0]
1000000 loops, best of 3: 289 ns per loop
总结:一般推荐使用列表解析
二、字典
根据字典的值过滤字典
# Map the keys 1..20 to random values in [60, 100].
d = {x: randint(60, 100) for x in range(1, 21)}
# Dict comprehension: keep only the entries whose value is above 90.
print({k: v for k, v in d.items() if v > 90})  # e.g. {1: 100, 3: 97, 6: 98}
三、集合
data = [randint(-3, 3) for _ in range(5)]
s = set(data)  # duplicates collapse, e.g. {0, 2, 3}
# Set comprehension: keep the members that are divisible by 3.
print({x for x in s if x % 3 == 0})
其他代码演示:
# Sample list containing repeated letters.
list1 = ['b', 'c', 'd', 'b', 'c', 'a', 'a']
# 创建字典的方式
def dict1():
    """Build a dict from a literal and print it."""
    x = {"a": "1", "b": "2"}
    print(x)  # {'a': '1', 'b': '2'}
def dict2():
    """Print the dicts produced by the argument shapes dict() accepts."""
    # Keyword arguments: every keyword name becomes a string key.
    print(dict(a='1', b='2'))  # {'a': '1', 'b': '2'}
    print(dict(a=1, b=2))  # {'a': 1, 'b': 2}
    # dict('a'=1, 'b'=2) is a SyntaxError: keyword names cannot be quoted.

    # A tuple whose items are two-element lists or tuples (key, value).
    print(dict(([1, 3], [2, 4])))  # {1: 3, 2: 4}
    print(dict((['a', 3], ['b', 4], ['c', 3])))  # keys a, b, c -> 3, 4, 3
    print(dict((('a', 3), ('b', 4))))  # {'a': 3, 'b': 4}
    print(dict((('a', 3), ['b', 4])))  # {'a': 3, 'b': 4}

    # A list whose items are two-element lists or tuples (key, value).
    print(dict([['a', 3], ['b', 4]]))  # {'a': 3, 'b': 4}
    print(dict([('a', 3), ['b', 4]]))  # {'a': 3, 'b': 4}
    print(dict([('a', 3), ('b', 4)]))  # {'a': 3, 'b': 4}
# dict2()
def dict3():
    """Print dicts built with dict.fromkeys(), without and with a value."""
    keys = ['a', 'b']
    print(dict.fromkeys(keys))  # every key maps to None
    print(dict.fromkeys(keys, 1))  # every key maps to 1
# 字典的更新函数
def dict_update():
    """Merge one dict into another with dict.update() and print both."""
    # Named "base" rather than "dict" so the builtin type is not shadowed.
    base = {'a': 1, 'b': 2}
    extra = {'sex': 'male', 'age': 28}
    base.update(extra)  # mutates base in place; extra is left untouched
    print(base)  # now holds the keys a, b, sex and age
    print(extra)  # still only sex and age
# dict_update()
# zip的用法
def zip1():
    """Pair up two lists, then two tuples, with zip() and print the pairs."""
    a = [1, 2, 3]
    b = [4, 5, 6]
    # list() materialises the pairs; on Python 3 zip() alone is a lazy
    # iterator that would print as an opaque object.
    zipped = list(zip(a, b))
    print(zipped)  # [(1, 4), (2, 5), (3, 6)]

    c = (1, 2, 3)
    d = (4, 5, 6)
    zipped2 = list(zip(c, d))
    print(zipped2)  # [(1, 4), (2, 5), (3, 6)]
# zip的结果可以作为入参生成字典
def zip2():
    """Build a dict from the (key, value) pairs produced by zip()."""
    a = [1, 2, 3]
    b = [4, 5, 6]
    zipped = zip(a, b)  # pairs (1, 4), (2, 5), (3, 6)
    dict1 = dict(zipped)
    print(dict1)  # {1: 4, 2: 5, 3: 6}
# zip2()
# 字符串、list、tuple转换
def transf():
    """Print conversions between str, list and tuple, plus regex splitting."""
    import re

    s1 = 'python'
    # str -> list of characters
    l1 = list(s1)
    print(l1)  # ['p', 'y', 't', 'h', 'o', 'n']
    # list -> str
    s2 = ''.join(l1)
    print('%s %s' % (s2, type(s2)))  # "python" followed by the str type
    # list -> tuple
    t1 = tuple(l1)
    print(t1)  # ('p', 'y', 't', 'h', 'o', 'n')
    # str -> list holding the whole string (there is no whitespace to split on)
    print(s1.split())  # ['python']
    s3 = 'a b c'
    print(s3.split())  # ['a', 'b', 'c']
    # Concatenate the strings held in a list.
    l2 = ['i', 'love']
    joined = ''.join(l2)
    print('%s %s' % (joined, type(joined)))  # "ilove" followed by the str type

    a = 'Beautiful, is; better*than\nugly'
    # Four separators: "," "; " "*" and newline. A raw string keeps \* as a
    # regex escape instead of an invalid string escape.
    x = re.split(r',|; |\*|\n', a)
    print(x)  # ['Beautiful', ' is', 'better', 'than', 'ugly']
# transf()
# sort和sorted
def sorted1():
    """Print sorted() results for several key and reverse settings."""
    # Order by absolute value.
    print(sorted([36, 5, -12, 9, -21], key=abs))  # [5, 9, -12, -21, 36]

    words = "This is a test string from Andrew".split()
    # Case-insensitive order: compare the lower-cased form of each word.
    print(sorted(words, key=str.lower))
    # Case-sensitive order: upper-case letters have smaller code points
    # than lower-case ones, so capitalised words come first.
    print(sorted(words))
    # Descending order only needs reverse=True.
    print(sorted(words, key=str.lower, reverse=True))
# list排序;list中包含元组的排序;
def sorted2():
    """Print sorted() results for a list of strings and a list of tuples."""
    from operator import itemgetter

    L = ['bob', 'about', 'Zoo', 'Credit']
    print(sorted(L))  # case-sensitive
    print(sorted(L, key=str.lower))  # case-insensitive

    students = [('Bob', 75), ('Adam', 88), ('Bart', 66), ('Lisa', 88)]
    # Order by field 0 (the name).
    print(sorted(students, key=itemgetter(0)))
    # The next two calls are equivalent: both order by field 1 (the score).
    print(sorted(students, key=itemgetter(1)))
    print(sorted(students, key=lambda t: t[1]))
    # Order by score first and by name to break ties, ascending then
    # descending.
    print(sorted(students, key=itemgetter(1, 0)))
    print(sorted(students, key=itemgetter(1, 0), reverse=True))
# sorted2()
# 按照含有子串个数多少排序
def sort3():
    """Print messages ordered by how many '!' characters each contains."""
    from operator import methodcaller

    messages = ['critical!!!', 'hurry!', 'standby', 'immediate!!']
    # methodcaller('count', '!') calls message.count('!') on every item.
    exclamations = methodcaller('count', '!')
    print(sorted(messages, key=exclamations))
# 按照升序和降序排序
def sort4():
    """Print student records ordered by field 2, ascending then descending."""
    from operator import itemgetter

    student_tuples = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    third_field = itemgetter(2)
    print(sorted(student_tuples, key=third_field))  # dave, jane, john
    print(sorted(student_tuples, key=third_field, reverse=True))  # john, jane, dave
########################################################
# python中数据类型的排序
# 首先是字符串的排序
def string_ordered1():
    """Print a string reversed with an extended slice, and the result type."""
    # Named "text" rather than "str" so the builtin type is not shadowed.
    text = 'i love python'
    flipped = text[::-1]  # a step of -1 walks the string back to front
    print('%s %s' % (flipped, type(flipped)))  # "nohtyp evol i" plus the type
def string_ordered2():
    """Print the characters of a string in sorted order as one string."""
    s = 'i love python'
    # sorted() accepts the string directly and returns a sorted list of its
    # characters; the two spaces sort before every letter.
    s = "".join(sorted(s))
    print(s)
# 这种方法还没看太懂,先记录下
def string_ordered3():
    """Sort the characters of a string within a single expression and print it.

    list.sort() sorts in place and returns None, so its result cannot be
    handed to join() directly. The lambda builds the tuple (x.sort(), x):
    evaluating the first element sorts x as a side effect, and [1] then
    selects the now-sorted list itself. "".join(sorted(s)) gives the same
    result more plainly.
    """
    s = "string"
    s = "".join((lambda x: (x.sort(), x)[1])(list(s)))
    print(s)  # ginrst
# string_ordered2()
# 冒泡排序
def bubble(bubbleList):
    """Sort bubbleList in place in ascending order with bubble sort, then print it.

    bubbleList: a mutable sequence whose items can be compared with ">".
    """
    unsorted_len = len(bubbleList)
    while unsorted_len > 0:
        # One pass moves the largest item of the unsorted prefix to its end.
        for i in range(unsorted_len - 1):
            if bubbleList[i] > bubbleList[i + 1]:
                # Tuple swap: the add/subtract trick only works on numbers
                # and loses precision on floats.
                bubbleList[i], bubbleList[i + 1] = bubbleList[i + 1], bubbleList[i]
        unsorted_len -= 1
    print(bubbleList)
def bubble2(bubbleList):
    """Sort bubbleList in place in ascending order with bubble sort, then print it.

    bubbleList: a mutable sequence whose items can be compared with ">".
    """
    unsorted_len = len(bubbleList)
    while unsorted_len > 0:
        swapped = False
        for i in range(unsorted_len - 1):
            if bubbleList[i] > bubbleList[i + 1]:
                bubbleList[i], bubbleList[i + 1] = bubbleList[i + 1], bubbleList[i]
                swapped = True
        if not swapped:
            # A pass without swaps means the list is already in order.
            break
        unsorted_len -= 1
    print(bubbleList)
# bubbleList = [2, 8, 4, 7, 5, 9, 0]
# bubble(bubbleList)
# 递归求阶乘
def recursion(n):
    """Return n! (the factorial of n), computed recursively.

    n: a non-negative integer; both 0 and 1 yield 1.
    Raises ValueError for a negative n instead of recursing until the
    interpreter's recursion limit is hit.
    """
    if n < 0:
        raise ValueError('factorial is undefined for negative numbers')
    if n <= 1:
        return 1
    return n * recursion(n - 1)


print(recursion(10))  # 3628800
# 对list中的每个数字求平方,用map
def f(x):
    """Return x multiplied by itself."""
    return x * x
def map1():
    """Apply f to the numbers 1..9 with map() and print the results."""
    # list() materialises the result; on Python 3 map() alone is a lazy
    # iterator that would print as an opaque object.
    r = list(map(f, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
    print(r)
# 将list中的每个数字转换成字符串,用map
def map2():
    """Convert the numbers 1..9 to strings with map() and print the list."""
    r = list(map(str, [1, 2, 3, 4, 5, 6, 7, 8, 9]))
    print(r)  # ['1', '2', '3', '4', '5', '6', '7', '8', '9']
def map3():
    """Apply normalize to every name in a list with map() and print the result."""
    L1 = ['adam', 'LISA', 'barT']
    # NOTE(review): normalize is not defined in the visible part of this
    # file; presumably it capitalises a name (e.g. 'barT' to 'Bart').
    # Confirm, or define it before calling map3().
    L2 = list(map(normalize, L1))
    print(L2)
# map3()
# reduce用法,将序列转换成整数
def fn(x, y):
    """Return x shifted one decimal place to the left, plus y."""
    return x * 10 + y
def reduce1():
    """Fold a list of digits into one integer with reduce() and print it."""
    # reduce is a builtin only on Python 2; functools provides it on both
    # Python 2 and Python 3.
    from functools import reduce

    r = reduce(fn, [3, 6, 8, 9])
    print('%s %s' % (r, type(r)))  # 3689 followed by the int type
# reduce1()
# print list([1,2,3])
def zip1():
    """Print the pairs zip() builds from two lists, and from a list and a tuple."""
    a = [1, 2, 3]
    b = [4, 5, 6]
    # list() materialises the pairs so they print on Python 3 as well.
    print(list(zip(a, b)))  # [(1, 4), (2, 5), (3, 6)]
    c = (7, 8, 9)
    print(list(zip(a, c)))  # [(1, 7), (2, 8), (3, 9)]
# 解压缩包
def zip2():
    """Undo a zip(): print the original sequences recovered from the pairs."""
    a = [1, 2, 3]
    b = [4, 5, 6]
    zipped = zip(a, b)
    # zip(*pairs) passes every pair as a separate argument, which regroups
    # the first items together and the second items together.
    print(list(zip(*zipped)))  # [(1, 2, 3), (4, 5, 6)]
# zip2()
# 去除list中的重复元素,且保持原来的顺序不变
def remove_repeat1(list1):
    """Return the distinct items of list1 in first-occurrence order.

    list1: a list of hashable items.
    The unordered de-duplicated list and its type are printed before the
    original order is restored.
    """
    # Record where each item first appears so the sort key is a dict lookup
    # rather than a list1.index() scan for every element.
    first_seen = {}
    for position, item in enumerate(list1):
        first_seen.setdefault(item, position)

    l2 = list(set(list1))
    print('%s %s' % (l2, type(l2)))
    l2.sort(key=first_seen.__getitem__)
    return l2
# 去除list中的重复元素,且保持原来的顺序不变
def remove_repeat2(list1):
    """Return the distinct items of list1 in first-occurrence order.

    Uses only equality tests, so unhashable items such as lists work too.
    """
    unique = []
    for item in list1:
        if item not in unique:
            unique.append(item)
    return unique
# 去除list中的重复元素,且保持原来的顺序不变
def remove_repeat3(list1):
    """Return the distinct items of list1 in first-occurrence order.

    list1: a list of hashable items.
    """
    from collections import OrderedDict

    # An OrderedDict keeps one key per distinct item, in insertion order.
    return list(OrderedDict.fromkeys(list1))
# 去除list中的重复元素,顺序会变
def remove_repeat4(list1):
    """Return the distinct items of list1; the original order is not kept.

    list1: a list of hashable items.
    """
    return list(set(list1))
def remove_repeat5(list1):
    """Print and return the distinct items of list1 in first-occurrence order.

    list1: a list of hashable items.
    Prints the dict keys with their type, then the re-ordered result, and
    returns that result.
    """
    # dict.fromkeys() keeps one key per distinct item; list() turns the
    # keys into a real list on Python 3, where keys() is only a view.
    c = list(dict.fromkeys(list1))
    print('%s %s' % (c, type(c)))
    c = sorted(c, key=list1.index)
    print(c)
    return c
# remove_repeat5(list1)
# 排序
def sort1():
    """Print student records ordered by their third field, ascending."""
    students = [('john', 'A', 15), ('jane', 'B', 12), ('dave', 'B', 10)]
    print(sorted(students, key=lambda student: student[2]))
# sort1()
# 去除list中的空字符串和只含空白的字符串
def list_space():
    """Print a list with its empty and whitespace-only strings removed."""
    list1 = [' ', 'ff', '', 'ss', 'aaa', '']
    # x.strip() is '' (falsy) for empty and all-whitespace strings. Building
    # a new list also avoids removing items from list1 while iterating it.
    ls = [x for x in list1 if x.strip()]
    print(ls)  # ['ff', 'ss', 'aaa']
# 将字符串倒序
def string_reverse(m):
    """Return the characters of m joined in reverse order.

    m: a string (any sequence of strings works as well).
    """
    return ''.join(reversed(m))
# 字符串的练习
# 字符串大小写转换方法
def string1():
    """Print examples of the str.capitalize() and string.capwords() conversions."""
    # Imported here because capwords() lives in the string module, which
    # this function would otherwise use without importing.
    import string

    s = 'fu junmin'
    print(s.capitalize())  # Fu junmin -- only the first letter is upper-cased
    print(s)  # fu junmin -- the original string is unchanged
    s = 'The quick brown fox jumped over the lazy dog.'
    print(string.capwords(s))  # The Quick Brown Fox Jumped Over The Lazy Dog.
# string1()
def count_find():
    """Print str.count() and str.find() results, with and without bounds."""
    s = 'I love python!'
    print(s.count('o'))  # 2
    print(s.find('o'))  # 3 -- index of the first match
    print(s.count('9'))  # 0
    print(s.find('9'))  # -1 -- find() reports "not found" as -1
    print(s.count('o', 5))  # 1 -- search starts at index 5
    print(s.find('o', 5))  # 11
    print(s.count('o', 5, 7))  # 0 -- only indices 5 and 6 are searched
    print(s.find('o', 5, 7))  # -1 -- no 'o' inside that slice
# count_find()
# append和extend区别
def append_extend():
    """Print a list after each of several append() and extend() calls.

    append() adds its argument as one single element; extend() adds every
    item of its iterable argument separately.
    """
    list1 = [1, 2]
    list1.append([3, 4])
    print(list1)  # [1, 2, [3, 4]]
    list1.extend([5, 6])
    print(list1)  # [1, 2, [3, 4], 5, 6]
    list1.append('love')
    print(list1)  # [1, 2, [3, 4], 5, 6, 'love']
    list1.extend('python')  # a string is iterated character by character
    print(list1)  # [1, 2, [3, 4], 5, 6, 'love', 'p', 'y', 't', 'h', 'o', 'n']
# append_extend()
def alphabet_sorted():
    """Print the whitespace-separated words of a sentence in sorted order.

    Punctuation is not stripped: free-standing marks are sorted as words of
    their own and trailing marks stay attached to their word.
    """
    # Named "text" rather than "str" so the builtin type is not shadowed.
    text = 'I love python. If you love too , join us !'
    list1 = sorted(text.split())
    print(list1)
# alphabet_sorted()
# 字符串倒序
def string_reverse():
    """Print the words of a sentence in reverse order, as a list and as a string."""
    list2 = 'I love python'
    list3 = list2.split()
    list3.reverse()  # reverses in place
    print(list3)  # ['python', 'love', 'I']
    str4 = ' '.join(list3)
    print(str4)  # python love I
'''
# 列出目录下的所有文件
'''
def list_dir(dn, list_name):
    """Recursively collect the paths of all files below directory dn.

    dn: path of the directory to scan.
    list_name: list that receives the path of every non-directory entry
        found in dn and its sub-directories; it is extended in place.

    For each visited directory the entry names are printed twice: once as
    a list and once one name per line.
    """
    import os

    entries = os.listdir(dn)
    print(entries)
    print('\n'.join(entries))
    for entry in entries:
        file_path = os.path.join(dn, entry)
        if os.path.isdir(file_path):
            # Descend into the sub-directory, sharing the same result list.
            list_dir(file_path, list_name)
        else:
            list_name.append(file_path)
# with open('result.txt','w') as wf:
# wf.write('\n'.join(L))
# dn = '/Users/fujunmin/files/python/selfPractise/test/tmp'
# list_name = []
#
# list_dir(dn,list_name)
# print 'wenjian:',list_name
# result = [name for name in os.listdir('.') if name.endswith(('.aa','.py'))]
#
# print os.getcwd() # /Users/fujunmin/files/python/selfPractise/test
# print result # ['__init__.py', 'test1_basic.py']
'''
# 取出csv文件中多个字段值符合条件的
'''
def csv_test(src='1.txt', dst='2.txt'):
    """Copy the lines of src that start with a letter into dst.

    Each line is stripped of surrounding whitespace first. Lines that are
    empty after stripping, or whose first character is not alphabetic, are
    dropped; kept lines are written in stripped form, one per line.

    src: path of the text file to read (default '1.txt').
    dst: path of the text file to create or overwrite (default '2.txt').
    """
    # The with-statement closes both files, so no explicit close() is needed.
    with open(src, 'r') as f1, open(dst, 'w') as f2:
        for line in f1:  # stream the file instead of loading every line
            a = line.strip()
            # a[:1] is '' for a blank line, so isalpha() is simply False
            # where a[0] would raise IndexError.
            if a[:1].isalpha():
                f2.write(a + '\n')
#
# def csv_test2():
# csvFile = open('11.txt','r')
# reader = csv.reader(csvFile)
#
# for item in reader:
# print item
# csv_test()
'''
# 删除字符串中多余字符
'''
def string_remove():
    """Print strings with unwanted leading and trailing characters stripped."""
    str1 = ' abc \n'
    print(str1.strip())  # abc -- surrounding whitespace removed
    str2 = '----abcdf++++'
    # The argument is a set of characters to strip from both ends.
    print(str2.strip('-+'))  # abcdf
# 删除字符串中多余字符
def string_remove2():
    """Print a string with its '*' character cut out by slicing."""
    str1 = 'abc*wrt'
    # Locate the '*' rather than hard-coding its position, then join the
    # text before it with the text after it.
    star = str1.index('*')
    print(str1[:star] + str1[star + 1:])  # abcwrt
# 删除字符串中多余字符
def string_remove3():
    """Print strings with unwanted characters removed via replace() and re.sub()."""
    import re

    str1 = '\nabc\nwrt22\n'
    print(str1.replace('\n', ''))  # abcwrt22 -- every newline removed

    str2 = '\nabc\nwrt22\t666\t'
    # A character class removes several different characters in one pass.
    print(re.sub('[\n\t]', '', str2))  # abcwrt22666

    str3 = 'abc123jm456n3fF'
    without_digits = re.sub(r'([\d]+)', '', str3)  # drop every run of digits
    print(without_digits)  # abcjmnfF
    print(without_digits.lower())  # abcjmnff
# string有一个translate方法,第一个参数是映射的关系,第二个参数是删除操作
def translate_pra():
    """Print strings transformed with translate(): one mapped, one with characters deleted."""
    s = 'abc123456xyz'
    s2 = 'abc\rdefff\nkl\t'
    if hasattr(str, 'maketrans'):
        # Python 3: str.maketrans builds the table; its third argument
        # lists the characters to delete.
        mapped = s.translate(str.maketrans('abc', 'xyz'))
        cleaned = s2.translate(str.maketrans('', '', '\r\n\t'))
    else:
        # Python 2: the table comes from the string module, and the second
        # argument of translate() lists the characters to delete.
        import string
        mapped = s.translate(string.maketrans('abc', 'xyz'))
        cleaned = s2.translate(None, '\r\n\t')
    print(mapped)  # xyz123456xyz
    print(cleaned)  # abcdefffkl
# translate_pra()
'''
字符串拼接
'''
def string_add():
    """Print strings concatenated with + and with += in a loop."""
    str1 = 'abc'
    str2 = '456'
    print(str1 + str2)  # abc456

    l1 = ['a1', 'a2', 'a3']
    s = ''
    for i in l1:
        # Each += may build a new temporary string holding everything
        # joined so far; str.join() is the recommended way to do this.
        s += i
    print(s)  # a1a2a3
def string_join():
    """Print strings built with str.join(), including from non-string items."""
    print(';'.join(['aa', 'bb', 'cc']))  # aa;bb;cc
    print(''.join(['aa', 'bb', 'cc']))  # aabbcc
    l1 = ['a1', 'a2', 'a3']
    print(''.join(l1))  # a1a2a3

    # join() only accepts strings, so numbers have to be converted first.
    l2 = ['a1', 123, 'b1', 456]
    print(''.join([str(x) for x in l2]))  # list comprehension: a1123b1456
    # A generator expression gives the same result without first building
    # the intermediate list.
    print(''.join(str(x) for x in l2))  # a1123b1456
# string_join()
'''
string.endswith()用法
'''
# 判断一个字符串是不是对称字符串
def symmetrical(str1="mnanm"):
    """Report whether str1 reads the same forwards and backwards.

    str1: the string to check (default "mnanm").
    Prints the characters in original and in reversed order, prints a
    message when the two match, and returns True or False accordingly.
    """
    list1 = list(str1)
    # Reverse a copy: reversing an alias of list1 would flip list1 as well
    # and make the comparison below succeed for every input.
    list2 = list1[::-1]
    print(list1)
    print(list2)
    is_symmetrical = list2 == list1
    if is_symmetrical:
        print('It is a symmetrical string')
    return is_symmetrical
# symmetrical()
# from urllib import urlretrieve
#
# urlretrieve('http://table.finance.yahoo.com/table.csv?s=000001.sz','pingan.csv')
'''
imooc--python进阶课程
'''
#
from random import randint
# data = [randint(-3,3) for _ in xrange(5)]
# print data # [0, 1, -1, -1, 2]
#
# print filter(lambda x:x>=0,data) # [0, 1, 2],x接收的是data中的每一项,返回的是x>=0的项到最后的结果中
#
# d = {x:randint(60,100) for x in xrange(1,21)} # 生成的d是一个字典,现在过滤出来值高于90的项
#
# print {k:v for k,v in d.items() if v>90} # {1: 100, 3: 97, 6: 98, 9: 100, 12: 95, 13: 93, 17: 100, 18: 91}
# data = [randint(-3,3) for _ in xrange(5)]
# s = set(data) # s: {0, 2, 3}
#
# print {x for x in s if x%3==0}
def calculate_count():
    """Count how often each value occurs in a random sample and print the tallies."""
    from random import randint

    data = [randint(0, 3) for _ in range(6)]
    print(data)  # e.g. [1, 3, 3, 2, 2, 1]
    # Start every distinct value at zero, then tally the occurrences.
    c = dict.fromkeys(data, 0)
    for x in data:
        c[x] += 1
    print(c)  # e.g. {1: 2, 3: 2, 2: 2}
    # NOTE(review): with only six samples no count can exceed 6, so this
    # filter always prints {}. The 90 threshold looks copied from a score
    # filter -- confirm the intended cutoff.
    print({k: v for k, v in c.items() if v > 90})
def calculate_count2():
    """Count item frequencies with collections.Counter and print some lookups."""
    from collections import Counter

    data = [1, 2, 2, 2, 3, 3, 5, 6, 6, 6, 6]
    c = Counter(data)  # a dict subclass mapping item -> number of occurrences
    print(c)  # Counter({6: 4, 2: 3, 3: 2, 1: 1, 5: 1})
    print(c[6])  # 4
    print(c.most_common(2))  # [(6, 4), (2, 3)] -- the two most frequent items
def calculate_count3(path='2.txt'):
    """Print the word frequencies of a text file and its most common word.

    path: text file to read (default '2.txt').
    Prints the file content with its type, the Counter of all words, and
    the single most common (word, count) pair as a one-item list.
    """
    import re
    from collections import Counter

    # The with-statement closes the file once it has been read.
    with open(path, 'r') as source:
        content = source.read()
    print('%s %s' % (content, type(content)))

    # findall(r'\w+') yields only real words. Splitting on \W+ also yields
    # an empty string whenever the text starts or ends with a non-word
    # character, and that empty string would be counted as a word.
    c = Counter(re.findall(r'\w+', content))
    print(c)
    num = c.most_common(1)
    print(num)  # e.g. [('love', 2)]
# calculate_count3()
网友评论