- Item5:写Helper函数而不是复杂的表达式。(DRY原则: 不要重复自己(Don’t repeat yourself.),尽量封装常用的方法。)
# 解析URL的查询字符串(query string,即URL中"?"之后的参数部分)
from urllib.parse import parse_qs
my_values = parse_qs('red=5&blue=0&green=',
keep_blank_values=True)
print(repr(my_values))
>>>
{'red': ['5'], 'blue': ['0'], 'green': ['']}
# 用get可以获得对应的 参数
print('Red: ', my_values.get('red'))
print('Green: ', my_values.get('green'))
print('Opacity: ', my_values.get('opacity'))
>>>
Red: ['5']
Green: ['']
Opacity: None
# For query string 'red=5&blue=0&green='
red = my_values.get('red', [''])[0] or 0
green = my_values.get('green', [''])[0] or 0
opacity = my_values.get('opacity', [''])[0] or 0
print(f'Red: {red!r}')
print(f'Green: {green!r}')
print(f'Opacity: {opacity!r}')
>>>
Red: '5'
Green: 0
Opacity: 0
# 利用字符串判空的trick,or上一个0,即默认为0,最后转为整型。虽然整体的表达整洁,但是难以理解,需要拆分语句来阅读。
red = int(my_values.get('red', [''])[0] or 0)
# 转换成三元表达式(条件表达式)会稍微好一点,但是仍然不如封装成函数清晰。
red_str = my_values.get('red', [''])
red = int(red_str[0]) if red_str[0] else 0
# 拆分成原始的if-else语句段,更加可读
green_str = my_values.get('green', [''])
if green_str[0]:
green = int(green_str[0])
else:
green = 0
# 最后抽象成一个函数
def get_first_int(values, key, default=0):
    """Return the first value stored under ``key`` converted to ``int``.

    Args:
        values: Mapping of key -> list of strings, e.g. the result of
            ``urllib.parse.parse_qs``.
        key: The key to look up.
        default: Value returned when the key is missing, its list is
            empty, or its first entry is an empty string.

    Raises:
        ValueError: If the first entry is non-empty but not a valid integer.
    """
    found = values.get(key, [''])
    # Check the list itself first so an empty list falls back to the
    # default instead of raising IndexError.
    if found and found[0]:
        return int(found[0])
    return default
# 只要调用就可以,方便后续复用:
green = get_first_int(my_values, 'green')
- Item6:用多个变量赋值而不是下标的方式来拆分变量
# 用tuple把可变的字典变成不可变的元组。
snack_calories = {
'chips': 140,
'popcorn': 80,
'nuts': 190,
}
items = tuple(snack_calories.items())
print(items)
>>>
(('chips', 140), ('popcorn', 80), ('nuts', 190))
# 对元组可以进行下标访问。
item = ('Peanut butter', 'Jelly')
first = item[0]
second = item[1]
print(first, 'and', second)
>>>
Peanut butter and Jelly
# 但是元组不允许下标赋值(不可变)
pair = ('Chocolate', 'Peanut butter')
pair[0] = 'Honey'
>>>
Traceback ...
TypeError: 'tuple' object does not support item assignment
# 可以用另一种方式来获取元组中的元素
item = ('Peanut butter', 'Jelly')
first, second = item # Unpacking
print(first, 'and', second)
>>>
Peanut butter and Jelly
# 同理,可以这样获取元素,但是不建议:
favorite_snacks = {
'salty': ('pretzels', 100),
'sweet': ('cookies', 180),
'veggie': ('carrots', 20),
}
((type1, (name1, cals1)),
(type2, (name2, cals2)),
(type3, (name3, cals3))) = favorite_snacks.items()
print(f'Favorite {type1} is {name1} with {cals1} calories')
print(f'Favorite {type2} is {name2} with {cals2} calories')
print(f'Favorite {type3} is {name3} with {cals3} calories')
>>>
Favorite salty is pretzels with 100 calories
Favorite sweet is cookies with 180 calories
Favorite veggie is carrots with 20 calories
其中,最隐晦且有用的一点就是:利用此特性来完成数值交换。
# 传统的数值交换
def bubble_sort(a):
    """Sort the list ``a`` in place in ascending order using bubble sort.

    This is the "traditional" variant: adjacent elements are exchanged
    through a temporary variable rather than tuple unpacking.
    """
    for _ in range(len(a)):
        for i in range(1, len(a)):
            if a[i] < a[i-1]:
                # Classic three-step swap via a temporary.
                temp = a[i]
                a[i] = a[i-1]
                a[i-1] = temp
names = ['pretzels', 'carrots', 'arugula', 'bacon']
bubble_sort(names)
print(names)
>>>
['arugula', 'bacon', 'carrots', 'pretzels']
# 利用了unpacking特性的数值交换
def bubble_sort(a):
    """Sort the list ``a`` in place in ascending order using bubble sort.

    Adjacent elements are exchanged with tuple unpacking, so no temporary
    variable is needed.
    """
    for _ in range(len(a)):
        for i in range(1, len(a)):
            if a[i] < a[i-1]:
                # The right-hand side is evaluated into a tuple first,
                # then unpacked into the two targets.
                a[i-1], a[i] = a[i], a[i-1]  # Swap
names = ['pretzels', 'carrots', 'arugula', 'bacon']
bubble_sort(names)
print(names)
>>>
['arugula', 'bacon', 'carrots', 'pretzels']
可以这么理解:取得了右边的数值,然后包装成一个隐藏的元组(a[i], a[i-1]),然后再进行unpack(拆包)之后,分别赋值给a[i-1]和a[i]。
unpacking的特性可以再参照下边的例子:
# 不用unpacking
snacks = [('bacon', 350), ('donut', 240), ('muffin', 190)]
for i in range(len(snacks)):
item = snacks[i]
name = item[0]
calories = item[1]
print(f'#{i+1}: {name} has {calories} calories')
>>>
#1: bacon has 350 calories
#2: donut has 240 calories
#3: muffin has 190 calories
# 用了unpacking
for rank, (name, calories) in enumerate(snacks, 1):
print(f'#{rank}: {name} has {calories} calories')
>>>
#1: bacon has 350 calories
#2: donut has 240 calories
#3: muffin has 190 calories
当元组的长度较短,且含义明确时,直接拆包来获得对应的变量,比直接使用下标去取得数据,逻辑意义上更加明确,代码的可读性上也会更好。
- Item7:用enumerate而不是range
# 使用range来指定迭代次数
from random import randint
random_bits = 0
for i in range(32):
if randint(0, 1):
random_bits |= 1 << i
print(bin(random_bits))
>>>
0b11101000100100000111000010000001
# 直接for-in来迭代
flavor_list = ['vanilla', 'chocolate', 'pecan', 'strawberry']
for flavor in flavor_list:
print(f'{flavor} is delicious')
>>>
vanilla is delicious
chocolate is delicious
pecan is delicious
strawberry is delicious
# 当需要下标的时候,可能就会用到range
for i in range(len(flavor_list)):
flavor = flavor_list[i]
print(f'{i + 1}: {flavor}')
>>>
1: vanilla
2: chocolate
3: pecan
4: strawberry
# enumerate本质是iterator(迭代器),每次next返回元组(由下标以及迭代对象的元素组成)。
it = enumerate(flavor_list)
print(next(it))
print(next(it))
>>>
(0, 'vanilla')
(1, 'chocolate')
# 自然可以使用for-in以及unpacking的组合方式来迭代enumerate。
for i, flavor in enumerate(flavor_list, 1): # 可以指定下标的编号从多少开始。
print(f'{i}: {flavor}')
>>>
1: vanilla
2: chocolate
3: pecan
4: strawberry
- Item8:用zip来同时处理迭代器。
# 可以用列表推导式来生成一个list
names = ['Cecilia', 'Lise', 'Marie']
counts = [len(n) for n in names]
print(counts)
>>>
[7, 4, 5]
# 如果要找到最长的名字,用这种普通的range迭代比较麻烦。
longest_name = None
max_count = 0
for i in range(len(names)):
count = counts[i]
if count > max_count:
longest_name = names[i]
max_count = count
print(longest_name)
>>>
Cecilia
# enumerate稍微好一点
for i, name in enumerate(names):
count = counts[i]
if count > max_count:
longest_name = name
max_count = count
# zip则是结合两个列表简明完成任务。(用一个懒惰生成器包裹了两个或更多的迭代器,每次next是所有迭代器的下一个值组成的元组)
for name, count in zip(names, counts):
if count > max_count:
longest_name = name
max_count = count
但是,需要注意zip的迭代器们可能存在长度不一的问题:
# 比如用了之前的counts,但是对names添加了一个新名字,打印的时候,会按照最短长度来迭代。
names.append('Rosalind')
for name, count in zip(names, counts):
print(name)
>>>
Cecilia
Lise
Marie
# 如果不确定长度是否一致,则可以使用itertools的zip_longest函数来迭代。(由于最后一个名字对应没有数值,则用None替代。)
import itertools
for name, count in itertools.zip_longest(names, counts):
print(f'{name}: {count}')
>>>
Cecilia: 7
Lise: 4
Marie: 5
Rosalind: None
- Item9:避免在for和while循环后面用else代码块
for i in range(3):
print('Loop', i)
else:
print('Else block!')
>>>
Loop 0
Loop 1
Loop 2
Else block!
# else此处违反直觉:因为在try/except和if/else组合中,后者都是表示如果前者失败的话,做什么操作。
# 而try/finally则是无论前者如何,最终后者会做什么操作。
# for/else此处则是相反的。
for i in range(3):
print('Loop', i)
if i == 1:
break
else:
print('Else block!')
>>>
Loop 0
Loop 1
# 如果是空列表,则直接执行else块。
for x in []:
print('Never runs')
else:
print('For Else block!')
>>>
For Else block!
# while/else也是一样
while False:
print('Never runs')
else:
print('While Else block!')
>>>
While Else block!
# 循环没有被break打断(即else块被执行)时,说明两个数互质。
a = 4
b = 9
for i in range(2, min(a, b) + 1):
print('Testing', i)
if a % i == 0 and b % i == 0:
print('Not coprime')
break
else:
print('Coprime')
>>>
Testing 2
Testing 3
Testing 4
Coprime
#(上面的场景适用,但实际不推荐使用。写一个helper函数来做计算更合适。)
def coprime(a, b):
    """Return True if no integer in ``2..min(a, b)`` divides both numbers.

    Returns False as soon as a common divisor is found.
    """
    for i in range(2, min(a, b) + 1):
        if a % i == 0 and b % i == 0:
            return False
    # Reached only when the loop found no common divisor.
    return True
assert coprime(4, 9)
assert not coprime(3, 6)
# 或者通过一个变量来表示是否互质。
def coprime_alternate(a, b):
    """Return True if no integer in ``2..min(a, b)`` divides both numbers.

    Same result as an early-return implementation, but the outcome is
    tracked in a flag and the loop is left with ``break``.
    """
    is_coprime = True
    for i in range(2, min(a, b) + 1):
        if a % i == 0 and b % i == 0:
            is_coprime = False
            break
    return is_coprime
assert coprime_alternate(4, 9)
assert not coprime_alternate(3, 6)
通过helper函数可以提高可读性(相比for/else语句)。
- Item10:避免重复赋值表达式时可读性差的问题(walrus操作符python3.8之后的语法,因为“:=”像眼睛和长牙,所以就叫walrus。)
# 水果篮子里面有什么
fresh_fruit = {
'apple': 10,
'banana': 8,
'lemon': 5,
}
# 做水果汁
def make_lemonade(count):
...
def out_of_stock():
...
# 原实现-1
count = fresh_fruit.get('lemon', 0)
if count:
make_lemonade(count)
else:
out_of_stock()
# 实际count只在if这一块使用到,放到if前,似乎有点放大了作用域。
# 使用walrus重写了上面的片段,实现-2
if count := fresh_fruit.get('lemon', 0):
make_lemonade(count)
else:
out_of_stock()
def make_cider(count):
...
count = fresh_fruit.get('apple', 0)
if count >= 4:
make_cider(count)
else:
out_of_stock()
# 同样用walrus操作符重写片段。
if (count := fresh_fruit.get('apple', 0)) >= 4:
make_cider(count)
else:
out_of_stock()
def slice_bananas(count):
...
class OutOfBananas(Exception):
pass
def make_smoothies(count):
...
pieces = 0
count = fresh_fruit.get('banana', 0)
if count >= 2:
pieces = slice_bananas(count)
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
# 上面的写法中,pieces先被赋默认值0,再在if里被重新赋值,容易让人忽略它的定义位置。为了逻辑清晰,可以把两处赋值都放进if/else分支。
count = fresh_fruit.get('banana', 0)
if count >= 2:
pieces = slice_bananas(count)
else:
pieces = 0
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
# 用walrus继续来重写
pieces = 0
if (count := fresh_fruit.get('banana', 0)) >= 2:
pieces = slice_bananas(count)
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
if (count := fresh_fruit.get('banana', 0)) >= 2:
pieces = slice_bananas(count)
else:
pieces = 0
try:
smoothies = make_smoothies(pieces)
except OutOfBananas:
out_of_stock()
# if/else实现switch的可读性比较差
# Emulate a switch/case with nested if/else: try bananas first, then
# apples, then lemons. Each fallback adds another level of nesting.
count = fresh_fruit.get('banana', 0)
if count >= 2:
    pieces = slice_bananas(count)
    to_enjoy = make_smoothies(pieces)
else:
    count = fresh_fruit.get('apple', 0)
    if count >= 4:
        to_enjoy = make_cider(count)
    else:
        count = fresh_fruit.get('lemon', 0)
        if count:
            to_enjoy = make_lemonade(count)
        else:
            # No fruit met its threshold.
            to_enjoy = 'Nothing'
# 用walrus来配合实现就稍微好一点
if (count := fresh_fruit.get('banana', 0)) >= 2:
pieces = slice_bananas(count)
to_enjoy = make_smoothies(pieces)
elif (count := fresh_fruit.get('apple', 0)) >= 4:
to_enjoy = make_cider(count)
elif count := fresh_fruit.get('lemon', 0):
to_enjoy = make_lemonade(count)
else:
to_enjoy = 'Nothing'
# while循环
def pick_fruit():
...
def make_juice(fruit, count):
...
bottles = []
fresh_fruit = pick_fruit()
while fresh_fruit:
for fruit, count in fresh_fruit.items():
batch = make_juice(fruit, count)
bottles.extend(batch)
fresh_fruit = pick_fruit()
# 整体为loop-and-a-half的结构。
bottles = []
while True: # Loop
fresh_fruit = pick_fruit()
if not fresh_fruit: # And a half
break
for fruit, count in fresh_fruit.items():
batch = make_juice(fruit, count)
bottles.extend(batch)
# 可以通过walrus表达式来重建,提升可读性。
bottles = []
while fresh_fruit := pick_fruit():
for fruit, count in fresh_fruit.items():
batch = make_juice(fruit, count)
bottles.extend(batch)
列表和字典
一个常见的方式是用list来处理序列相关的工作。
dict是list的一个自然补充。键值对结构也被叫做关联数组(associative array)或者哈希表,提供均摊常数级时间复杂度的赋值和访问。
- Item11:知道如何去切分序列
a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
print('Middle two: ', a[3:5])
print('All but ends:', a[1:7])
>>>
Middle two: ['d', 'e']
All but ends: ['b', 'c', 'd', 'e', 'f', 'g']
# 为了提高可读性,开始为0或者结束为len应该省略。
assert a[:5] == a[0:5]
assert a[5:] == a[5:len(a)]
a[:] # ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
a[:5] # ['a', 'b', 'c', 'd', 'e']
a[:-1] # ['a', 'b', 'c', 'd', 'e', 'f', 'g']
a[4:] # ['e', 'f', 'g', 'h']
a[-3:] # ['f', 'g', 'h']
a[2:5] # ['c', 'd', 'e']
a[2:-1] # ['c', 'd', 'e', 'f', 'g']
a[-3:-1] # ['f', 'g']
# 切片会避免一些问题。
first_twenty_items = a[:20]
last_twenty_items = a[-20:]
# 比如取到不存在的下标:
a[20]
>>>
Traceback ...
IndexError: list index out of range
# 可以通过切片来生成新的列表,不过这个列表是浅拷贝的新列表。
a = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
b = a[3:]
print('Before: ', b)
b[1] = 99
print('After: ', b)
print('No change:', a)
>>>
Before: ['d', 'e', 'f', 'g', 'h']
After: ['d', 99, 'f', 'g', 'h']
No change: ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
# 赋值长度可以不一样,但是最终会收缩或者伸长。
print('Before ', a)
a[2:7] = [99, 22, 14]
print('After ', a)
>>>
Before ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
After ['a', 'b', 99, 22, 14, 'h']
print('Before ', a)
a[2:3] = [47, 11]
print('After ', a)
>>>
Before ['a', 'b', 99, 22, 14, 'h']
After ['a', 'b', 47, 11, 22, 14, 'h']
# 浅拷贝的问题,所以元素相同,但是对应的指针不同。
b = a[:]
assert b == a and b is not a
# 再次加深浅拷贝的印象,如果使用切片来赋值,最终是替换了指向的数值,而不是给一个新的列表:
b = a
print('Before a', a)
print('Before b', b)
a[:] = [101, 102, 103]
assert a is b # Still the same list object
print('After a ', a) # Now has different contents
print('After b ', b) # Same list, so same contents as a
>>>
Before a ['a', 'b', 47, 11, 22, 14, 'h']
Before b ['a', 'b', 47, 11, 22, 14, 'h']
After a [101, 102, 103]
After b [101, 102, 103]
- Item12:避免在一个表达式中切分和步幅(striding)操作
列表的切片语法:somelist[start:end:stride],建议用正的stride的同时,避免用start和end下标。
# 利用切片来奇数和偶数遍历
x = ['red', 'orange', 'yellow', 'green', 'blue', 'purple']
odds = x[::2]
evens = x[1::2]
print(odds)
print(evens)
>>>
['red', 'yellow', 'blue']
['orange', 'green', 'purple']
# 利用切片striding为-1来反向遍历(bytes和unicode的字符串都可以)
x = b'mongoose'
y = x[::-1]
print(y)
>>>
b'esoognom'
x = '寿司'
y = x[::-1]
print(y)
>>>
司寿
# 但是对UTF-8编码后的字节串做反转不行(多字节字符的字节顺序被打乱,无法再解码):
w = '寿司'
x = w.encode('utf-8')
y = x[::-1]
z = y.decode('utf-8')
>>>
Traceback ...
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb8 in position 0: invalid start byte
-1很有用嘛?看看下面的例子:
x = ['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h']
x[::2] # ['a', 'c', 'e', 'g']
x[::-2] # ['h', 'f', 'd', 'b']
# 如果结合起来呢?
x[2::2] # ['c', 'e', 'g']
x[-2::-2] # ['g', 'e', 'c', 'a']
x[-2:2:-2] # ['g', 'e']
x[2:2:-2] # []
由于语法上的密度较大,可读性降低。可以先striding再slicing。(额外创建了浅拷贝,如果想优化,可以考虑itertools的islice方法)
y = x[::2] # ['a', 'c', 'e', 'g']
z = y[1:-1] # ['c', 'e']
- Item13:使用Catch-All Unpacking(捕捉-全部的拆包)而不是切片。
# 拆箱的时候,应该注意变量的个数。
car_ages = [0, 9, 4, 8, 7, 20, 19, 1, 6, 15]
car_ages_descending = sorted(car_ages, reverse=True)
oldest, second_oldest = car_ages_descending
>>>
Traceback ...
ValueError: too many values to unpack (expected 2)
# 当然可以通过下标来访问,但是比较麻烦。
oldest = car_ages_descending[0]
second_oldest = car_ages_descending[1]
others = car_ages_descending[2:]
print(oldest, second_oldest, others)
>>>
20 19 [15, 9, 8, 7, 6, 4, 1, 0]
# 可以用带星表达式(starred expression)来接收拆包的结果。
oldest, second_oldest, *others = car_ages_descending
print(oldest, second_oldest, others)
>>>
20 19 [15, 9, 8, 7, 6, 4, 1, 0]
oldest, *others, youngest = car_ages_descending
print(oldest, youngest, others)
*others, second_youngest, youngest = car_ages_descending
print(youngest, second_youngest, others)
>>>
20 0 [19, 15, 9, 8, 7, 6, 4, 1]
0 1 [20, 19, 15, 9, 8, 7, 6, 4]
# 不能在其本身用*表达式。
*others = car_ages_descending
>>>
Traceback ...
SyntaxError: starred assignment target must be in a list or tuple
# 单行拆出来不可以。
first, *middle, *second_middle, last = [1, 2, 3, 4]
>>>
Traceback ...
SyntaxError: two starred expressions in assignment
# 可以通过下面的例子提供对*表达式的直觉。
car_inventory = {
'Downtown': ('Silver Shadow', 'Pinto', 'DMC'),
'Airport': ('Skyline', 'Viper', 'Gremlin', 'Nova'),
}
((loc1, (best1, *rest1)),
(loc2, (best2, *rest2))) = car_inventory.items()
print(f'Best at {loc1} is {best1}, {len(rest1)} others')
print(f'Best at {loc2} is {best2}, {len(rest2)} others')
>>>
Best at Downtown is Silver Shadow, 2 others
Best at Airport is Skyline, 3 others
如果被拆包的部分比变量短,最终得到的是空列表。
short_list = [1, 2]
first, second, *rest = short_list
print(first, second, rest)
>>>
1 2 []
任意迭代器可以拆包
it = iter(range(1, 3))
first, second = it
print(f'{first} and {second}')
>>>
1 and 2
最后再通过处理CSV表格的例子来加深拆包和*表达式的使用:
def generate_csv():
yield ('Date', 'Make', 'Model', 'Year', 'Price')
...
# 不用*表达式拆行
all_csv_rows = list(generate_csv())
header = all_csv_rows[0]
rows = all_csv_rows[1:]
print('CSV Header:', header)
print('Row count: ', len(rows))
>>>
CSV Header: ('Date', 'Make', 'Model', 'Year', 'Price')
Row count: 200
# 用*表达式拆行。
it = generate_csv()
header, *rows = it
print('CSV Header:', header)
print('Row count: ', len(rows))
>>>
CSV Header: ('Date', 'Make', 'Model', 'Year', 'Price')
Row count: 200
小心*号表达式最终爆内存(因为返回的是一个list)
- Item14:通过复杂的标准和key参数来排序
sort可以对列表进行排序。
numbers = [93, 86, 11, 68, 70]
numbers.sort()
print(numbers)
>>>
[11, 68, 70, 86, 93]
class Tool:
    """A tool with a name and a weight, used by the sorting examples.

    No ordering methods are defined, so instances cannot be compared
    with ``<`` and must be sorted with an explicit ``key``.
    """

    def __init__(self, name, weight):
        self.name = name
        self.weight = weight

    def __repr__(self):
        return f'Tool({self.name!r}, {self.weight})'
tools = [
Tool('level', 3.5),
Tool('hammer', 1.25),
Tool('screwdriver', 0.5),
Tool('chisel', 0.25),
]
# 没有指定对比的key,排序失效
tools.sort()
>>>
Traceback ...
TypeError: '<' not supported between instances of 'Tool' and 'Tool'
# 指定了使用Tool的name来排序。
print('Unsorted:', repr(tools))
tools.sort(key=lambda x: x.name)
print('\nSorted: ', tools)
>>>
Unsorted: [Tool('level', 3.5),
Tool('hammer', 1.25),
Tool('screwdriver', 0.5),
Tool('chisel', 0.25)]
Sorted: [Tool('chisel', 0.25),
Tool('hammer', 1.25),
Tool('level', 3.5),
Tool('screwdriver', 0.5)]
# 当然也可以指定用重量(weight)来排序。
tools.sort(key=lambda x: x.weight)
print('By weight:', tools)
>>>
By weight: [Tool('chisel', 0.25),
Tool('screwdriver', 0.5),
Tool('hammer', 1.25),
Tool('level', 3.5)]
# 此处为了确保按照字母顺序排序,所以转成了小写(lower())
places = ['home', 'work', 'New York', 'Paris']
places.sort()
print('Case sensitive: ', places)
places.sort(key=lambda x: x.lower())
print('Case insensitive:', places)
>>>
Case sensitive: ['New York', 'Paris', 'home', 'work']
Case insensitive: ['home', 'New York', 'Paris', 'work']
同时排序多个条件怎么操作?最简单的方式是用元组(默认是自然排序,意味着实现了lt等sort需要的方法)。
saw = (5, 'circular saw')
jackhammer = (40, 'jackhammer')
assert not (jackhammer < saw) # Matches expectations
drill = (4, 'drill')
sander = (4, 'sander')
assert drill[0] == sander[0] # Same weight
assert drill[1] < sander[1] # Alphabetically less
assert drill < sander # Thus, drill comes first
power_tools = [
Tool('drill', 4),
Tool('circular saw', 5),
Tool('jackhammer', 40),
Tool('sander', 4),
]
power_tools.sort(key=lambda x: (x.weight, x.name))
print(power_tools)
>>>
[Tool('drill', 4),
Tool('sander', 4),
Tool('circular saw', 5),
Tool('jackhammer', 40)]
# 当然可以指定reverse来使所有key逆序。
# reverse=True makes every sort criterion descending.
power_tools.sort(key=lambda x: (x.weight, x.name), reverse=True)
print(power_tools)
>>>
[Tool('jackhammer', 40),
Tool('circular saw', 5),
Tool('sander', 4),
Tool('drill', 4)]
# 如果一部分需要逆序,一部分正序,怎么办?(一元负号可以稍微解决这个问题,但是其不支持所有类型)
power_tools.sort(key=lambda x: (-x.weight, x.name))
print(power_tools)
>>>
[Tool('jackhammer', 40),
Tool('circular saw', 5),
Tool('drill', 4),
Tool('sander', 4)]
power_tools.sort(key=lambda x: (x.weight, -x.name),
reverse=True)
>>>
Traceback ...
TypeError: bad operand type for unary -: 'str'
迫不得已才需要用到多个sort的组合。
power_tools.sort(key=lambda x: x.name) # Name ascending
power_tools.sort(key=lambda x: x.weight, # Weight descending
reverse=True)
print(power_tools)
>>>
[Tool('jackhammer', 40),
Tool('circular saw', 5),
Tool('drill', 4),
Tool('sander', 4)]
由于sort是稳定排序(相等的元素保持原有相对顺序),多次排序时需要按优先级从低到高依次调用:先按次要条件name排序,再按主要条件weight排序,才能得到上面的结果。如果调换两次sort的顺序,得到的就是"name为主、weight为次"的另一种结果。
- Item15:注意dict的插入顺序
在Python 3.5及更早的版本中,迭代一个dict时的顺序是任意的,和原本插入的顺序不一致。这个特性使得测试样例难以复现、难以debug。(主要是由于哈希表的实现用了内置的hash函数,以及解释器启动时设定的随机种子)
# Python 3.5
baby_names = {
'cat': 'kitten',
'dog': 'puppy',
}
print(baby_names)
>>>
{'dog': 'puppy', 'cat': 'kitten'}
从3.6开始,dict会保留插入顺序(3.6中是CPython的实现细节,3.7起成为语言规范):
baby_names = {
'cat': 'kitten',
'dog': 'puppy',
}
print(baby_names)
>>>
{'cat': 'kitten', 'dog': 'puppy'}
因此,3.6之前的依赖于dict的方法(keys, values, items, popitem),都会有这种惊喜存在:
# Python 3.5
print(list(baby_names.keys()))
print(list(baby_names.values()))
print(list(baby_names.items()))
print(baby_names.popitem()) # Randomly chooses an item
>>>
['dog', 'cat']
['puppy', 'kitten']
[('dog', 'puppy'), ('cat', 'kitten')]
('dog', 'puppy')
# 3.6之后
print(list(baby_names.keys()))
print(list(baby_names.values()))
print(list(baby_names.items()))
print(baby_names.popitem()) # Last item inserted
>>>
['cat', 'dog']
['kitten', 'puppy']
[('cat', 'kitten'), ('dog', 'puppy')]
('dog', 'puppy')
比如用**kwargs捕捉所有键值对,由于顺序问题,难以debug。
# Python 3.5
def my_func(**kwargs):
for key, value in kwargs.items():
print('%s = %s' % (key, value))
my_func(goose='gosling', kangaroo='joey')
>>>
kangaroo = joey
goose = gosling
def my_func(**kwargs):
for key, value in kwargs.items():
print(f'{key} = {value}')
my_func(goose='gosling', kangaroo='joey')
>>>
goose = gosling
kangaroo = joey
类也用dict作为实例字典。早期版本也是存在顺序问题。
# Python 3.5
class MyClass:
def __init__(self):
self.alligator = 'hatchling'
self.elephant = 'calf'
a = MyClass()
for key, value in a.__dict__.items():
print('%s = %s' % (key, value))
>>>
elephant = calf
alligator = hatchling
# 之后的版本
class MyClass:
def __init__(self):
self.alligator = 'hatchling'
self.elephant = 'calf'
a = MyClass()
for key, value in a.__dict__.items():
print(f'{key} = {value}')
>>>
alligator = hatchling
elephant = calf
尽管从3.7开始,dict和collections的OrderedDict的表现相似,但是如果经常插入和popitem(比如实现LRU缓存),OrderedDict可能比dict更适合。
再看一个例子。比如现在有动物的投票数字典,然后需求是得到投票数最多的动物,作为赢家,那么可以实现如下:
votes = {
'otter': 1281,
'polar bear': 587,
'fox': 863,
}
def populate_ranks(votes, ranks):
    """Fill ``ranks`` with ``name -> rank``; rank 1 has the most votes.

    Args:
        votes: Mapping of name -> vote count.
        ranks: Mutable mapping updated in place. Names are inserted in
            descending vote order.
    """
    names = list(votes.keys())
    # Sort the names by their vote count, highest first.
    names.sort(key=votes.get, reverse=True)
    for i, name in enumerate(names, 1):
        ranks[name] = i
def get_winner(ranks):
    """Return the first key yielded when iterating ``ranks``.

    This is the winner only if ``ranks`` iterates with the rank-1 name
    first, e.g. a plain dict filled in rank order.
    """
    return next(iter(ranks))
ranks = {}
populate_ranks(votes, ranks)
print(ranks)
winner = get_winner(ranks)
print(winner)
>>>
{'otter': 1, 'fox': 2, 'polar bear': 3}
otter
但是,现在需求变了,现在要的不是投票数顺序来遍历,而是名字的顺序来遍历了。此时可以用collections.abc来定义一个新的字典类(引入了dict-like的类型,但是会引入一些奇怪的bugs):
from collections.abc import MutableMapping
class SortedDict(MutableMapping):
    """Dict-like container that iterates its keys in sorted order.

    Items live in the plain dict ``self.data``. Only iteration order
    differs from a regular dict: keys come out sorted, not in insertion
    order.
    """

    def __init__(self):
        self.data = {}

    def __getitem__(self, key):
        return self.data[key]

    def __setitem__(self, key, value):
        self.data[key] = value

    def __delitem__(self, key):
        del self.data[key]

    def __iter__(self):
        # sorted() builds a sorted copy of the keys before yielding.
        yield from sorted(self.data)

    def __len__(self):
        return len(self.data)
sorted_ranks = SortedDict()
populate_ranks(votes, sorted_ranks)
print(sorted_ranks.data)
winner = get_winner(sorted_ranks)
print(winner)
>>>
{'otter': 1, 'fox': 2, 'polar bear': 3}
fox
但是,存在一个问题:函数对参数类型的假设并不明显(主要是dict的顺序问题)。get_winner假设ranks的迭代顺序就是populate_ranks的插入顺序(名次从高到低),而SortedDict按key的字母序迭代,打破了这个假设。可以通过:1)修改get_winner函数,使其不依赖迭代顺序,或者2)在参数类型不符时直接抛出异常,或者3)用类型注解显式限制参数的类型(需要用mypy的strict模式检查)来解决这个问题。
def get_winner(ranks):
    """Return the name whose rank is 1, whatever the iteration order.

    Returns None implicitly when no entry has rank 1.
    """
    for name, rank in ranks.items():
        if rank == 1:
            return name
winner = get_winner(sorted_ranks)
print(winner)
>>>
otter
def get_winner(ranks):
    """Return the first key of ``ranks``, which must be a real ``dict``.

    Raises:
        TypeError: If ``ranks`` is not a ``dict`` instance. Other mapping
            types may not iterate in insertion order.
    """
    if not isinstance(ranks, dict):
        raise TypeError('must provide a dict instance')
    return next(iter(ranks))
get_winner(sorted_ranks)
>>>
Traceback ...
TypeError: must provide a dict instance
from typing import Dict, MutableMapping
def populate_ranks(votes: Dict[str, int],
                   ranks: Dict[str, int]) -> None:
    """Fill ``ranks`` with ``name -> rank``; rank 1 has the most votes.

    Both parameters are annotated as ``Dict[str, int]`` so a static type
    checker can reject other mapping types.
    """
    names = list(votes.keys())
    # Sort the names by their vote count, highest first.
    names.sort(key=votes.get, reverse=True)
    for i, name in enumerate(names, 1):
        ranks[name] = i
def get_winner(ranks: Dict[str, int]) -> str:
    """Return the first key yielded when iterating ``ranks``.

    The ``Dict[str, int]`` annotation lets a static type checker reject
    other mapping types; nothing is checked at runtime.
    """
    return next(iter(ranks))
class SortedDict(MutableMapping[str, int]):
...
votes = {
'otter': 1281,
'polar bear': 587,
'fox': 863,
}
sorted_ranks = SortedDict()
populate_ranks(votes, sorted_ranks)
print(sorted_ranks.data)
winner = get_winner(sorted_ranks)
print(winner)
$ python3 -m mypy --strict example.py
.../example.py:48: error: Argument 2 to "populate_ranks" has incompatible type "SortedDict"; expected "Dict[str, int]"
.../example.py:50: error: Argument 1 to "get_winner" has incompatible type "SortedDict"; expected "Dict[str, int]"
网友评论