在公司项目上需要对数据做过期处理,第一反应是redis里面的key支持过期设置,但在用的脚本都是纯python实现的,没有对接过redis。本着“生命在于折腾”的原则,想实现一个类似于redis里key过期机制的数据结构。总体设计需求:
1.基于python的dict来实现;
2.支持大部分的字典操作;
3.键的过期:支持单个key的设置和全局设置;
4.测试百万级别的键的写入和删除的性能;(性能太烂的话,没有意义呢)
5.为了提高性能,过期的key不实时删除,在读取值或者调用特殊方法时进行删除;
6.获取已过期的key时返回None;
# 定义一个支持key时间过期的字典
import time
class ExpireDict(dict):
    """A dict subclass whose keys expire, loosely modelled on Redis key expiry.

    Every entry is stored internally as ``[write_time, expire_at, value]``.
    Expired entries are not removed in real time; they are dropped lazily when
    the key is read or when a sweeping method (``values``, ``items``, ``pop``,
    ``popitem``, ``del_expired_key``) is called.

    Usage:
        1. Create an instance: ``obj = ExpireDict()``.
        2. Set the lifetime applied to keys written afterwards:
           ``obj.expire_time = 120`` (the default is 60 seconds).
        3. Add a key and override its lifetime:
           ``obj['a'] = 1; obj.set_key_expired('a', 30)``.
        4. Read a value: ``obj['a']`` or ``obj.get('a')``.
        5. Not supported: ``fromkeys``, ``update``, and populating the dict
           through the ``dict`` factory arguments or a dict comprehension.

    Methods that are not overridden here (for example ``keys()``, ``len()``
    and the ``in`` operator) are inherited from ``dict`` unchanged and
    therefore do not take expiry into account.
    """

    # Sentinel distinguishing "no default supplied" from ``default=None``.
    _MISSING = object()

    def __init__(self, expire_time=60):
        """Create an empty dict.

        :param expire_time: lifetime in seconds given to newly written keys
            (60 by default).
        """
        self.__expired = expire_time
        super().__init__()

    @property
    def expire_time(self):
        """Lifetime in seconds applied to keys written from now on."""
        return self.__expired

    @expire_time.setter
    def expire_time(self, value):
        self.__expired = value

    def __setitem__(self, key, value):
        """Store *value* under *key* and (re)start its lifetime.

        :param key: key to write
        :param value: value to store
        """
        begin_time = time.time()
        super().__setitem__(
            key, [begin_time, begin_time + self.__expired, value]
        )

    def __getitem__(self, key):
        """Return the stored value, or ``None`` if the key has expired.

        An expired key is deleted as a side effect of the lookup.

        :param key: key to read
        :return: the stored value, or ``None`` when the key has expired
        :raises KeyError: if the key is not present at all
        """
        if not self.check_key(key):
            return None
        return super().__getitem__(key)[-1]

    def set_key_expired(self, key, expired):
        """Set the lifetime of a single key.

        The new expiry moment is the key's last write time plus *expired*.

        :param key: key to update
        :param expired: lifetime in seconds, counted from the last write
        :raises KeyError: if the key is not present
        """
        entry = super().__getitem__(key)
        # The stored list is mutated in place, so no re-insertion is needed.
        entry[1] = entry[0] + expired

    def get(self, key, default=None):
        """Return the value for *key*, or *default* if missing or expired.

        :param key: key to read
        :param default: value returned when the key is absent or expired
        :return: the stored value or *default*
        """
        try:
            alive = self.check_key(key)
        except KeyError:
            return default
        if not alive:
            return default
        return super().__getitem__(key)[-1]

    def setdefault(self, key, value=None):
        """Return the live value for *key*, inserting *value* if there is none.

        A key that has expired is treated as absent and is overwritten.

        :param key: key to read or create
        :param value: value stored when the key is absent or expired
        :return: the value now associated with *key*
        """
        try:
            alive = self.check_key(key)
        except KeyError:
            alive = False
        if alive:
            return super().__getitem__(key)[-1]
        self[key] = value
        return value

    def values(self):
        """Purge expired keys and return an iterator over the live values."""
        self.del_expired_key()
        entries = list(super().values())
        return (entry[-1] for entry in entries)

    def items(self):
        """Purge expired keys and return an iterator over live (key, value) pairs."""
        self.del_expired_key()
        pairs = list(super().items())
        return ((key, entry[-1]) for key, entry in pairs)

    def pop(self, key, default=_MISSING):
        """Purge expired keys, then remove *key* and return its value.

        :param key: key to remove
        :param default: returned instead of raising when the key is absent
            (or was just purged because it had expired)
        :return: the removed value, or *default*
        :raises KeyError: if the key is absent and no default was given
        """
        self.del_expired_key()
        # Stored entries are always lists, so ``None`` reliably means "absent".
        entry = super().pop(key, None)
        if entry is None:
            if default is ExpireDict._MISSING:
                raise KeyError(key)
            return default
        return entry[-1]

    def popitem(self):
        """Purge expired keys, then remove and return one (key, value) pair.

        :raises KeyError: if no live item remains
        """
        self.del_expired_key()
        key, entry = super().popitem()
        return key, entry[-1]

    def __get_expired(self, key):
        """Return the absolute expiry timestamp stored for *key*.

        :param key: key to inspect
        :raises KeyError: if the key is not present
        """
        return super().__getitem__(key)[1]

    def check_key(self, key):
        """Report whether *key* is still alive, deleting it if it has expired.

        :param key: key to check
        :return: ``True`` if the key has not expired, ``False`` if it had
            expired (in which case it has been deleted)
        :raises KeyError: if the key is not present
        """
        if time.time() >= self.__get_expired(key):
            del self[key]
            return False
        return True

    def del_expired_key(self):
        """Delete every key whose expiry time has been reached."""
        now = time.time()
        # Snapshot the items first: deleting while iterating a dict view raises.
        snapshot = list(super().items())
        # ``<=`` matches the ``>=`` test in check_key, so both agree on the
        # exact expiry instant.
        for key, entry in snapshot:
            if entry[1] <= now:
                del self[key]
if __name__ == "__main__":
    # Demo: keys written with a 3-second lifetime expire after the sleep,
    # while 'b' is given a 10-second lifetime and is still readable.
    d = ExpireDict()
    d.expire_time = 3
    d["a"] = 3
    d["b"] = 4
    d.setdefault('c', 5)
    d.set_key_expired('b', 10)
    print("d['a']={0}\nd['b']={1}\nd['c']={2}".format(d['a'], d['b'], d['c']))
    time.sleep(3)
    print("休眠3秒后:")
    # 'a' and 'c' have reached their expiry time and now read as None.
    print("d['a']={0}\nd['b']={1}\nd['c']={2}".format(d.get('a'), d['b'], d['c']))
使用举例:
if __name__ == "__main__":
    d = ExpireDict()
    # The property is spelled ``expire_time``; assigning to a misspelled name
    # would only create an unrelated attribute and leave the 60 s default.
    d.expire_time = 3
    d["a"] = 3
    d["b"] = 4
    d.setdefault('c', 5)
    # Give 'b' a 10-second lifetime so it outlives the sleep below.
    d.set_key_expired('b', 10)
    print("d['a']={0}\nd['b']={1}\nd['c']={2}".format(d['a'], d['b'], d['c']))
    time.sleep(3)
    print("休眠3秒后:")
    print("d['a']={0}\nd['b']={1}\nd['c']={2}".format(d.get('a'), d['b'], d['c']))
执行结果:
image.png
遍历操作:
d = ExpireDict()
# Use the real property name so the 3-second lifetime actually applies.
d.expire_time = 3
# Populate the dict in a loop.
for k in range(5):
    d[k] = k
# Iterate over the (key, value) pairs that have not expired.
for k, v in d.items():
    print(k, v)
性能测试,以内置字典做对照:
测试环境:python3.7.4
内置字典:
from sys import getsizeof
d = dict()
# Baseline benchmark: one million writes, reads and deletes on a built-in dict.
# time.perf_counter() is used because it is the clock intended for measuring
# short intervals.

# Timed bulk assignment.
start_time = time.perf_counter()
for k in range(10**6):
    d[k] = k
# Capture the elapsed time before printing so the size report is not timed.
elapsed = time.perf_counter() - start_time
# getsizeof is shallow: it reports the dict object itself, not its contents.
print(f"字典的大小:{getsizeof(d)}")
print(f"遍历赋值用时:{elapsed}")

# Timed bulk lookup.
start_time = time.perf_counter()
for k in range(10**6):
    a = d[k]
print(f"循环取值用时:{time.perf_counter() - start_time}")

# Timed bulk deletion.
start_time = time.perf_counter()
for k in range(10**6):
    del d[k]
print(f"循环删除用时:{time.perf_counter() - start_time}")
测试结果:
C:\Python37\python.exe D:/work/tools/ebox_test/ExpiredDict.py
字典的大小:41943144
遍历赋值用时:0.1425952911376953
循环取值用时:0.09574246406555176
循环删除用时:0.11171293258666992
Process finished with exit code 0
过期字典:
from sys import getsizeof
d = ExpireDict()
# Same benchmark as for the built-in dict, run against ExpireDict.
# time.perf_counter() is used because it is the clock intended for measuring
# short intervals.

# Timed bulk assignment.
start_time = time.perf_counter()
for k in range(10**6):
    d[k] = k
# Capture the elapsed time before printing so the size report is not timed.
elapsed = time.perf_counter() - start_time
# getsizeof is shallow: the per-key [write_time, expire_at, value] lists that
# ExpireDict stores are not included in this figure.
print(f"字典的大小:{getsizeof(d)}")
print(f"遍历赋值用时:{elapsed}")

# Timed bulk lookup (each read also performs the expiry check).
start_time = time.perf_counter()
for k in range(10**6):
    a = d[k]
print(f"循环取值用时:{time.perf_counter() - start_time}")

# Timed bulk deletion.
start_time = time.perf_counter()
for k in range(10**6):
    del d[k]
print(f"循环删除用时:{time.perf_counter() - start_time}")
执行结果:
C:\Python37\python.exe D:/work/tools/ebox_test/ExpiredDict.py
字典的大小:41943160
遍历赋值用时:0.9614558219909668
循环取值用时:0.9504311084747314
循环删除用时:0.2124321460723877
测试结论:
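1.在100万级别的键的操作中,过期字典的写入用时约为内置字典的7倍(0.96s对0.14s),读取约为10倍(0.95s对0.10s),删除约为2倍(0.21s对0.11s);100w次累计用时均小于1秒,单次操作在us级别;
2.存储100w个相同的数据时,sys.getsizeof的结果与内置字典近似;但getsizeof只统计字典对象本身(哈希表),不包含每个键对应的[写入时间, 过期时间, 值]列表,因此过期字典的实际内存占用会高于内置字典;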
因此过期字典ExpireDict 在非高并发的场景与内置字典同样适用;
网友评论