大师兄的Python源码学习笔记(七): Set对象
大师兄的Python源码学习笔记(九): 自制Python
一、关于Tuple
- Tuple(元组)类型与list类似,但是一旦创建,就不能随意更改包含的元素。
1. PyTupleObject对象
- Tuple在Python中是由PyTupleObject对象实现的。
Include\tupleobject.h
typedef struct {
PyObject_VAR_HEAD
PyObject *ob_item[1];
/* ob_item contains space for 'ob_size' elements.
* Items must normally not be NULL, except during construction when
* the tuple is not yet visible outside the function that builds it.
*/
} PyTupleObject;
- 头部的PyObject_VAR_HEAD说明PyListObject是变长对象。
- ob_item是指向第一个元素的指针。
- ob_item中还包含一个引用计数ob_size
- PyTupleObject不包含allocated,因为他是不可变对象。
2. 类型对象
- Tuple的类型对象为PyTuple_Type。
Objects\tupleobject.c
PyTypeObject PyTuple_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"tuple",
sizeof(PyTupleObject) - sizeof(PyObject *),
sizeof(PyObject *),
(destructor)tupledealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
(reprfunc)tuplerepr, /* tp_repr */
0, /* tp_as_number */
&tuple_as_sequence, /* tp_as_sequence */
&tuple_as_mapping, /* tp_as_mapping */
(hashfunc)tuplehash, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC |
Py_TPFLAGS_BASETYPE | Py_TPFLAGS_TUPLE_SUBCLASS, /* tp_flags */
tuple_new__doc__, /* tp_doc */
(traverseproc)tupletraverse, /* tp_traverse */
0, /* tp_clear */
tuplerichcompare, /* tp_richcompare */
0, /* tp_weaklistoffset */
tuple_iter, /* tp_iter */
0, /* tp_iternext */
tuple_methods, /* tp_methods */
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
0, /* tp_init */
0, /* tp_alloc */
tuple_new, /* tp_new */
PyObject_GC_Del, /* tp_free */
};
- 从中可以看出,Tuple不支持修改操作。
二、创建PyTupleObject对象
- 与PyListObject类似,创建PyTupleObject调用了PyTuple_New方法。
Objects\tupleobject.c
PyObject *
PyTuple_New(Py_ssize_t size)
{
PyTupleObject *op;
Py_ssize_t i;
if (size < 0) {
PyErr_BadInternalCall();
return NULL;
}
#if PyTuple_MAXSAVESIZE > 0
if (size == 0 && free_list[0]) {
op = free_list[0];
Py_INCREF(op);
#ifdef COUNT_ALLOCS
tuple_zero_allocs++;
#endif
return (PyObject *) op;
}
if (size < PyTuple_MAXSAVESIZE && (op = free_list[size]) != NULL) {
free_list[size] = (PyTupleObject *) op->ob_item[0];
numfree[size]--;
#ifdef COUNT_ALLOCS
fast_tuple_allocs++;
#endif
/* Inline PyObject_InitVar */
#ifdef Py_TRACE_REFS
Py_SIZE(op) = size;
Py_TYPE(op) = &PyTuple_Type;
#endif
_Py_NewReference((PyObject *)op);
}
else
#endif
{
/* Check for overflow */
if ((size_t)size > ((size_t)PY_SSIZE_T_MAX - sizeof(PyTupleObject) -
sizeof(PyObject *)) / sizeof(PyObject *)) {
return PyErr_NoMemory();
}
op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size);
if (op == NULL)
return NULL;
}
for (i=0; i < size; i++)
op->ob_item[i] = NULL;
#if PyTuple_MAXSAVESIZE > 0
if (size == 0) {
free_list[0] = op;
++numfree[0];
Py_INCREF(op); /* extra INCREF so that this is never freed */
}
#endif
#ifdef SHOW_TRACK_COUNT
count_tracked++;
#endif
_PyObject_GC_TRACK(op);
return (PyObject *) op;
}
- 如果缓冲池中包含对象,优先使用缓冲池中的对象。
#if PyTuple_MAXSAVESIZE > 0 if (size == 0 && free_list[0]) { op = free_list[0]; Py_INCREF(op); #ifdef COUNT_ALLOCS tuple_zero_allocs++; #endif return (PyObject *) op; } if (size < PyTuple_MAXSAVESIZE && (op = free_list[size]) != NULL) { free_list[size] = (PyTupleObject *) op->ob_item[0]; numfree[size]--;
- 否则先检查内存溢出。
/* Check for overflow */ if ((size_t)size > ((size_t)PY_SSIZE_T_MAX - sizeof(PyTupleObject) - sizeof(PyObject *)) / sizeof(PyObject *)) { return PyErr_NoMemory(); }
- 之后调用PyObject_GC_NewVar宏创建一个新的PyTupleObject。
op = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size);
三、查看对象中的元素
- 查看元素调用了PyTuple_GetItem。
Objects\tupleobject.c
PyObject *
PyTuple_GetItem(PyObject *op, Py_ssize_t i)
{
if (!PyTuple_Check(op)) {
PyErr_BadInternalCall();
return NULL;
}
if (i < 0 || i >= Py_SIZE(op)) {
PyErr_SetString(PyExc_IndexError, "tuple index out of range");
return NULL;
}
return ((PyTupleObject *)op) -> ob_item[i];
}
- 这个函数比较简单,验证对象的有效性和i是否越界后,直接返回了对象中指定位置的元素值。
四、设置对象中的元素
- 在Python层面Tuple虽然不支持元素的设置操作,但是底层PyTupleObject创建后会调用PyTuple_SetItem给对象添加元素。
Objects\tupleobject.c
int
PyTuple_SetItem(PyObject *op, Py_ssize_t i, PyObject *newitem)
{
PyObject **p;
if (!PyTuple_Check(op) || op->ob_refcnt != 1) {
Py_XDECREF(newitem);
PyErr_BadInternalCall();
return -1;
}
if (i < 0 || i >= Py_SIZE(op)) {
Py_XDECREF(newitem);
PyErr_SetString(PyExc_IndexError,
"tuple assignment index out of range");
return -1;
}
p = ((PyTupleObject *)op) -> ob_item + i;
Py_XSETREF(*p, newitem);
return 0;
}
- PyTuple_SetItem的结构非常简单,但在Python中修改Tuple元素时为什么会报错?
- 查看PyObject_SetItem
Objects\abstract.c
int
PyObject_SetItem(PyObject *o, PyObject *key, PyObject *value)
{
PyMappingMethods *m;
if (o == NULL || key == NULL || value == NULL) {
null_error();
return -1;
}
m = o->ob_type->tp_as_mapping;
if (m && m->mp_ass_subscript)
return m->mp_ass_subscript(o, key, value);
if (o->ob_type->tp_as_sequence) {
if (PyIndex_Check(key)) {
Py_ssize_t key_value;
key_value = PyNumber_AsSsize_t(key, PyExc_IndexError);
if (key_value == -1 && PyErr_Occurred())
return -1;
return PySequence_SetItem(o, key_value, value);
}
else if (o->ob_type->tp_as_sequence->sq_ass_item) {
type_error("sequence index must be "
"integer, not '%.200s'", key);
return -1;
}
}
type_error("'%.200s' object does not support item assignment", o);
return -1;
}
- 看下面两端是不是很眼熟,Python中Tuple赋值的底层报错代码在这里:
type_error("'%.200s' object does not support item assignment", o);
Traceback (most recent call last): File "<stdin>", line 1, in <module> TypeError: 'tuple' object does not support item assignment
- 再看下面这段,需要查看PyTuple_Type对象的tp_as_sequence映射。
if (o->ob_type->tp_as_sequence) { if (PyIndex_Check(key)) { Py_ssize_t key_value; key_value = PyNumber_AsSsize_t(key, PyExc_IndexError); if (key_value == -1 && PyErr_Occurred()) return -1; return PySequence_SetItem(o, key_value, value); } else if (o->ob_type->tp_as_sequence->sq_ass_item) { type_error("sequence index must be " "integer, not '%.200s'", key); return -1; } }
- 查看PyTuple_Type对象的tp_as_sequence映射,sq_ass_item为0。
... ...
&tuple_as_sequence, /* tp_as_sequence */
... ...
Objects\tupleobject.c
static PySequenceMethods tuple_as_sequence = {
(lenfunc)tuplelength, /* sq_length */
(binaryfunc)tupleconcat, /* sq_concat */
(ssizeargfunc)tuplerepeat, /* sq_repeat */
(ssizeargfunc)tupleitem, /* sq_item */
0, /* sq_slice */
0, /* sq_ass_item */
0, /* sq_ass_slice */
(objobjproc)tuplecontains, /* sq_contains */
};
- 所以在PyObject_SetItem中,最终会触发type_error。
五、缓冲池
- 为了避免频繁申请、释放内存空间,系统默认元素小于20个的Tuple使用缓冲池。
- 系统还默认缓冲池最多储存2000个PyTupleObject对象。
Objects\tupleobject.c
/* Speed optimization to avoid frequent malloc/free of small tuples */
#ifndef PyTuple_MAXSAVESIZE
#define PyTuple_MAXSAVESIZE 20 /* Largest tuple to save on free list */
#endif
#ifndef PyTuple_MAXFREELIST
#define PyTuple_MAXFREELIST 2000 /* Maximum number of tuples of each size to save */
#endif
#if PyTuple_MAXSAVESIZE > 0
/* Entries 1 up to PyTuple_MAXSAVESIZE are free lists, entry 0 is the empty
tuple () of which at most one instance will be allocated.
*/
static PyTupleObject *free_list[PyTuple_MAXSAVESIZE];
static int numfree[PyTuple_MAXSAVESIZE];
#endif
- 缓冲池机制与PyListObject类似,在销毁元素时,将空的PyTupleObject装到缓冲池中。
Objects\tupleobject.c
static void
tupledealloc(PyTupleObject *op)
{
Py_ssize_t i;
Py_ssize_t len = Py_SIZE(op);
PyObject_GC_UnTrack(op);
Py_TRASHCAN_SAFE_BEGIN(op)
if (len > 0) {
i = len;
while (--i >= 0)
Py_XDECREF(op->ob_item[i]);
#if PyTuple_MAXSAVESIZE > 0
if (len < PyTuple_MAXSAVESIZE &&
numfree[len] < PyTuple_MAXFREELIST &&
Py_TYPE(op) == &PyTuple_Type)
{
op->ob_item[0] = (PyObject *) free_list[len];
numfree[len]++;
free_list[len] = op;
goto done; /* return */
}
#endif
}
Py_TYPE(op)->tp_free((PyObject *)op);
done:
Py_TRASHCAN_SAFE_END(op)
}
- 在创建对象时,优先使用缓冲池中的对象。
#if PyTuple_MAXSAVESIZE > 0
if (size == 0 && free_list[0]) {
op = free_list[0];
Py_INCREF(op);
#ifdef COUNT_ALLOCS
tuple_zero_allocs++;
#endif
return (PyObject *) op;
}
if (size < PyTuple_MAXSAVESIZE && (op = free_list[size]) != NULL) {
free_list[size] = (PyTupleObject *) op->ob_item[0];
numfree[size]--;
网友评论