- 大师兄的Python源码学习笔记(五十九): Python的内存
- 大师兄的Python源码学习笔记(五十一): Python的内存
- 大师兄的Python源码学习笔记(五十六): Python的内存
- 大师兄的Python源码学习笔记(五十七): Python的内存
- 大师兄的Python源码学习笔记(五十八): Python的内存
- 大师兄的Python源码学习笔记(五十三): Python的内存
- 大师兄的Python源码学习笔记(五十二): Python的内存
- 大师兄的Python源码学习笔记(五十五): Python的内存
- 大师兄的Python源码学习笔记(五十四): Python的内存
- 大师兄的Python源码学习笔记(五十): Python的内存管
大师兄的Python源码学习笔记(五十七): Python的内存管理机制(十二)
大师兄的Python源码学习笔记(五十九): Python的内存管理机制(十四)
五、Python中的垃圾收集
4. 垃圾收集全景
- 回顾实际完成垃圾收集的collect方法:
gcmodule.c
/* This is the main function. Read this to understand how the
* collection process works.
*
* generation: index of the generation to collect; every younger generation
*   is merged into it before the analysis starts.
* n_collected, n_uncollectable: optional out-parameters (skipped when NULL)
*   that receive m and n respectively.
* nofail: when non-zero, an exception still pending at the end of the
*   collection is cleared; otherwise it is reported through
*   PyErr_WriteUnraisable() followed by Py_FatalError().
*
* m counts the objects found on the unreachable list plus whatever
* handle_weakrefs() returns; n counts the objects that end up on the
* finalizers list. The return value is n + m.
*/
static Py_ssize_t
collect(int generation, Py_ssize_t *n_collected, Py_ssize_t *n_uncollectable,
int nofail)
{
int i;
Py_ssize_t m = 0; /* # objects collected */
Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
PyGC_Head *young; /* the generation we are examining */
PyGC_Head *old; /* next older generation */
PyGC_Head unreachable; /* non-problematic unreachable trash */
PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
PyGC_Head *gc;
_PyTime_t t1 = 0; /* initialize to prevent a compiler warning */
/* Statistics record of the generation being collected; updated just
 * before returning. */
struct gc_generation_stats *stats = &_PyRuntime.gc.generation_stats[generation];
/* With DEBUG_STATS set, dump the size of every generation list and take
 * the start timestamp used for the elapsed-time report further down. */
if (_PyRuntime.gc.debug & DEBUG_STATS) {
PySys_WriteStderr("gc: collecting generation %d...\n",
generation);
PySys_WriteStderr("gc: objects in each generation:");
for (i = 0; i < NUM_GENERATIONS; i++)
PySys_FormatStderr(" %zd",
gc_list_size(GEN_HEAD(i)));
PySys_WriteStderr("\ngc: objects in permanent generation: %zd",
gc_list_size(&_PyRuntime.gc.permanent_generation.head));
t1 = _PyTime_GetMonotonicClock();
PySys_WriteStderr("\n");
}
if (PyDTrace_GC_START_ENABLED())
PyDTrace_GC_START(generation);
/* update collection and allocation counters */
1. 将比当前处理的更年轻的代链表合并到当前代中
/* Increment the count of the next older generation, if there is one, and
 * zero the counts of this generation and of all younger ones. */
if (generation+1 < NUM_GENERATIONS)
_PyRuntime.gc.generations[generation+1].count += 1;
for (i = 0; i <= generation; i++)
_PyRuntime.gc.generations[i].count = 0;
/* merge younger generations with one we are currently collecting */
for (i = 0; i < generation; i++) {
gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
}
/* handy references */
young = GEN_HEAD(generation);
/* The oldest generation has no older neighbour, so old aliases young in
 * that case; the young != old test below relies on this. */
if (generation < NUM_GENERATIONS-1)
old = GEN_HEAD(generation+1);
else
old = young;
/* Using ob_refcnt and gc_refs, calculate which objects in the
* container set are reachable from outside the set (i.e., have a
* refcount greater than 0 when all the references within the
* set are taken into account).
*/
2. 在待处理链表上进行打破循环的模拟,寻找root object
update_refs(young);
subtract_refs(young);
/* Leave everything reachable from outside young in young, and move
* everything else (in young) to unreachable.
* NOTE: This used to move the reachable objects into a reachable
* set instead. But most things usually turn out to be reachable,
* so it's more efficient to move the unreachable things.
*/
3. 将待处理链表中的unreachable object转移到unreachable链表中,当前代中只剩下reachable object。
gc_list_init(&unreachable);
move_unreachable(young, &unreachable);
/* Move reachable objects to next generation. */
4. 将当前代中的reachable object合并到更老的代中。
if (young != old) {
/* Survivors of the second-oldest generation are tallied in
 * long_lived_pending before they are merged into the oldest one. */
if (generation == NUM_GENERATIONS - 2) {
_PyRuntime.gc.long_lived_pending += gc_list_size(young);
}
gc_list_merge(young, old);
}
else {
/* We only untrack dicts in full collections, to avoid quadratic
dict build-up. See issue #14775. */
untrack_dicts(young);
/* Full collection: reset the pending tally and record how many
 * objects remain in the oldest generation. */
_PyRuntime.gc.long_lived_pending = 0;
_PyRuntime.gc.long_lived_total = gc_list_size(young);
}
/* All objects in unreachable are trash, but objects reachable from
* legacy finalizers (e.g. tp_del) can't safely be deleted.
*/
5. 将 unreachable链表中带有__del__函数的对象与其引用对象收集到finalizers链表中。
gc_list_init(&finalizers);
move_legacy_finalizers(&unreachable, &finalizers);
/* finalizers contains the unreachable objects with a legacy finalizer;
* unreachable objects reachable *from* those are also uncollectable,
* and we move those into the finalizers list too.
*/
move_legacy_finalizer_reachable(&finalizers);
/* Collect statistics on collectable objects found and print
* debugging information.
*/
for (gc = unreachable.gc.gc_next; gc != &unreachable;
gc = gc->gc.gc_next) {
m++;
if (_PyRuntime.gc.debug & DEBUG_COLLECTABLE) {
debug_cycle("collectable", FROM_GC(gc));
}
}
/* Clear weakrefs and invoke callbacks as necessary. */
6. 处理弱引用,尝试调用其callback操作
/* The count returned by handle_weakrefs() is added to the collected total. */
m += handle_weakrefs(&unreachable, old);
/* Call tp_finalize on objects which have one. */
finalize_garbage(&unreachable);
/* When check_garbage() reports a non-zero result after finalization, the
 * whole unreachable set is revived and kept in old instead of being deleted.
 * NOTE(review): presumably this detects objects resurrected by their
 * finalizers -- confirm against check_garbage(). */
if (check_garbage(&unreachable)) {
revive_garbage(&unreachable);
gc_list_merge(&unreachable, old);
}
else {
/* Call tp_clear on objects in the unreachable set. This will cause
* the reference cycles to be broken. It may also cause some objects
* in finalizers to be freed.
*/
7. 对unreachable链表上的对象进行垃圾回收操作
delete_garbage(&unreachable, old);
}
/* Collect statistics on uncollectable objects found and print
* debugging information. */
for (gc = finalizers.gc.gc_next;
gc != &finalizers;
gc = gc->gc.gc_next) {
n++;
if (_PyRuntime.gc.debug & DEBUG_UNCOLLECTABLE)
debug_cycle("uncollectable", FROM_GC(gc));
}
if (_PyRuntime.gc.debug & DEBUG_STATS) {
_PyTime_t t2 = _PyTime_GetMonotonicClock();
/* The first number printed is the total n+m, the second is the
 * uncollectable part n. */
if (m == 0 && n == 0)
PySys_WriteStderr("gc: done");
else
PySys_FormatStderr(
"gc: done, %zd unreachable, %zd uncollectable",
n+m, n);
PySys_WriteStderr(", %.4fs elapsed\n",
_PyTime_AsSecondsDouble(t2 - t1));
}
/* Append instances in the uncollectable set to a Python
* reachable list of garbage. The programmer has to deal with
* this if they insist on creating this type of structure.
*/
8. 将含有__del__操作的实例收集到garbage链表中,同时将finalizers链表中所有对象加入到old链表中。
handle_legacy_finalizers(&finalizers, old);
/* Clear free list only during the collection of the highest
* generation */
if (generation == NUM_GENERATIONS-1) {
clear_freelists();
}
/* An exception may be pending at this point; nofail selects between
 * silently clearing it and treating it as fatal. */
if (PyErr_Occurred()) {
if (nofail) {
PyErr_Clear();
}
else {
/* gc_str is not declared in this function and is created lazily here.
 * NOTE(review): presumably a file-level cached string -- confirm. */
if (gc_str == NULL)
gc_str = PyUnicode_FromString("garbage collection");
PyErr_WriteUnraisable(gc_str);
Py_FatalError("unexpected exception during garbage collection");
}
}
/* Update stats */
/* Both out-parameters are optional and only written when non-NULL. */
if (n_collected)
*n_collected = m;
if (n_uncollectable)
*n_uncollectable = n;
stats->collections++;
stats->collected += m;
stats->uncollectable += n;
if (PyDTrace_GC_DONE_ENABLED())
PyDTrace_GC_DONE(n+m);
return n+m;
}
- 可以注意到Python对弱引用weakref的处理,因为weakref能够注册callback操作,所以这个行为类似带有__del__的实例对象。
- 区别是weakref能够被正确地清理掉,而带有__del__的实例对象不能被自动清除,而是被放入garbage链表中。
gcmodule.c
/* Clear every weak reference to the objects on `unreachable`, then invoke
 * the callbacks of those weakrefs that are themselves still reachable.
 *
 * unreachable: list of tentatively unreachable objects to scan.
 * old: list that receives each weakref which is still alive after its
 *   callback has run and the temporary reference has been dropped.
 *
 * Works in two passes: the first clears all weakrefs and queues the ones
 * whose callbacks must run on the local wrcb_to_call list; the second runs
 * the queued callbacks.
 *
 * Returns the number of queued weakrefs that were no longer at the head of
 * wrcb_to_call after their temporary reference was released (these are
 * counted as freed).
 */
static int
handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
{
PyGC_Head *gc;
PyObject *op; /* generally FROM_GC(gc) */
PyWeakReference *wr; /* generally a cast of op */
PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
PyGC_Head *next;
int num_freed = 0;
gc_list_init(&wrcb_to_call);
/* Clear all weakrefs to the objects in unreachable. If such a weakref
* also has a callback, move it into `wrcb_to_call` if the callback
* needs to be invoked. Note that we cannot invoke any callbacks until
* all weakrefs to unreachable objects are cleared, lest the callback
* resurrect an unreachable object via a still-active weakref. We
* make another pass over wrcb_to_call, invoking callbacks, after this
* pass completes.
*/
for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
PyWeakReference **wrlist;
op = FROM_GC(gc);
assert(IS_TENTATIVELY_UNREACHABLE(op));
next = gc->gc.gc_next;
if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
continue;
/* It supports weakrefs. Does it have any? */
wrlist = (PyWeakReference **)
PyObject_GET_WEAKREFS_LISTPTR(op);
/* `op` may have some weakrefs. March over the list, clear
* all the weakrefs, and move the weakrefs with callbacks
* that must be called into wrcb_to_call.
*/
/* The loop re-reads *wrlist on every iteration instead of following a
 * next pointer: as noted below, clearing a ref also changes *wrlist. */
for (wr = *wrlist; wr != NULL; wr = *wrlist) {
PyGC_Head *wrasgc; /* AS_GC(wr) */
/* _PyWeakref_ClearRef clears the weakref but leaves
* the callback pointer intact. Obscure: it also
* changes *wrlist.
*/
assert(wr->wr_object == op);
_PyWeakref_ClearRef(wr);
assert(wr->wr_object == Py_None);
if (wr->wr_callback == NULL)
continue; /* no callback */
/* Headache time. `op` is going away, and is weakly referenced by
* `wr`, which has a callback. Should the callback be invoked? If wr
* is also trash, no:
*
* 1. There's no need to call it. The object and the weakref are
* both going away, so it's legitimate to pretend the weakref is
* going away first. The user has to ensure a weakref outlives its
* referent if they want a guarantee that the wr callback will get
* invoked.
*
* 2. It may be catastrophic to call it. If the callback is also in
* cyclic trash (CT), then although the CT is unreachable from
* outside the current generation, CT may be reachable from the
* callback. Then the callback could resurrect insane objects.
*
* Since the callback is never needed and may be unsafe in this case,
* wr is simply left in the unreachable set. Note that because we
* already called _PyWeakref_ClearRef(wr), its callback will never
* trigger.
*
* OTOH, if wr isn't part of CT, we should invoke the callback: the
* weakref outlived the trash. Note that since wr isn't CT in this
* case, its callback can't be CT either -- wr acted as an external
* root to this generation, and therefore its callback did too. So
* nothing in CT is reachable from the callback either, so it's hard
* to imagine how calling it later could create a problem for us. wr
* is moved to wrcb_to_call in this case.
*/
if (IS_TENTATIVELY_UNREACHABLE(wr))
continue;
assert(IS_REACHABLE(wr));
/* Create a new reference so that wr can't go away
* before we can process it again.
*/
Py_INCREF(wr);
/* Move wr to wrcb_to_call, for the next pass. */
wrasgc = AS_GC(wr);
assert(wrasgc != next); /* wrasgc is reachable, but
next isn't, so they can't
be the same */
gc_list_move(wrasgc, &wrcb_to_call);
}
}
/* Invoke the callbacks we decided to honor. It's safe to invoke them
* because they can't reference unreachable objects.
*/
/* Each iteration handles the current head of wrcb_to_call; the head is
 * either moved to old or found to be gone, so the list shrinks. */
while (! gc_list_is_empty(&wrcb_to_call)) {
PyObject *temp;
PyObject *callback;
gc = wrcb_to_call.gc.gc_next;
op = FROM_GC(gc);
assert(IS_REACHABLE(op));
assert(PyWeakref_Check(op));
wr = (PyWeakReference *)op;
callback = wr->wr_callback;
assert(callback != NULL);
/* copy-paste of weakrefobject.c's handle_callback() */
temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
/* A failing callback is reported as unraisable and does not stop the pass. */
if (temp == NULL)
PyErr_WriteUnraisable(callback);
else
Py_DECREF(temp);
/* Give up the reference we created in the first pass. When
* op's refcount hits 0 (which it may or may not do right now),
* op's tp_dealloc will decref op->wr_callback too. Note
* that the refcount probably will hit 0 now, and because this
* weakref was reachable to begin with, gc didn't already
* add it to its count of freed objects. Example: a reachable
* weak value dict maps some key to this reachable weakref.
* The callback removes this key->weakref mapping from the
* dict, leaving no other references to the weakref (excepting
* ours).
*/
Py_DECREF(op);
/* gc still being the list head is used as the sign that the DECREF
 * above did not deallocate the weakref. */
if (wrcb_to_call.gc.gc_next == gc) {
/* object is still alive -- move it */
gc_list_move(gc, old);
}
else
++num_freed;
}
return num_freed;
}
- 到这里可以看出,Python的垃圾收集机制完全是为了处理循环引用而设计的。
- 虽然大多数对象在创建时都会通过PyObject_GC_New,并最终调用_PyObject_GC_New,将创建的对象纳入垃圾收集机制的监控中。
- 但垃圾收集监控的对象并非只有垃圾收集机制才能回收,正常的引用计数就能销毁掉一个被纳入垃圾回收机制监控的对象:
funcobject.c
/* Tear down a function object and release its memory.
 *
 * The steps run in a fixed order: the object is first untracked from the
 * garbage collector, then any weak references to it are cleared, then the
 * references held in its fields are dropped, and finally the memory is
 * released through PyObject_GC_Del().
 *
 * op: the function object being destroyed.
 */
static void
func_dealloc(PyFunctionObject *op)
{
/* Untrack first, before any field is released. */
_PyObject_GC_UNTRACK(op);
/* Only touch the weakref machinery when a weakref list is present. */
if (op->func_weakreflist != NULL)
PyObject_ClearWeakRefs((PyObject *) op);
/* Fields released with Py_DECREF are presumably always set, while those
 * released with Py_XDECREF may be NULL -- TODO confirm against the
 * PyFunctionObject definition. */
Py_DECREF(op->func_code);
Py_DECREF(op->func_globals);
Py_XDECREF(op->func_module);
Py_DECREF(op->func_name);
Py_XDECREF(op->func_defaults);
Py_XDECREF(op->func_kwdefaults);
Py_XDECREF(op->func_doc);
Py_XDECREF(op->func_dict);
Py_XDECREF(op->func_closure);
Py_XDECREF(op->func_annotations);
Py_XDECREF(op->func_qualname);
/* Free the object's memory through the GC-aware deallocator. */
PyObject_GC_Del(op);
}
Modules\gcmodule.c
/* Free the memory of an object that was allocated with a GC header.
 *
 * op: pointer to the object itself; the block that is actually freed is
 *     the GC header obtained from it with AS_GC().
 *
 * If the object is still tracked it is first unlinked from its GC list.
 * The generation-0 counter is decremented as well, but never below zero.
 */
void
PyObject_GC_Del(void *op)
{
    PyGC_Head *header = AS_GC(op);

    /* Unlink from the GC list only when the object is still tracked. */
    if (IS_TRACKED(op)) {
        gc_list_remove(header);
    }

    /* Decrement the generation-0 counter, guarding against underflow. */
    if (_PyRuntime.gc.generations[0].count > 0) {
        _PyRuntime.gc.generations[0].count -= 1;
    }

    /* Release the block starting at the GC header, not at op. */
    PyObject_FREE(header);
}
- 如果PyFunctionObject对象因为正常的引用计数维护到达引用计数为0的状态,就会调用func_dealloc。
- 在这里,PyFunctionObject对象主动将自己从垃圾收集监控的链表中摘除,然后调用PyObject_GC_Del释放内存。
- 之所以调用PyObject_GC_Del,主要是为了将指向PyObject的指针调整为指向PyGC_Head的指针,以释放正确的内存。
- 所以,虽然有很多对象挂在垃圾收集机制监控的链表上,但更多时候是引用计数机制在维护这些对象。
- 只有面对引用计数无能为力的循环引用,垃圾收集机制才会生效。
- 实际上面对除了循环引用之外的对象,垃圾收集是无能为力的:
- 因为挂在垃圾收集机制上的对象都是引用计数不为0的,如果为0则已经被引用计数处理了。
- 而引用计数不为0的对象有两种情况,一是被程序使用的对象,二是循环引用中的对象。
- 被程序使用的对象不能回收,所以垃圾回收面对的只有循环引用中的对象。
- 另外在大多数情况下,Python都在使用内存池,所以垃圾收集和内存管理是融为一体的。
网友评论