大师兄的Python源码学习笔记(三十七): 模块的动态加载机制(四)
大师兄的Python源码学习笔记(三十九): Python的多线程机制(一)
四、Python中的import操作
- 本章将研究import操作所对应的字节码指令序列。
1. import module
demo.py
>>>import sys
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (None)
4 IMPORT_NAME 0 (sys)
6 STORE_NAME 0 (sys)
8 LOAD_CONST 1 (None)
10 RETURN_VALUE
- 这部分代码前面的章节已经涉及到,最终IMPORT_NAME会将加载后的PyModuleObject对象压到运行时栈内。
ceval.c
/*
 * Helper for the IMPORT_NAME opcode: import `name` on behalf of frame `f`.
 *
 *   f        - the executing frame; its f_builtins, f_globals and f_locals
 *              are read here.
 *   name     - the module name taken from the instruction argument.
 *   fromlist - the "from list" object loaded by the preceding LOAD_CONST
 *              (None for a plain `import x`).
 *   level    - the import level as a Python int object.
 *
 * Returns the result of the import call, or NULL with an exception set.
 */
static PyObject *
import_name(PyFrameObject *f, PyObject *name, PyObject *fromlist, PyObject *level)
{
    _Py_IDENTIFIER(__import__);
    PyObject *import_func, *res;
    PyObject* stack[5];

    /* Look up __import__ in the frame's builtins so that a user-installed
       replacement is honoured. */
    import_func = _PyDict_GetItemId(f->f_builtins, &PyId___import__);
    if (import_func == NULL) {
        PyErr_SetString(PyExc_ImportError, "__import__ not found");
        return NULL;
    }

    /* Fast path for not overloaded __import__. */
    if (import_func == PyThreadState_GET()->interp->import_func) {
        /* The C-level entry point takes the level as a C int. */
        int ilevel = _PyLong_AsInt(level);
        if (ilevel == -1 && PyErr_Occurred()) {
            return NULL;
        }
        res = PyImport_ImportModuleLevelObject(
                        name,
                        f->f_globals,
                        f->f_locals == NULL ? Py_None : f->f_locals,
                        fromlist,
                        ilevel);
        return res;
    }

    /* Slow path: __import__ was replaced, so call it like any other callable
       with the five positional arguments (name, globals, locals, fromlist,
       level). Hold a reference to it for the duration of the call. */
    Py_INCREF(import_func);

    stack[0] = name;
    stack[1] = f->f_globals;
    stack[2] = f->f_locals == NULL ? Py_None : f->f_locals;
    stack[3] = fromlist;
    stack[4] = level;
    res = _PyObject_FastCall(import_func, stack, 5);
    Py_DECREF(import_func);
    return res;
}
2. import package
demo.py
>>>import os.path
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (None)
4 IMPORT_NAME 0 (os.path)
6 STORE_NAME 1 (os)
8 LOAD_CONST 1 (None)
10 RETURN_VALUE
- 如果import的动作涉及package,那么IMPORT_NAME指令的指令参数将是module的完整路径信息。
- 对于相对导入(level大于0),IMPORT_NAME指令内部将通过resolve_name解析出module的绝对名称。
Python\import.c
/*
 * Build the absolute dotted module name for a relative import.
 *
 *   name    - the (possibly empty) module name written after the leading dots.
 *   globals - the importing module's globals dict; __package__, __spec__,
 *             __name__ and __path__ are read from it.
 *   level   - the number of leading dots.
 *
 * Returns a new unicode object ("<base>.<name>", or just "<base>" when name
 * is empty), or NULL with an exception set.
 */
static PyObject *
resolve_name(PyObject *name, PyObject *globals, int level)
{
_Py_IDENTIFIER(__spec__);
_Py_IDENTIFIER(__package__);
_Py_IDENTIFIER(__path__);
_Py_IDENTIFIER(__name__);
_Py_IDENTIFIER(parent);
PyObject *abs_name;
/* `package` is released by the error path, so it must be either NULL or a
   reference owned by this function whenever `goto error` is taken. */
PyObject *package = NULL;
PyObject *spec;
Py_ssize_t last_dot;
PyObject *base;
int level_up;
if (globals == NULL) {
PyErr_SetString(PyExc_KeyError, "'__name__' not in globals");
goto error;
}
if (!PyDict_Check(globals)) {
PyErr_SetString(PyExc_TypeError, "globals must be a dict");
goto error;
}
/* A __package__ of None is treated the same as a missing __package__. */
package = _PyDict_GetItemId(globals, &PyId___package__);
if (package == Py_None) {
package = NULL;
}
spec = _PyDict_GetItemId(globals, &PyId___spec__);
/* Case 1: __package__ is set. Use it, and only warn if it disagrees with
   __spec__.parent. */
if (package != NULL) {
/* Take our own reference before any check that may jump to error. */
Py_INCREF(package);
if (!PyUnicode_Check(package)) {
PyErr_SetString(PyExc_TypeError, "package must be a string");
goto error;
}
else if (spec != NULL && spec != Py_None) {
int equal;
PyObject *parent = _PyObject_GetAttrId(spec, &PyId_parent);
if (parent == NULL) {
goto error;
}
equal = PyObject_RichCompareBool(package, parent, Py_EQ);
Py_DECREF(parent);
if (equal < 0) {
goto error;
}
else if (equal == 0) {
/* A mismatch is not fatal unless the warning itself raises. */
if (PyErr_WarnEx(PyExc_ImportWarning,
"__package__ != __spec__.parent", 1) < 0) {
goto error;
}
}
}
}
/* Case 2: no usable __package__, but a __spec__ exists: use spec.parent. */
else if (spec != NULL && spec != Py_None) {
package = _PyObject_GetAttrId(spec, &PyId_parent);
if (package == NULL) {
goto error;
}
else if (!PyUnicode_Check(package)) {
PyErr_SetString(PyExc_TypeError,
"__spec__.parent must be a string");
goto error;
}
}
/* Case 3: neither is available: warn, then derive the package from
   __name__ (and the presence of __path__). */
else {
if (PyErr_WarnEx(PyExc_ImportWarning,
"can't resolve package from __spec__ or __package__, "
"falling back on __name__ and __path__", 1) < 0) {
goto error;
}
package = _PyDict_GetItemId(globals, &PyId___name__);
if (package == NULL) {
PyErr_SetString(PyExc_KeyError, "'__name__' not in globals");
goto error;
}
Py_INCREF(package);
if (!PyUnicode_Check(package)) {
PyErr_SetString(PyExc_TypeError, "__name__ must be a string");
goto error;
}
/* Without __path__ in globals, __name__ is cut at its last dot so that
   only the parent part remains as the package. */
if (_PyDict_GetItemId(globals, &PyId___path__) == NULL) {
Py_ssize_t dot;
if (PyUnicode_READY(package) < 0) {
goto error;
}
/* Direction -1 searches backwards, i.e. finds the last '.'. */
dot = PyUnicode_FindChar(package, '.',
0, PyUnicode_GET_LENGTH(package), -1);
/* -2 is handled as an error; a negative value other than -2 leaves
   `package` unchanged. */
if (dot == -2) {
goto error;
}
if (dot >= 0) {
PyObject *substr = PyUnicode_Substring(package, 0, dot);
if (substr == NULL) {
goto error;
}
Py_SETREF(package, substr);
}
}
}
/* Start with the whole package name; an empty package cannot anchor a
   relative import. */
last_dot = PyUnicode_GET_LENGTH(package);
if (last_dot == 0) {
PyErr_SetString(PyExc_ImportError,
"attempted relative import with no known parent package");
goto error;
}
/* Every level beyond the first drops one trailing component of the package
   by moving last_dot back to the previous '.'. */
for (level_up = 1; level_up < level; level_up += 1) {
last_dot = PyUnicode_FindChar(package, '.', 0, last_dot, -1);
if (last_dot == -2) {
goto error;
}
else if (last_dot == -1) {
/* No dot left to strip: the import climbs above the top package. */
PyErr_SetString(PyExc_ValueError,
"attempted relative import beyond top-level "
"package");
goto error;
}
}
base = PyUnicode_Substring(package, 0, last_dot);
Py_DECREF(package);
/* On failure base is NULL and is returned as the error value; with an empty
   name the base alone is the resolved name. */
if (base == NULL || PyUnicode_GET_LENGTH(name) == 0) {
return base;
}
abs_name = PyUnicode_FromFormat("%U.%U", base, name);
Py_DECREF(base);
return abs_name;
error:
/* package may still be NULL here, hence Py_XDECREF. */
Py_XDECREF(package);
return NULL;
}
3. from a import b
demo.py
>>>from os import path
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (('path',))
4 IMPORT_NAME 0 (os)
6 IMPORT_FROM 1 (path)
8 STORE_NAME 1 (path)
10 POP_TOP
12 LOAD_CONST 2 (None)
14 RETURN_VALUE
- 如果包含from,字节码指令中的LOAD_CONST 1指令参数将不再是None,而是一个tuple对象,也就是源码中的fromlist。
- 随后IMPORT_NAME最终将返回os对应的module对象,供IMPORT_FROM使用:
ceval.c
/* IMPORT_FROM: fetch names[oparg] from the object on top of the stack via
   import_from() and push the result. */
TARGET(IMPORT_FROM) {
PyObject *name = GETITEM(names, oparg);
/* The source object is read with TOP(), not POP(): it stays on the stack
   underneath the pushed result. */
PyObject *from = TOP();
PyObject *res;
res = import_from(from, name);
/* Note that res is pushed before it is checked for NULL. */
PUSH(res);
if (res == NULL)
goto error;
DISPATCH();
}
ceval.c
/*
 * Helper for IMPORT_FROM: obtain `name` from the object `v`.
 * NOTE: this excerpt is abridged ("... ..." marks omitted code), so only the
 * step that is visible here is described.
 */
static PyObject *
import_from(PyObject *v, PyObject *name)
{
PyObject *x;
_Py_IDENTIFIER(__name__);
PyObject *fullmodname, *pkgname, *pkgpath, *pkgname_or_unknown, *errmsg;
... ...
/* Look the module up by fullmodname, which is built in the omitted code
   above; the reference to fullmodname is dropped right after the lookup. */
x = PyImport_GetModule(fullmodname);
Py_DECREF(fullmodname);
... ...
return x;
... ...
}
- IMPORT_FROM会在IMPORT_NAME结果对应的module对象的名字空间中搜索符号,并将找到的对象压入运行时栈,随后由STORE_NAME指令将其存放在当前local名字空间中。
4. from a import *
demo.py
>>>from os import *
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (('*',))
4 IMPORT_NAME 0 (os)
6 IMPORT_STAR
8 LOAD_CONST 2 (None)
10 RETURN_VALUE
- 如果from ... import后面跟的是*,最大的区别是IMPORT_STAR替换了IMPORT_FROM(以及其后的STORE_NAME和POP_TOP):
ceval.c
/* IMPORT_STAR: pop the source object from the stack and copy its names into
   the frame's locals via import_all_from(). */
TARGET(IMPORT_STAR) {
/* `from` is popped, so this opcode must release it on every exit path. */
PyObject *from = POP(), *locals;
int err;
/* f->f_locals is read only after this call succeeds. */
if (PyFrame_FastToLocalsWithError(f) < 0) {
Py_DECREF(from);
goto error;
}
locals = f->f_locals;
if (locals == NULL) {
PyErr_SetString(PyExc_SystemError,
"no locals found during 'import *'");
Py_DECREF(from);
goto error;
}
err = import_all_from(locals, from);
/* Called whether or not import_all_from() succeeded; err is checked
   afterwards. */
PyFrame_LocalsToFast(f, 0);
Py_DECREF(from);
if (err != 0)
goto error;
DISPATCH();
}
ceval.c
/*
 * Helper for IMPORT_STAR: store attributes of `v` into `locals`.
 * The names come from v.__all__, or from the keys of v.__dict__ when __all__
 * is absent. Returns 0 on success and a non-zero value (-1 where set here)
 * on failure.
 * NOTE: this excerpt is abridged ("... ..." marks omitted code).
 */
static int
import_all_from(PyObject *locals, PyObject *v)
{
_Py_IDENTIFIER(__all__);
_Py_IDENTIFIER(__dict__);
PyObject *all, *dict, *name, *value;
int skip_leading_underscores = 0;
int pos, err;
if (_PyObject_LookupAttrId(v, &PyId___all__, &all) < 0) {
return -1; /* Unexpected error */
}
/* No __all__: fall back to the keys of __dict__. */
if (all == NULL) {
if (_PyObject_LookupAttrId(v, &PyId___dict__, &dict) < 0) {
return -1;
}
if (dict == NULL) {
PyErr_SetString(PyExc_ImportError,
"from-import-* object has no __dict__ and no __all__");
return -1;
}
all = PyMapping_Keys(dict);
Py_DECREF(dict);
if (all == NULL)
return -1;
/* Set only on the __dict__ fallback path; presumably consumed by the
   omitted part of the loop below. */
skip_leading_underscores = 1;
}
/* Walk `all` by index; the loop is left via break (the end-of-sequence
   handling is in the omitted code). */
for (pos = 0, err = 0; ; pos++) {
name = PySequence_GetItem(all, pos);
... ...
value = PyObject_GetAttr(v, name);
if (value == NULL)
err = -1;
/* Exact dicts use PyDict_SetItem; any other locals object goes through
   the generic PyObject_SetItem. */
else if (PyDict_CheckExact(locals))
err = PyDict_SetItem(locals, name, value);
else
err = PyObject_SetItem(locals, name, value);
Py_DECREF(name);
/* value may be NULL here, hence Py_XDECREF. */
Py_XDECREF(value);
if (err != 0)
break;
}
Py_DECREF(all);
return err;
}
- 这种形式使用了module文件中的特殊符号__all__,它可以控制module想要暴露给外界的符号;如果module没有定义__all__,则改用__dict__中的全部键,并设置skip_leading_underscores以跳过以下划线开头的名字。
- 最终通过for循环,依次从IMPORT_NAME得到的module对象中取出每个name对应的对象,并存放在当前local名字空间中。
5. import a as b
demo.py
>>>import pandas as pd
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (None)
4 IMPORT_NAME 0 (pandas)
6 STORE_NAME 1 (pd)
8 LOAD_CONST 1 (None)
10 RETURN_VALUE
- as的重命名机制很简单,就是通过STORE_NAME指令向当前local名字空间引入符号时,加入指定的符号作为参数替换默认符号。
6. reload
demo.py
>>>import importlib,os
>>>importlib.reload(os)
1 0 LOAD_CONST 0 (0)
2 LOAD_CONST 1 (None)
4 IMPORT_NAME 0 (importlib)
6 STORE_NAME 0 (importlib)
8 LOAD_CONST 0 (0)
10 LOAD_CONST 1 (None)
12 IMPORT_NAME 1 (os)
14 STORE_NAME 1 (os)
2 16 LOAD_NAME 0 (importlib)
18 LOAD_METHOD 2 (reload)
20 LOAD_NAME 1 (os)
22 CALL_METHOD 1
24 POP_TOP
26 LOAD_CONST 1 (None)
28 RETURN_VALUE
- 可以从源码看出,reload在Python3中实际就是执行了importlib包中的reload函数:
>>>def reload(module):
... ...
>>> try:
>>> name = module.__spec__.name
>>> except AttributeError:
>>> name = module.__name__
>>> if sys.modules.get(name) is not module:
>>> msg = "module {} not in sys.modules"
>>> raise ImportError(msg.format(name), name=name)
>>> if name in _RELOADING:
>>> return _RELOADING[name]
>>> _RELOADING[name] = module
>>> try:
>>> parent_name = name.rpartition('.')[0]
>>> if parent_name:
>>> try:
>>> parent = sys.modules[parent_name]
>>> except KeyError:
>>> msg = "parent {!r} not in sys.modules"
>>> raise ImportError(msg.format(parent_name),
>>> name=parent_name) from None
>>> else:
>>> pkgpath = parent.__path__
>>> else:
>>> pkgpath = None
>>> target = module
>>> spec = module.__spec__ = _bootstrap._find_spec(name, pkgpath, target)
>>> if spec is None:
>>> raise ModuleNotFoundError(f"spec not found for the module {name!r}", name=name)
>>> _bootstrap._exec(spec, module)
>>> # The module may have replaced itself in sys.modules!
>>> return sys.modules[name]
>>> finally:
>>> try:
>>> del _RELOADING[name]
>>> except KeyError:
>>> pass
网友评论