大师兄的Python源码学习笔记(十四): 虚拟机中的控制流(一)
大师兄的Python源码学习笔记(十六): 虚拟机中的控制流(三)
二、虚拟机中的for循环控制流
- 与if控制结构不同,for循环控制结构包含一种特殊的指令跳跃方式:指令回退。
- 先写一段简单的代码:
demo.py
for i in range(10):  # line 1 of the disassembly: SETUP_LOOP ... FOR_ITER
    print(1)  # line 2 of the disassembly: the loop body
- 生成的字节码指令序列如下:
1 0 SETUP_LOOP 24 (to 26)
2 LOAD_NAME 0 (range)
4 LOAD_CONST 0 (10)
6 CALL_FUNCTION 1
8 GET_ITER
>> 10 FOR_ITER 12 (to 24)
12 STORE_NAME 1 (i)
2 14 LOAD_NAME 2 (print)
16 LOAD_CONST 1 (1)
18 CALL_FUNCTION 1
20 POP_TOP
22 JUMP_ABSOLUTE 10
>> 24 POP_BLOCK
>> 26 LOAD_CONST 2 (None)
28 RETURN_VALUE
1. 循环控制结构的初始化
- for循环控制流的实现从SETUP_LOOP开始:
ceval.c
/* The three block-setup opcodes share a single handler body: each one
   records a new block on the current frame. */
TARGET(SETUP_LOOP)
TARGET(SETUP_EXCEPT)
TARGET(SETUP_FINALLY) {
/* NOTE: If you add any new block-setup opcodes that
are not try/except/finally handlers, you may need
to update the PyGen_NeedsFinalizing() function.
*/
/* The block's type is the opcode itself, its handler is
   INSTR_OFFSET() + oparg, and its level is the current STACK_LEVEL(). */
PyFrame_BlockSetup(f, opcode, INSTR_OFFSET() + oparg,
STACK_LEVEL());
DISPATCH();
}
- 可以看到,应对SETUP_LOOP虚拟机调用了PyFrame_BlockSetup。
Objects\frameobject.c
/* Push a new entry onto the block stack of frame f.

   f       - frame whose f_blockstack receives the entry
   type    - value stored in the entry's b_type
   handler - value stored in the entry's b_handler
   level   - value stored in the entry's b_level

   Calls Py_FatalError when f_iblock has already reached CO_MAXBLOCKS. */
void
PyFrame_BlockSetup(PyFrameObject *f, int type, int handler, int level)
{
PyTryBlock *b;
/* Refuse to write past the end of f_blockstack. */
if (f->f_iblock >= CO_MAXBLOCKS)
Py_FatalError("XXX block stack overflow");
/* Take the slot at the current index, then advance the index. */
b = &f->f_blockstack[f->f_iblock++];
b->b_type = type;
b->b_level = level;
b->b_handler = handler;
}
- 而PyFrame_BlockSetup使用了PyFrameObject中的f_iblock。
Include\frameobject.h
/* Frame object: holds the link to the previous frame, the code object,
   the builtin/global/local symbol tables, the value stack pointers,
   tracing state, and the block stack (f_iblock / f_blockstack) that
   PyFrame_BlockSetup writes to. */
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
PyObject *f_trace; /* Trace function */
char f_trace_lines; /* Emit per-line trace events? */
char f_trace_opcodes; /* Emit per-opcode trace events? */
/* Borrowed reference to a generator, or NULL */
PyObject *f_gen;
int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
int f_lineno; /* Current line number */
int f_iblock; /* index in f_blockstack */
char f_executing; /* whether the frame is still executing */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
- CO_MAXBLOCKS是一个值为20的常量,用来限制函数内静态嵌套块的数量。
Include\frameobject.h
/* Capacity of PyFrameObject.f_blockstack; PyFrame_BlockSetup calls
   Py_FatalError once f_iblock reaches this value. */
#define CO_MAXBLOCKS 20 /* Max static block nesting within a function */
- f_blockstack是一个PyTryBlock结构的数组,每个PyTryBlock中存放了块的类型(b_type)、处理程序的跳转目标(b_handler)以及运行时栈需要恢复到的深度(b_level)等信息,用于循环和异常控制流。
Include\frameobject.h
/* One entry of a frame's block stack (PyFrameObject.f_blockstack);
   all three fields are filled in by PyFrame_BlockSetup. */
typedef struct {
int b_type; /* what kind of block this is */
int b_handler; /* where to jump to find handler */
int b_level; /* value stack level to pop to */
} PyTryBlock;
2. list迭代器
- 案例中,在申请PyTryBlock结构的空间后,虚拟机通过LOAD_NAME、LOAD_CONST和CALL_FUNCTION三个字节码指令,完成range(10)的操作,将一个可迭代对象压入运行时栈。
2 LOAD_NAME 0 (range)
4 LOAD_CONST 0 (10)
6 CALL_FUNCTION 1
8 GET_ITER
- 紧接着通过GET_ITER字节码指令来获得可迭代对象的迭代器。
ceval.c
/* GET_ITER: replace the object on top of the value stack with the
   result of PyObject_GetIter() on it. */
TARGET(GET_ITER) {
/* before: [obj]; after [getiter(obj)] */
PyObject *iterable = TOP();
PyObject *iter = PyObject_GetIter(iterable);
/* The stack slot is about to be overwritten, so release the iterable. */
Py_DECREF(iterable);
/* Stored even when iter is NULL; the NULL case is handled just below. */
SET_TOP(iter);
if (iter == NULL)
goto error;
/* NOTE(review): PREDICT is defined outside this excerpt; presumably a
   fast-path hint for the opcode expected to follow - confirm in ceval.c. */
PREDICT(FOR_ITER);
PREDICT(CALL_FUNCTION);
DISPATCH();
}
- 虚拟机首先通过TOP宏获得运行时栈顶的可迭代对象。
ceval.c
#define TOP() (stack_pointer[-1])
- 然后通过PyObject_GetIter函数获得对应的迭代器:若类型定义了tp_iter,则调用t->tp_iter(o);否则,若对象是序列,则通过PySeqIter_New创建一个序列迭代器。
Objects\abstract.c
/* Return an iterator for object o.

   If o's type provides tp_iter, that slot is called and its result is
   returned, provided the result passes PyIter_Check; otherwise a
   TypeError is raised and NULL is returned.  If the type has no tp_iter
   but o passes PySequence_Check, a sequence iterator from PySeqIter_New
   is returned.  In any other case the result of type_error() is returned
   (presumably NULL with a TypeError set - type_error is defined outside
   this excerpt). */
PyObject *
PyObject_GetIter(PyObject *o)
{
    PyTypeObject *t = o->ob_type;
    getiterfunc f;

    f = t->tp_iter;
    if (f == NULL) {
        /* No tp_iter slot: fall back to the sequence protocol. */
        if (PySequence_Check(o))
            return PySeqIter_New(o);
        return type_error("'%.200s' object is not iterable", o);
    }
    else {
        PyObject *res = (*f)(o);
        /* tp_iter must hand back a real iterator; reject anything else. */
        if (res != NULL && !PyIter_Check(res)) {
            PyErr_Format(PyExc_TypeError,
                         "iter() returned non-iterator "
                         "of type '%.100s'",
                         res->ob_type->tp_name);
            Py_DECREF(res);
            res = NULL;
        }
        return res;
    }
}
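- 以PySeqIter_New创建的通用序列迭代器为例,其类型对象PySeqIter_Type如下(注意:Python 3中range对象自身定义了tp_iter,实际得到的是range专用的迭代器,这里仅借序列迭代器说明迭代器对象的一般结构):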
Objects\iterobject.c
/* Type object of the iterator that PySeqIter_New allocates (it passes
   &PySeqIter_Type to PyObject_GC_New).  The slots relevant to iteration
   are tp_iter (PyObject_SelfIter) and tp_iternext (iter_iternext). */
PyTypeObject PySeqIter_Type = {
PyVarObject_HEAD_INIT(&PyType_Type, 0)
"iterator", /* tp_name */
sizeof(seqiterobject), /* tp_basicsize */
0, /* tp_itemsize */
/* methods */
(destructor)iter_dealloc, /* tp_dealloc */
0, /* tp_print */
0, /* tp_getattr */
0, /* tp_setattr */
0, /* tp_reserved */
0, /* tp_repr */
0, /* tp_as_number */
0, /* tp_as_sequence */
0, /* tp_as_mapping */
0, /* tp_hash */
0, /* tp_call */
0, /* tp_str */
PyObject_GenericGetAttr, /* tp_getattro */
0, /* tp_setattro */
0, /* tp_as_buffer */
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
0, /* tp_doc */
(traverseproc)iter_traverse, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
PyObject_SelfIter, /* tp_iter */
iter_iternext, /* tp_iternext */
seqiter_methods, /* tp_methods */
0, /* tp_members */
};
- 观察迭代器对象的创建函数PySeqIter_New:
Objects\iterobject.c
/* Create a sequence iterator over seq.

   Returns a new seqiterobject (as PyObject *) positioned at index 0 and
   holding its own reference to seq.  Returns NULL if seq fails
   PySequence_Check (after PyErr_BadInternalCall) or if allocation
   fails. */
PyObject *
PySeqIter_New(PyObject *seq)
{
seqiterobject *it;
if (!PySequence_Check(seq)) {
PyErr_BadInternalCall();
return NULL;
}
it = PyObject_GC_New(seqiterobject, &PySeqIter_Type);
if (it == NULL)
return NULL;
/* Iteration starts at the first element. */
it->it_index = 0;
/* The iterator keeps seq alive for as long as it references it. */
Py_INCREF(seq);
it->it_seq = seq;
_PyObject_GC_TRACK(it);
return (PyObject *)it;
}
- 可以看出可迭代对象的迭代器对象对可迭代对象进行了简单的封装,并通过序号it_index实现遍历。
3. 迭代控制
- 为了避免每次循环都要创建迭代器,源码级的循环控制结构一定对应着虚拟机一级的循环控制结构。
- 而Python虚拟机一级的迭代循环,从FOR_ITER字节码指令开始。
>> 10 FOR_ITER 12 (to 24)
ceval.c
/* FOR_ITER: ask the iterator on top of the stack for its next item.
   If there is one, push it and continue with the loop body; otherwise
   pop the iterator and jump forward by oparg. */
PREDICTED(FOR_ITER);
TARGET(FOR_ITER) {
/* before: [iter]; after: [iter, iter()] *or* [] */
PyObject *iter = TOP();
PyObject *next = (*iter->ob_type->tp_iternext)(iter);
if (next != NULL) {
/* Got an item: the iterator stays on the stack beneath it. */
PUSH(next);
PREDICT(STORE_FAST);
PREDICT(UNPACK_SEQUENCE);
DISPATCH();
}
/* next == NULL: either an exception is pending or the iterator simply
   finished without setting one. */
if (PyErr_Occurred()) {
/* Anything other than StopIteration is a real error. */
if (!PyErr_ExceptionMatches(PyExc_StopIteration))
goto error;
else if (tstate->c_tracefunc != NULL)
call_exc_trace(tstate->c_tracefunc, tstate->c_traceobj, tstate, f);
/* StopIteration only signals the end of the loop; discard it. */
PyErr_Clear();
}
/* iterator ended normally */
/* Remove the iterator from the stack and release it. */
STACKADJ(-1);
Py_DECREF(iter);
/* Skip forward over the loop body. */
JUMPBY(oparg);
PREDICT(POP_BLOCK);
DISPATCH();
}
- FOR_ITER首先通过TOP()宏获得运行时栈顶的迭代器对象,然后通过tp_iternext开始迭代:
Objects\iterobject.c
/* tp_iternext implementation for the sequence iterator.

   Returns the item at it_index via PySequence_GetItem and advances the
   index.  Returns NULL when the iterator is exhausted or on error:
   - it_seq already NULL: returns NULL without setting an exception here;
   - it_index == PY_SSIZE_T_MAX: sets OverflowError;
   - PySequence_GetItem failed with IndexError or StopIteration: the
     error is cleared and the iterator drops its reference to the
     sequence;
   - any other failure: the pending exception is left in place. */
static PyObject *
iter_iternext(PyObject *iterator)
{
seqiterobject *it;
PyObject *seq;
PyObject *result;
assert(PySeqIter_Check(iterator));
it = (seqiterobject *)iterator;
seq = it->it_seq;
/* it_seq is set to NULL below once the sequence has been exhausted. */
if (seq == NULL)
return NULL;
/* Incrementing it_index past PY_SSIZE_T_MAX would overflow. */
if (it->it_index == PY_SSIZE_T_MAX) {
PyErr_SetString(PyExc_OverflowError,
"iter index too large");
return NULL;
}
result = PySequence_GetItem(seq, it->it_index);
if (result != NULL) {
it->it_index++;
return result;
}
/* Running off the end is the normal way for iteration to finish. */
if (PyErr_ExceptionMatches(PyExc_IndexError) ||
PyErr_ExceptionMatches(PyExc_StopIteration))
{
PyErr_Clear();
it->it_seq = NULL;
Py_DECREF(seq);
}
return NULL;
}
- 这段代码通过PySequence_GetItem获取序列中的下一个元素对象并返回,同时将it_index加1。
- 当跳转到字节码指令JUMP_ABSOLUTE时,虚拟机实现了字节码的向后回退动作,而Python虚拟机也在FOR_ITER指令和JUMP_ABSOLUTE指令之间成功构造出一个循环结构。
ceval.c
/* JUMP_ABSOLUTE: continue execution at the absolute bytecode offset
   given by oparg.  In the for-loop example this is the backward jump
   from the end of the loop body to FOR_ITER at offset 10. */
PREDICTED(JUMP_ABSOLUTE);
TARGET(JUMP_ABSOLUTE) {
    JUMPTO(oparg);
#if FAST_LOOPS
    /* With FAST_LOOPS enabled each loop iteration goes through
       fast_next_opcode, skipping the periodic checks done by the
       normal dispatch path. */
    goto fast_next_opcode;
#else
    DISPATCH();
#endif
}
4. 终止迭代
>> 10 FOR_ITER 12 (to 24)
12 STORE_NAME 1 (i)
2 14 LOAD_NAME 2 (print)
16 LOAD_CONST 1 (1)
18 CALL_FUNCTION 1
20 POP_TOP
22 JUMP_ABSOLUTE 10
>> 24 POP_BLOCK
- 当tp_iternext返回NULL(即FOR_ITER中的next为NULL),且没有发生StopIteration以外的异常时,循环结束,虚拟机通过JUMPBY宏向前跳跃到POP_BLOCK字节码。
ceval.c
/* Advance next_instr by a relative offset x: x is divided by
   sizeof(_Py_CODEUNIT) before being added to next_instr. */
#define JUMPBY(x) (next_instr += (x) / sizeof(_Py_CODEUNIT))
/* POP_BLOCK: leave the current block and restore the value stack to
   the level recorded in it. */
PREDICTED(POP_BLOCK);
TARGET(POP_BLOCK) {
/* NOTE(review): PyFrame_BlockPop is defined outside this excerpt;
   presumably it removes and returns the top entry of f_blockstack. */
PyTryBlock *b = PyFrame_BlockPop(f);
/* Pop values until the stack is back at b->b_level. */
UNWIND_BLOCK(b);
DISPATCH();
}
- POP_BLOCK实际上是将之前申请的PyTryBlock结构归还给了f->f_blockstack。
ceval.c
/* Pop value-stack entries, releasing each with Py_XDECREF, until
   STACK_LEVEL() is no greater than the b_level saved in block b. */
#define UNWIND_BLOCK(b) \
while (STACK_LEVEL() > (b)->b_level) { \
PyObject *v = POP(); \
Py_XDECREF(v); \
}
- 虚拟机会抽取在SETUP_LOOP指令处保存在PyTryBlock中的信息,并根据其中存的栈深度将运行时栈恢复到SETUP_LOOP之前的状态。