CPython源码阅读笔记(1)

2017-06-21 Mithrilwoodrat 更多博文 » 博客 » GitHub »

原文链接 http://woodrat.xyz/2017/06/21/CPython%E6%BA%90%E7%A0%81%E9%98%85%E8%AF%BB%E7%AC%94%E8%AE%B0%281%29
注：以下为加速网络访问所做的原文缓存，经过重新格式化，可能存在格式方面的问题，或偶有遗漏信息，请以原文为准。

准备调试环境

目前 CPython 的开发已经迁移到了 Github 上，可以直接去 Github clone 对应的分支。我们将基于 Python 2.7.13 版本， Linux x86_64 环境进行接下来的工作。下载好代码以后以

./configure --with-pydebug

make -j2

编译。

调试可以直接使用 GDB，然后使用 Emacs + Ctags 看代码。(喜欢使用 IDE 的话可以考虑 eclipse-cdt)

寻找参考资料

官方的 Python Developer’s Guide 是开始时必备的资料。 readthedocs 上另外一个版本的 Python Developer’s Guide 更详细一点。 Exploring CPython’s Internals 一节列出了 CPython 的目录结构，以及推荐了几篇很有参考价值的文章。

Internals of CPython (这篇比较长，写的比较仔细)
Yet another guided tour of CPython (这篇的作者是 Guido)

可以参考 devguide 的 compiler 一节来调试 Python 解释器，跟着执行流程一步步看代码。

另外 Philip Guo 的博客上关于 Python 的博文也十分有价值。

中文资料中《Python 源码剖析》一定不能错过。

从 main 开始

在 gdb 中直接 b main，输入 list 可以看到对应 main 函数的代码为

// Modules/python.c
#include "Python.h"

int
main(int argc, char **argv)
{
    return Py_Main(argc, argv);
}

在编辑器中打开对应文件，在 Python 2.7 中位于 Modules/python.c，该文件只是一个入口，直接调用了 Py_Main。Py_Main 位于 Modules/main.c 中，该函数的主要作用如下：

初始化环境变量和命令行参数
如果参数里有 -R 则调用 _PyRandom_Init 初始化 Hash 算法的随机数生成，使得 dict 对象中 key 的顺序每次启动时随机。
调用 Py_Initialize 初始化 Python 解释器
根据命令行选项决定是运行模块 -m RunModule，还是运行一条语句 -c PyRun_SimpleStringFlags，或是运行一个文件PyRun_AnyFileExFlags

我们先跟一下 PyRun_SimpleStringFlags, 该函数添加了一个默认的 __main__ 后直接调用了 PyRun_StringFlags。

截取 PyRun_StringFlags 函数部分代码如下

PyParser_ASTFromString 对输入的源码进行词法语法分析生成 AST，run_mod生成字节码并运行。

// Python/pythonrun.c PyRun_StringFlags
PyObject *
PyRun_StringFlags(const char *str, int start, PyObject *globals,
                  PyObject *locals, PyCompilerFlags *flags)
{
    PyObject *ret = NULL;
    mod_ty mod;
    PyArena *arena = PyArena_New(); /* 为编译阶段申请一块内存池 */
    if (arena == NULL)
        return NULL;

    mod = PyParser_ASTFromString(str, "<string>", start, flags, arena); /* 将语句解析为 AST */
    if (mod != NULL)
        ret = run_mod(mod, "<string>", globals, locals, flags, arena); /* 调用 run_mod */
    PyArena_Free(arena);
    return ret;
}

词法语法分析

PyParser_ASTFromString 先将源码解析为 Parser Tree，再调用 PyAST_FromNode，将 Parser Tree 转换为 AST。

// Python/pythonrun.c 54 行 引用了 graminit 中的 grammar 结构，该结构将在下面几个函数中以 `g` 变量名传递 
extern grammar _PyParser_Grammar; /* From graminit.c */ 
// Python/pythonrun.c PyParser_ASTFromString
mod_ty
PyParser_ASTFromString(const char *s, const char *filename, int start,
                       PyCompilerFlags *flags, PyArena *arena)
{
    mod_ty mod;
    PyCompilerFlags localflags;
    perrdetail err;
    int iflags = PARSER_FLAGS(flags);

    // 将源码解析为了 Parser Tree
    node *n = PyParser_ParseStringFlagsFilenameEx(s, filename,
                                    &_PyParser_Grammar, start, &err,
                                    &iflags);
    if (flags == NULL) {
        localflags.cf_flags = 0;
        flags = &localflags;
    }
    if (n) {
        flags->cf_flags |= iflags & PyCF_MASK;
        // 将 Parser Tree 转换为 AST
        mod = PyAST_FromNode(n, flags, filename, arena);
        PyNode_Free(n);
        return mod; // 返回 AST
    }
    else {
        err_input(&err);
        return NULL;
    }
}

生成 TOKEN

// Parser/parsetok.c PyParser_ParseStringFlagsFilenameEx
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
                          grammar *g, int start,
                          perrdetail *err_ret, int *flags)
{
    struct tok_state *tok;

    initerr(err_ret, filename);
    // 初始化词法分析模块
    if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) {
        err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
        return NULL;
    }

    tok->filename = filename ? filename : "<string>";
    if (Py_TabcheckFlag || Py_VerboseFlag) {
        tok->altwarning = (tok->filename != NULL);
        if (Py_TabcheckFlag >= 2)
            tok->alterror++;
    }
    // 调用 parsetok ，将源码转换为 token 序列
    return parsetok(tok, g, start, err_ret, flags);

parsetok 循环直到读完整个字符串，将分出来的 token 添加到 parser 中，最后返回 parser tree token 的 type 定义在 Include/token.h 中

// Parser/parsetok.c parsetok
static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    // 初始化 parser 
    if ((ps = PyParser_New(g, start)) == NULL) {
    ....
    // 获取 token type
    type = PyTokenizer_Get(tok, &a, &b);
    ....
    // 将 token 赋值给 str
    len = b - a; /* XXX this may compute NULL - NULL */
    str = (char *) PyObject_MALLOC(len + 1);
    if (len > 0)
        strncpy(str, a, len);
    str[len] = '\0';
    ....
    // 将 token 添加到 parser tree 中
    if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    ....
    if (err_ret->error == E_DONE) {
        n = ps->p_tree;
        ps->p_tree = NULL;
    }
    else
        n = NULL;
    ....
    done:
        PyTokenizer_Free(tok); // 释放词法分析器
    return n; // 返回 parser->p_tree
}

以 a = 1 为例，循环5次得到的 str 和 type 依次为：

str: a, type: 1(NAME)
str: =, type: 22(EQUAL)
str: 1, type: 2(NUMBER)
str: \n, type: 4(NEWLINE)
str: 未初始化, type: 0(ENDMARKER)

node 的结构定义在 /Include/node.h 中

typedef struct _node {
    short       n_type;
    char        *n_str;
    int         n_lineno;
    int         n_col_offset;
    int         n_nchildren;
    struct _node    *n_child;
} node;

Add TOKEN to Parser Tree

使用 export PYTHONDEBUG=1 开启 python 的调试模式，可以看到 python 将 TOKEN 解析为 Parser Tree 的过程。

./Python-2.7.12/python -d -c 'a = 1'
Token NAME/'a' ... It's a token we know
....
 DFA 'atom', state 0: Shift.
  DFA 'atom', state 5: Direct pop.
Token EQUAL/'=' ... It's a token we know
....
 DFA 'expr_stmt', state 1: Shift.
Token NUMBER/'1' ... It's a token we know
....
  DFA 'atom', state 5: Direct pop.
Token NEWLINE/'' ... It's a token we know
....
  DFA 'simple_stmt', state 3: Direct pop.
  DFA 'stmt', state 1: Direct pop.
Token NEWLINE/'' ... It's a token we know
 DFA 'file_input', state 0: Shift.
Token ENDMARKER/'' ... It's a token we know
 DFA 'file_input', state 0: Shift.
  DFA 'file_input', state 1: Direct pop.
  ACCEPT.
[35150 refs]

可以看到只有正确解析的 TOKEN 才会显示 "Direct pop

生成 TOKEN 后，按照语法规则生成 Parser Tree

// Parser/parser.c PyParser_AddToken
int
PyParser_AddToken(register parser_state *ps, register int type, char *str,
                  int lineno, int col_offset, int *expected_ret)
{
    ...
    /* Loop until the token is shifted or an error occurred */
    for (;;) {
        ...
        /* Pop while we are in an accept-only state */
                while (s = &d->d_state
                                [ps->p_stack.s_top->s_state],
                    s->s_accept && s->s_narcs == 1) {
                    D(printf("  DFA '%s', state %d: "
                             "Direct pop.\n",
                             d->d_name,
                             ps->p_stack.s_top->s_state));
        ...
    }
}

所以上面 Tree 的构建顺序为 atom expr_stmt atom simple_stmt stmt file_input

Parser Tree to AST

在上面的 PyParser_ASTFromString 函数中可以看到，在调用 PyParser_ParseStringFlagsFilenameEx 生成 Parser Tree ( concrete syntax tree ) 后，调用 PyAST_FromNode 生成 AST。

// ast.c PyAST_FromNode
mod_ty
PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
               PyArena *arena)
{
    ...
    stmts = asdl_seq_new(num_stmts(n), arena); /* 为 AST 申请内存 */
    if (!stmts)
        return NULL;
    for (i = 0; i < NCH(n) - 1; i++) {
        ch = CHILD(n, i);
        if (TYPE(ch) == NEWLINE)
            continue;
        REQ(ch, stmt);
        num = num_stmts(ch);
        if (num == 1) {
            s = ast_for_stmt(&c, ch); /* 生成对应的 AST Node */
            if (!s)
                goto error;
            asdl_seq_SET(stmts, k++, s); /* 将对应的 AST Node 设置到 asdl_seq 中对应的位置 */
        }
        ...
        return Module(stmts, arena); /* 转换 asdl_seq 为 mod_ty， mod_ty 事实上只是一个对
                                        asdl_seq 的简单封装，给 asdl_seq 加上了类型而已 */
    }
    ...
}

asdl_seq 结构定义在 Include/asdl.h 中，其实就是一个放 AST Node 的数组，根据注释可以看出，这个文件是由 Parser/asdl_c.py 生成的。

/* It would be nice if the code generated by asdl_c.py was completely
   independent of Python, but it is a goal the requires too much work
   at this stage.  So, for example, I'll represent identifiers as
   interned Python strings.
*/

/* XXX A sequence should be typed so that its use can be typechecked. */

typedef struct {
    int size;
    void *elements[1];
} asdl_seq;

asdl_c.py 将 Parser/Python.asdl 中定义的 asdl 生成 C 代码，对应的 asdl 的实现在 Parser/asdl.py 中。

ast_for_xxx 是生成对应 xxx AST Node 的函数， ast_for_stmt 就是生成 stmt node (stmt_ty 由 asdl_c.py 生成)。

ast_for_stmt 其实就是一个巨大的 switch ，根据语法规则调用不同的 ast_for_xxx, 例如 a = 1，就会调用到 ast_for_expr

a = 1 生成 AST 的 callstack 如下

ast_for_stmt
ast_for_expr_stmt
ast_for_testlist
ast_for_expr

最后返回的 AST 可以由下面的 Python 代码查看

import ast
t = ast.parse('a = 1')
ast.dump(t)

>>> "Module(body=[Assign(targets=[Name(id='a', ctx=Store())], value=Num(n=1))])"

AST to CodeObject

上面分析到 PyParser_ASTFromString 将源码解析为 AST (mod_ty)，调用 run_mod 函数。

run_mod 中有两个主要的函数:

PyAST_Compile,
PyEval_EvalCode

PyAST_Compile 将 AST 编译成 CodeObject，PyEval_EvalCode 运行编译后的 CodeObject。

// Python/pythonrun.c run_mod
static PyObject *
run_mod(mod_ty mod, const char *filename, PyObject *globals, PyObject *locals,
         PyCompilerFlags *flags, PyArena *arena)
{
    PyCodeObject *co;
    PyObject *v;
    co = PyAST_Compile(mod, filename, flags, arena);
    if (co == NULL)
        return NULL;
    v = PyEval_EvalCode(co, globals, locals);
    Py_DECREF(co);
    return v;
}

我们先看一下编译的过程

Build Symbol Table

PyAST_Compile 里主要使用 PySymtable_Build, compiler_mod 两个函数

// Python/compile.c PyAST_Compile
PyCodeObject *
PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags,
              PyArena *arena)
{
    struct compiler c; // 初始化 compiler 结构
    PyCodeObject *co = NULL;
    PyCompilerFlags local_flags;
    ...

    if (!compiler_init(&c))
        return NULL;
    c.c_filename = filename;
    c.c_arena = arena;
    c.c_future = PyFuture_FromAST(mod, filename);
    ...

    c.c_st = PySymtable_Build(mod, filename, c.c_future);

    co = compiler_mod(&c, mod);

 finally:
    compiler_free(&c);
    assert(co || PyErr_Occurred());
    return co;
}

PySymtable_Build 遍历 AST 构建 Symbol table，存储一些类名、变量名之类的信息。

struct symtable {
    const char *st_filename; /* name of file being compiled */
    struct _symtable_entry *st_cur; /* current symbol table entry */
    struct _symtable_entry *st_top; /* module entry */
    PyObject *st_symbols;    /* dictionary of symbol table entries */
    PyObject *st_stack;      /* stack of namespace info */
    PyObject *st_global;     /* borrowed ref to MODULE in st_symbols */
    int st_nblocks;          /* number of blocks */
    PyObject *st_private;        /* name of current class or NULL */
    PyFutureFeatures *st_future; /* module's future features */
};

typedef struct _symtable_entry {
    PyObject_HEAD
    PyObject *ste_id;        /* int: key in st_symbols */
    PyObject *ste_symbols;   /* dict: name to flags */
    PyObject *ste_name;      /* string: name of block */
    PyObject *ste_varnames;  /* list of variable names */
    PyObject *ste_children;  /* list of child ids */
    _Py_block_ty ste_type;   /* module, class, or function */
    int ste_unoptimized;     /* false if namespace is optimized */
    int ste_nested;      /* true if block is nested */
    unsigned ste_free : 1;        /* true if block has free variables */
    unsigned ste_child_free : 1;  /* true if a child block has free vars,
                                     including free refs to globals */
    unsigned ste_generator : 1;   /* true if namespace is a generator */
    unsigned ste_varargs : 1;     /* true if block has varargs */
    unsigned ste_varkeywords : 1; /* true if block has varkeywords */
    unsigned ste_returns_value : 1;  /* true if namespace uses return with
                                        an argument */
    int ste_lineno;          /* first line of block */
    int ste_opt_lineno;      /* lineno of last exec or import * */
    int ste_tmpname;         /* counter for listcomp temp vars */
    struct symtable *ste_table;
} PySTEntryObject;

Build CFG

AST 传入 compiler_mod 后会调用 compiler_XXX 生成 CFG。然后 assemble 会从 CFG 生成最后的 bytecode。

// Python/compiler.c compiler_mod 
static PyCodeObject *
compiler_mod(struct compiler *c, mod_ty mod)
{
    PyCodeObject *co;
    int addNone = 1;
    ...
    if (!compiler_enter_scope(c, module, mod, 0))
        return NULL;
    switch (mod->kind) {
    case Module_kind:
        if (!compiler_body(c, mod->v.Module.body)) {
            compiler_exit_scope(c);
            return 0;
        }
        break;
    ...
    }
    co = assemble(c, addNone);
    compiler_exit_scope(c);
    return co;
}

根据上面 AST 生成一节， a = 1 的 AST 为 Module(body=[Assign(targets=[Name(id='a', ctx=Store())], value=Num(n=1))])，所以将会执行 compiler_body。

// Python/compiler.c compiler_body
static int
compiler_body(struct compiler *c, asdl_seq *stmts)
{
    ...
    for (; i < asdl_seq_LEN(stmts); i++)
        VISIT(c, stmt, (stmt_ty)asdl_seq_GET(stmts, i));
    ...
}

VISIT 是定义在 compiler.c 中的一个宏，根据 AST Node 的 Type，调用不同的 compiler_visit_xxx 函数。

#define VISIT(C, TYPE, V) {\
    if (!compiler_visit_ ## TYPE((C), (V))) \
        return 0; \
}

这里 VISIT 宏展开是 compiler_visit_stmt。

// Python/compiler.c compiler_visit_stmt
static int
compiler_visit_stmt(struct compiler *c, stmt_ty s)
{
    ...
    switch (s->kind) {
        case Assign_kind:
            n = asdl_seq_LEN(s->v.Assign.targets);
            VISIT(c, expr, s->v.Assign.value);
            for (i = 0; i < n; i++) {
                if (i < n - 1)
                    ADDOP(c, DUP_TOP);
                VISIT(c, expr,
                    (expr_ty)asdl_seq_GET(s->v.Assign.targets, i));
            }
            break;
    }
    ...
}

首先调用 VISIT(c, expr, s->v.Assign.value); 也就是 compiler_visit_expr 函数。传入的参数为 Assign.value，根据 AST value=Num(n=1)。可以在 gdb 中验证。

>> p e.kind
$2 = Num_kind

// Python/compiler.c compiler_visit_expr
static int
compiler_visit_expr(struct compiler *c, expr_ty e)
{
    switch (e->kind) {
        case Num_kind:
        ADDOP_O(c, LOAD_CONST, e->v.Num.n, consts);
        break;
    }
}

ADDOP_O 是一个宏，用于将 opcode 添加到 compiler 结构中。添加 opcode 有以下几个宏：

// 添加一个 opcode
#define ADDOP(C, OP) { \
    if (!compiler_addop((C), (OP))) \
        return 0; \
}


#define ADDOP_IN_SCOPE(C, OP) { \
    if (!compiler_addop((C), (OP))) { \
        compiler_exit_scope(c); \
        return 0; \
    } \
}

// 添加一个 opcode ，带一个没有具体名字的参数，`a=1` 中的 value(Num(1))
#define ADDOP_O(C, OP, O, TYPE) { \
    if (!compiler_addop_o((C), (OP), (C)->u->u_ ## TYPE, (O))) \
        return 0; \
}

// 添加一个 opcode， 带一个有名字的参数，如变量名
#define ADDOP_NAME(C, OP, O, TYPE) { \
    if (!compiler_addop_name((C), (OP), (C)->u->u_ ## TYPE, (O))) \
        return 0; \
}

#define ADDOP_I(C, OP, O) { \
    if (!compiler_addop_i((C), (OP), (O))) \
        return 0; \
}

#define ADDOP_JABS(C, OP, O) { \
    if (!compiler_addop_j((C), (OP), (O), 1)) \
        return 0; \
}

#define ADDOP_JREL(C, OP, O) { \
    if (!compiler_addop_j((C), (OP), (O), 0)) \
        return 0; \
}

所有带参数的 opcode 最后都会调用到 compiler_addop_i，AST 中的参数是数字或者变量名，到了 opcode 中都是一个 integer，为对应值在 compiler 结构中的下标。

对应的 opcode 定义在 include/opcode.h 中，如 opcode =100 的定义为 #define LOAD_CONST 100 /* Index in const list */

/* Add an opcode with an integer argument.
   Returns 0 on failure, 1 on success.
*/
static int
compiler_addop_i(struct compiler *c, int opcode, int oparg)
{
    struct instr *i;
    int off;
    off = compiler_next_instr(c, c->u->u_curblock);
    if (off < 0)
        return 0;
    i = &c->u->u_curblock->b_instr[off];
    i->i_opcode = opcode;
    i->i_oparg = oparg;
    i->i_hasarg = 1;
    compiler_set_lineno(c, off);
    return 1;
}

assemble

compile_mod 中，compiler_xxx 构建出了 CFG 调用 assemble 函数构建出最后的 CodeObject。

static PyCodeObject *
assemble(struct compiler *c, int addNone)
{
    basicblock *b, *entryblock;
    struct assembler a;
    int i, j, nblocks;
    PyCodeObject *co = NULL;

    /* Make sure every block that falls off the end returns None.
       XXX NEXT_BLOCK() isn't quite right, because if the last
       block ends with a jump or return b_next shouldn't set.
     */
    if (!c->u->u_curblock->b_return) {
        NEXT_BLOCK(c);
        if (addNone)
            ADDOP_O(c, LOAD_CONST, Py_None, consts);
        ADDOP(c, RETURN_VALUE);
    }

    // 找到 entryblock
    nblocks = 0;
    entryblock = NULL;
    for (b = c->u->u_blocks; b != NULL; b = b->b_list) {
        nblocks++;
        entryblock = b;
    }
    ...
    if (!assemble_init(&a, nblocks, c->u->u_firstlineno))
        goto error;
    dfs(c, entryblock, &a);

    /* Can't modify the bytecode after computing jump offsets. */
    assemble_jump_offsets(&a, c);

    /* Emit code in reverse postorder from dfs. */
    for (i = a.a_nblocks - 1; i >= 0; i--) {
        b = a.a_postorder[i];
        for (j = 0; j < b->b_iused; j++)
            if (!assemble_emit(&a, &b->b_instr[j]))
                goto error;
    }

    if (_PyString_Resize(&a.a_lnotab, a.a_lnotab_off) < 0)
        goto error;
    if (_PyString_Resize(&a.a_bytecode, a.a_offset) < 0)
        goto error;

    co = makecode(c, &a);
 error:
    assemble_free(&a);
    return co;
}

assemble 中先调用 dfs 以后序深度优先遍历 struct compiler(CFG)，将 CFG 平坦化，然后调用 assemble_jump_offsets 生成跳转地址。最后调用 makecode 生成 CodeObject。

调试的过程中，可以使用 dis 模块来查看源码对应的字节码。将源码写入 test.py，然后调用 python -m dis test.py 即可。

echo 'a=1' > test.py
python2 -m dis test.py                                                   
  1           0 LOAD_CONST               0 (1)
              3 STORE_NAME               0 (a)
              6 LOAD_CONST               1 (None)
              9 RETURN_VALUE

CodeObject 的结构定义在 include/code.h 中

/* Bytecode object */
typedef struct {
    PyObject_HEAD
    int co_argcount;        /* #arguments, except *args */
    int co_nlocals;     /* #local variables */
    int co_stacksize;       /* #entries needed for evaluation stack */
    int co_flags;       /* CO_..., see below */
    PyObject *co_code;      /* instruction opcodes */
    PyObject *co_consts;    /* list (constants used) */
    PyObject *co_names;     /* list of strings (names used) */
    PyObject *co_varnames;  /* tuple of strings (local variable names) */
    PyObject *co_freevars;  /* tuple of strings (free variable names) */
    PyObject *co_cellvars;      /* tuple of strings (cell variable names) */
    /* The rest doesn't count for hash/cmp */
    PyObject *co_filename;  /* string (where it was loaded from) */
    PyObject *co_name;      /* string (name, for reference) */
    int co_firstlineno;     /* first source line number */
    PyObject *co_lnotab;    /* string (encoding addr<->lineno mapping) See
                   Objects/lnotab_notes.txt for details. */
    void *co_zombieframe;     /* for optimization only (see frameobject.c) */
    PyObject *co_weakreflist;   /* to support weakrefs to code objects */
} PyCodeObject;

Eval CodeObject

run_mod 中构建出 CodeObject 后调用 PyEval_EvalCode 运行 CodeObject。可以看到， PyEval_EvalCode 只是简单调用了 PyEval_EvalCodeEx。

PyObject *
PyEval_EvalCode(PyCodeObject *co, PyObject *globals, PyObject *locals)
{
    return PyEval_EvalCodeEx(co,
                      globals, locals,
                      (PyObject **)NULL, 0,
                      (PyObject **)NULL, 0,
                      (PyObject **)NULL, 0,
                      NULL);
}

PyEval_EvalCodeEx 构建出 PyFrameObject 然后执行 PyEval_EvalFrameEx

// Python/ceval.c PyEval_EvalCodeEx

PyObject *
PyEval_EvalCodeEx(PyCodeObject *co, PyObject *globals, PyObject *locals,
           PyObject **args, int argcount, PyObject **kws, int kwcount,
           PyObject **defs, int defcount, PyObject *closure)
{
    register PyFrameObject *f;
    ...
    retval = PyEval_EvalFrameEx(f,0);
    ...
    return retval;
}

FrameObject

PyFrameObject 定义在 include/frameobject.h 中

typedef struct _frame {
    PyObject_VAR_HEAD
    struct _frame *f_back;  /* previous frame, or NULL */
    PyCodeObject *f_code;   /* code segment */
    PyObject *f_builtins;   /* builtin symbol table (PyDictObject) */
    PyObject *f_globals;    /* global symbol table (PyDictObject) */
    PyObject *f_locals;     /* local symbol table (any mapping) */
    PyObject **f_valuestack;    /* points after the last local */
    /* Next free slot in f_valuestack.  Frame creation sets to f_valuestack.
       Frame evaluation usually NULLs it, but a frame that yields sets it
       to the current stack top. */
    PyObject **f_stacktop;
    PyObject *f_trace;      /* Trace function */

    /* If an exception is raised in this frame, the next three are used to
     * record the exception info (if any) originally in the thread state.  See
     * comments before set_exc_info() -- it's not obvious.
     * Invariant:  if _type is NULL, then so are _value and _traceback.
     * Desired invariant:  all three are NULL, or all three are non-NULL.  That
     * one isn't currently true, but "should be".
     */
    PyObject *f_exc_type, *f_exc_value, *f_exc_traceback;

    PyThreadState *f_tstate;
    int f_lasti;        /* Last instruction if called */
    /* Call PyFrame_GetLineNumber() instead of reading this field
       directly.  As of 2.3 f_lineno is only valid when tracing is
       active (i.e. when f_trace is set).  At other times we use
       PyCode_Addr2Line to calculate the line from the current
       bytecode index. */
    int f_lineno;       /* Current line number */
    int f_iblock;       /* index in f_blockstack */
    PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
    PyObject *f_localsplus[1];  /* locals+stack, dynamically sized */
} PyFrameObject;

可以看到 FrameObject 中存储了对应的字节码，local、global、builtin 三种变量，以及数据栈等运行时必须的信息。

PyTryBlock 中用来处理 Try 和 Loop（循环）语句。在 break continue return 等时候可以跳转到 b_handler 记录的位置继续执行。

typedef struct {
    int b_type;         /* what kind of block this is */
    int b_handler;      /* where to jump to find handler */
    int b_level;        /* value stack level to pop to */
} PyTryBlock;

PyEval_EvalFrameEx

PyEval_EvalFrameEx 是 CPython 解释器最主要的求值函数，核心是一个循环里的巨大的 switch case，对不同的 opcode 执行不同的操作。

下面摘抄部分 PyEval_EvalFrameEx 代码，忽略 profile 和 debug 相关功能的代码。

可以看到在 PyEval_EvalFrameEx 的 for 循环中，先判断了锁的状态，确保同一时间只有一个线程访问解释器，然后通过 NEXTOP 等宏操作 next_instr 指针，以执行不同的字节码。

// Python/ceval.c PyEval_EvalFrameEx

PyObject *
PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
{
    ...
    register PyObject **stack_pointer;  /* Next free slot in value stack */
    register unsigned char *next_instr;
    register int opcode;        /* Current opcode */
    register int oparg;         /* Current opcode argument, if any */
    register enum why_code why; /* Reason for block stack unwind */
    register int err;           /* Error status -- nonzero if error */
    register PyObject *x;       /* Result object -- NULL if error */
    register PyObject *v;       /* Temporary objects popped off stack */
    register PyObject *w;
    register PyObject *u;
    register PyObject *t;
    register PyObject *stream = NULL;    /* for PRINT opcodes */
    register PyObject **fastlocals, **freevars;
    PyObject *retval = NULL;            /* Return value */
    PyThreadState *tstate = PyThreadState_GET();
    PyCodeObject *co;

    unsigned char *first_instr;
    PyObject *names;
    PyObject *consts;
    ...
    /* Start of code */

    if (f == NULL)
        return NULL;

    /* push frame */
    if (Py_EnterRecursiveCall(""))
        return NULL;

    tstate->frame = f;
    ...
    co = f->f_code;
    names = co->co_names;
    consts = co->co_consts;
    fastlocals = f->f_localsplus;
    freevars = f->f_localsplus + co->co_nlocals;
    first_instr = (unsigned char*) PyString_AS_STRING(co->co_code);
    next_instr = first_instr + f->f_lasti + 1;
    stack_pointer = f->f_stacktop;
    assert(stack_pointer != NULL);
    f->f_stacktop = NULL;       /* remains NULL unless yield suspends frame */
    ...
    why = WHY_NOT;
    err = 0;
    x = Py_None;        /* Not a reference, just anything non-NULL */
    w = NULL;

    for (;;) {
        ...
            if (interpreter_lock) {
                /* Give another thread a chance */

                if (PyThreadState_Swap(NULL) != tstate)
                    Py_FatalError("ceval: tstate mix-up");
                PyThread_release_lock(interpreter_lock);

                /* Other threads may run now */

                PyThread_acquire_lock(interpreter_lock, 1);

                if (PyThreadState_Swap(tstate) != NULL)
                    Py_FatalError("ceval: orphan tstate");

                /* Check for thread interrupts */

                if (tstate->async_exc != NULL) {
                    x = tstate->async_exc;
                    tstate->async_exc = NULL;
                    PyErr_SetNone(x);
                    Py_DECREF(x);
                    why = WHY_EXCEPTION;
                    goto on_error;
                }
            }
        ...
         /* Extract opcode and argument */

        opcode = NEXTOP();
        oparg = 0;   /* allows oparg to be stored in a register because
            it doesn't have to be remembered across a full loop */
        if (HAS_ARG(opcode))
            oparg = NEXTARG();
    dispatch_opcode:
        /* Main switch on opcode */

        switch (opcode) {

        /* BEWARE!
           It is essential that any operation that fails sets either
           x to NULL, err to nonzero, or why to anything but WHY_NOT,
           and that no operation that succeeds does this! */

        /* case STOP_CODE: this is an error! */

        TARGET_NOARG(NOP)
        {
            FAST_DISPATCH();
        }

        TARGET(LOAD_FAST)
        {
            x = GETLOCAL(oparg);
            if (x != NULL) {
                Py_INCREF(x);
                PUSH(x);
                FAST_DISPATCH();
            }
            format_exc_check_arg(PyExc_UnboundLocalError,
                UNBOUNDLOCAL_ERROR_MSG,
                PyTuple_GetItem(co->co_varnames, oparg));
            break;
        }
        ... // 省略剩余 opcode 对应的 case
    }/* switch */

        /* End the loop if we still have an error (or return) */

        if (why != WHY_NOT)
            break;

    } /* main loop */

    assert(why != WHY_YIELD);
    /* Pop remaining stack entries. */
    while (!EMPTY()) {
        v = POP();
        Py_XDECREF(v);
    }

    if (why != WHY_RETURN)
        retval = NULL;

    return retval;
}

其中 why 变量是一个表示 main loop 状态的枚举值。

/* Status code for main loop (reason for stack unwind) */
enum why_code {
        WHY_NOT =       0x0001, /* No error */
        WHY_EXCEPTION = 0x0002, /* Exception occurred */
        WHY_RERAISE =   0x0004, /* Exception re-raised by 'finally' */
        WHY_RETURN =    0x0008, /* 'return' statement */
        WHY_BREAK =     0x0010, /* 'break' statement */
        WHY_CONTINUE =  0x0020, /* 'continue' statement */
        WHY_YIELD =     0x0040  /* 'yield' operator */
};

其中 PUSH POP 都是对 stack_pointer 操作的宏，stack_pointer 指向 FrameObject 中的运行时栈，初始化时指向栈顶。

stack_pointer = f->f_stacktop;

NEXTOP 以及 JUMPTO 都是对 next_instr 进行操作的宏，如

#define NEXTOP() (*next_instr++)

控制下一个执行的 opcode

而 #define NEXTARG() (next_instr += 2, (next_instr[-1]<<8) + next_instr[-2]) 是从 opcode 中提出参数。

可以看出， CPython 虚拟机是基于栈、支持多线程和协程(yield)，并且支持异常处理，和许多语言特性。