`
RednaxelaFX
  • 浏览: 3015231 次
  • 性别: Icon_minigender_1
  • 来自: 海外
社区版块
存档分类
最新评论

吉里吉里2中TJS2 VM的dispatch loop

阅读更多
稍微在吉里吉里2.28的源代码里找了下TJS2 VM的执行机制,主要着眼于dispatch loop的实现,并且找到了下面的代码:

kirikiri2\src\core\tjs2\tjsInterCodeExec.cpp/972:
// Executes VM code of this context, starting at CodeArea + startip, with a
// single switch-based dispatch loop.
//
//   ra_org  - register area pointer used as `ra` for the whole run; also
//             passed to DisplayExceptionGeneratedCode when an error is reported
//   startip - offset into CodeArea of the first instruction to execute
//   args, numargs - forwarded unchanged to the call helpers and to
//             ExecuteCodeInTryBlock
//   result  - if non-null, receives the register copied by VM_SRV
//
// Returns the offset (relative to CodeArea) of the word following the VM_RET
// or VM_EXTRY instruction that ended execution. Exceptions raised while
// executing are annotated/converted by the catch handlers at the bottom.
tjs_int tTJSInterCodeContext::ExecuteCode(tTJSVariant *ra_org, tjs_int startip,
    tTJSVariant **args, tjs_int numargs, tTJSVariant *result)
{
    // execute VM codes
    // codesave tracks the instruction currently being executed; it lives
    // outside the try block so the catch handlers can report its position.
    tjs_int32 *codesave;
    try
    {
        tjs_int32 *code = codesave = CodeArea + startip;

        // when stack tracing is enabled, give the tracer the code base and the
        // address of codesave
        if(TJSStackTracerEnabled()) TJSStackTracerSetCodePointer(CodeArea, &codesave);

        tTJSVariant *ra = ra_org;
        tTJSVariant *da = DataArea;

        // condition flag: written by the test/compare opcodes (VM_TT, VM_TF,
        // VM_CEQ, ...), toggled by VM_NF, read by VM_SETF/VM_SETNF/VM_JF/VM_JNF
        bool flag = false;

        // dispatch loop: each case executes one instruction and then advances
        // `code` by that instruction's length (opcode word + operand words)
        while(true)
        {
            codesave = code;
            switch(*code)
            {
            case VM_NOP:
                code ++;
                break;

            // copy from the data area (da) into a register
            case VM_CONST:
                TJS_GET_VM_REG(ra, code[1]).CopyRef(TJS_GET_VM_REG(da, code[2]));
                code += 3;
                break;

            // register-to-register copy
            case VM_CP:
                TJS_GET_VM_REG(ra, code[1]).CopyRef(TJS_GET_VM_REG(ra, code[2]));
                code += 3;
                break;

            case VM_CL:
                TJS_GET_VM_REG(ra, code[1]).Clear();
                code += 2;
                break;

            case VM_CCL:
                ContinuousClear(ra, code);
                code += 3;
                break;

            // flag = truth value of the register
            case VM_TT:
                flag = TJS_GET_VM_REG(ra, code[1]).operator bool();
                code += 2;
                break;

            // flag = negated truth value of the register
            case VM_TF:
                flag = !(TJS_GET_VM_REG(ra, code[1]).operator bool());
                code += 2;
                break;

            case VM_CEQ:
                flag = TJS_GET_VM_REG(ra, code[1]).NormalCompare(
                    TJS_GET_VM_REG(ra, code[2]));
                code += 3;
                break;

            case VM_CDEQ:
                flag = TJS_GET_VM_REG(ra, code[1]).DiscernCompare(
                    TJS_GET_VM_REG(ra, code[2]));
                code += 3;
                break;

            // NOTE(review): VM_CLT calls GreaterThan and VM_CGT calls
            // LittlerThan; relative to the opcode names the pairing looks
            // swapped. Confirm the semantics of these tTJSVariant methods
            // before changing anything here.
            case VM_CLT:
                flag = TJS_GET_VM_REG(ra, code[1]).GreaterThan(
                    TJS_GET_VM_REG(ra, code[2]));
                code += 3;
                break;

            case VM_CGT:
                flag = TJS_GET_VM_REG(ra, code[1]).LittlerThan(
                    TJS_GET_VM_REG(ra, code[2]));
                code += 3;
                break;

            // store the flag (or its negation) into a register
            case VM_SETF:
                TJS_GET_VM_REG(ra, code[1]) = flag;
                code += 2;
                break;

            case VM_SETNF:
                TJS_GET_VM_REG(ra, code[1]) = !flag;
                code += 2;
                break;

            case VM_LNOT:
                TJS_GET_VM_REG(ra, code[1]).logicalnot();
                code += 2;
                break;

            case VM_NF:
                flag = !flag;
                code ++;
                break;

            // jump by code[1] when the flag is set, otherwise step over the
            // two-word instruction
            case VM_JF:
                if(flag)
                    TJS_ADD_VM_CODE_ADDR(code, code[1]);
                else
                    code += 2;
                break;

            // jump by code[1] when the flag is clear
            case VM_JNF:
                if(!flag)
                    TJS_ADD_VM_CODE_ADDR(code, code[1]);
                else
                    code += 2;
                break;

            // unconditional jump
            case VM_JMP:
                TJS_ADD_VM_CODE_ADDR(code, code[1]);
                break;

            // increment: register form, then the PD / PI / P property forms
            case VM_INC:
                TJS_GET_VM_REG(ra, code[1]).increment();
                code += 2;
                break;

            case VM_INCPD:
                OperatePropertyDirect0(ra, code, TJS_OP_INC);
                code += 4;
                break;

            case VM_INCPI:
                OperatePropertyIndirect0(ra, code, TJS_OP_INC);
                code += 4;
                break;

            case VM_INCP:
                OperateProperty0(ra, code, TJS_OP_INC);
                code += 3;
                break;

            // decrement: same four forms as increment
            case VM_DEC:
                TJS_GET_VM_REG(ra, code[1]).decrement();
                code += 2;
                break;

            case VM_DECPD:
                OperatePropertyDirect0(ra, code, TJS_OP_DEC);
                code += 4;
                break;

            case VM_DECPI:
                OperatePropertyIndirect0(ra, code, TJS_OP_DEC);
                code += 4;
                break;

            case VM_DECP:
                OperateProperty0(ra, code, TJS_OP_DEC);
                code += 3;
                break;

            // TJS_DEF_VM_P(vmcode, rope) expands to four cases for one binary
            // operator: VM_<vmcode> applies `rope` register-to-register
            // (3 words), and VM_<vmcode>PD / PI / P go through the
            // OperateProperty* helpers (5 / 5 / 4 words). The last `break`
            // has no semicolon; the one after each use below supplies it.
#define TJS_DEF_VM_P(vmcode, rope) \
            case VM_##vmcode: \
                TJS_GET_VM_REG(ra, code[1]).rope(TJS_GET_VM_REG(ra, code[2])); \
                code += 3; \
                break; \
            case VM_##vmcode##PD: \
                OperatePropertyDirect(ra, code, TJS_OP_##vmcode); \
                code += 5; \
                break; \
            case VM_##vmcode##PI: \
                OperatePropertyIndirect(ra, code, TJS_OP_##vmcode); \
                code += 5; \
                break; \
            case VM_##vmcode##P: \
                OperateProperty(ra, code, TJS_OP_##vmcode); \
                code += 4; \
                break

                TJS_DEF_VM_P(LOR, logicalorequal);
                TJS_DEF_VM_P(LAND, logicalandequal);
                TJS_DEF_VM_P(BOR, operator |=);
                TJS_DEF_VM_P(BXOR, operator ^=);
                TJS_DEF_VM_P(BAND, operator &=);
                TJS_DEF_VM_P(SAR, operator >>=);
                TJS_DEF_VM_P(SAL, operator <<=);
                TJS_DEF_VM_P(SR, rbitshiftequal);
                TJS_DEF_VM_P(ADD, operator +=);
                TJS_DEF_VM_P(SUB, operator -=);
                TJS_DEF_VM_P(MOD, operator %=);
                TJS_DEF_VM_P(DIV, operator /=);
                TJS_DEF_VM_P(IDIV, idivequal);
                TJS_DEF_VM_P(MUL, operator *=);

#undef TJS_DEF_VM_P

            // in-place unary operations on a single register
            case VM_BNOT:
                TJS_GET_VM_REG(ra, code[1]).bitnot();
                code += 2;
                break;

            case VM_ASC:
                CharacterCodeOf(TJS_GET_VM_REG(ra, code[1]));
                code += 2;
                break;

            case VM_CHR:
                CharacterCodeFrom(TJS_GET_VM_REG(ra, code[1]));
                code += 2;
                break;

            case VM_NUM:
                TJS_GET_VM_REG(ra, code[1]).tonumber();
                code += 2;
                break;

            case VM_CHS:
                TJS_GET_VM_REG(ra, code[1]).changesign();
                code += 2;
                break;

            // invalidate the object in the register and overwrite the
            // register with whether Invalidate returned TJS_S_TRUE
            case VM_INV:
                TJS_GET_VM_REG(ra, code[1]) =
                    (TJS_GET_VM_REG(ra,
                    code[1]).AsObjectClosureNoAddRef().Invalidate(0,
                    NULL, NULL, ra[-1].AsObjectNoAddRef()) == TJS_S_TRUE);
                code += 2;
                break;

            // overwrite the register with the validity check result
            case VM_CHKINV:
                TJS_GET_VM_REG(ra, code[1]) =
                    TJSIsObjectValid(TJS_GET_VM_REG(ra,
                    code[1]).AsObjectClosureNoAddRef().IsValid(0,
                    NULL, NULL, ra[-1].AsObjectNoAddRef()));
                code += 2;
                break;

            // in-place type conversions
            case VM_INT:
                TJS_GET_VM_REG(ra, code[1]).ToInteger();
                code += 2;
                break;

            case VM_REAL:
                TJS_GET_VM_REG(ra, code[1]).ToReal();
                code += 2;
                break;

            case VM_STR:
                TJS_GET_VM_REG(ra, code[1]).ToString();
                code += 2;
                break;

            case VM_OCTET:
                TJS_GET_VM_REG(ra, code[1]).ToOctet();
                code += 2;
                break;

            case VM_TYPEOF:
                TypeOf(TJS_GET_VM_REG(ra, code[1]));
                code += 2;
                break;

            case VM_TYPEOFD:
                TypeOfMemberDirect(ra, code, TJS_MEMBERMUSTEXIST);
                code += 4;
                break;

            case VM_TYPEOFI:
                TypeOfMemberIndirect(ra, code, TJS_MEMBERMUSTEXIST);
                code += 4;
                break;

            // VM_EVAL and VM_EEXP differ only in the last Eval argument
            // (true vs. false); the object argument is NULL when
            // TJSEvalOperatorIsOnGlobal is set, otherwise ra[-1]'s object
            case VM_EVAL:
                Eval(TJS_GET_VM_REG(ra, code[1]),
                    TJSEvalOperatorIsOnGlobal ? NULL : ra[-1].AsObjectNoAddRef(),
                    true);
                code += 2;
                break;

            case VM_EEXP:
                Eval(TJS_GET_VM_REG(ra, code[1]),
                    TJSEvalOperatorIsOnGlobal ? NULL : ra[-1].AsObjectNoAddRef(),
                    false);
                code += 2;
                break;

            // note the operand order: code[2] is passed first, code[1] second
            case VM_CHKINS:
                InstanceOf(TJS_GET_VM_REG(ra, code[2]),
                    TJS_GET_VM_REG(ra, code[1]));
                code += 3;
                break;

            // calls: the helper's return value is used as the code advance
            // (VM_CALL and VM_NEW share one handler)
            case VM_CALL:
            case VM_NEW:
                code += CallFunction(ra, code, args, numargs);
                break;

            case VM_CALLD:
                code += CallFunctionDirect(ra, code, args, numargs);
                break;

            case VM_CALLI:
                code += CallFunctionIndirect(ra, code, args, numargs);
                break;

            // property get/set, direct forms; the variants differ only in
            // the flags passed to the helper
            case VM_GPD:
                GetPropertyDirect(ra, code, 0);
                code += 4;
                break;

            case VM_GPDS:
                GetPropertyDirect(ra, code, TJS_IGNOREPROP);
                code += 4;
                break;

            case VM_SPD:
                SetPropertyDirect(ra, code, 0);
                code += 4;
                break;

            case VM_SPDE:
                SetPropertyDirect(ra, code, TJS_MEMBERENSURE);
                code += 4;
                break;

            case VM_SPDEH:
                SetPropertyDirect(ra, code, TJS_MEMBERENSURE|TJS_HIDDENMEMBER);
                code += 4;
                break;

            case VM_SPDS:
                SetPropertyDirect(ra, code, TJS_MEMBERENSURE|TJS_IGNOREPROP);
                code += 4;
                break;

            // property get/set, indirect forms
            case VM_GPI:
                GetPropertyIndirect(ra, code, 0);
                code += 4;
                break;

            case VM_GPIS:
                GetPropertyIndirect(ra, code, TJS_IGNOREPROP);
                code += 4;
                break;

            case VM_SPI:
                SetPropertyIndirect(ra, code, 0);
                code += 4;
                break;

            case VM_SPIE:
                SetPropertyIndirect(ra, code, TJS_MEMBERENSURE);
                code += 4;
                break;

            case VM_SPIS:
                SetPropertyIndirect(ra, code, TJS_MEMBERENSURE|TJS_IGNOREPROP);
                code += 4;
                break;

            case VM_GETP:
                GetProperty(ra, code);
                code += 3;
                break;

            case VM_SETP:
                SetProperty(ra, code);
                code += 3;
                break;

            case VM_DELD:
                DeleteMemberDirect(ra, code);
                code += 4;
                break;

            case VM_DELI:
                DeleteMemberIndirect(ra, code);
                code += 4;
                break;

            // copy the register into *result, if the caller supplied one
            case VM_SRV:
                if(result) result->CopyRef(TJS_GET_VM_REG(ra, code[1]));
                code += 2;
                break;

            // leave the loop: return the offset just past this instruction
            case VM_RET:
                return code+1-CodeArea;

            // run the following code through ExecuteCodeInTryBlock (starting
            // 3 words ahead) and resume at the offset it returns
            case VM_ENTRY:
                code = CodeArea + ExecuteCodeInTryBlock(ra, code-CodeArea + 3, args,
                    numargs, result, TJS_FROM_VM_CODE_ADDR(code[1])+code-CodeArea,
                    TJS_FROM_VM_REG_ADDR(code[2]));
                break;

            case VM_EXTRY:
                return code+1-CodeArea;  // same as ret

            case VM_THROW:
                ThrowScriptException(TJS_GET_VM_REG(ra, code[1]),
                    Block, CodePosToSrcPos(code-CodeArea));
                code += 2; // actually here not proceed...
                break;

            case VM_CHGTHIS:
                TJS_GET_VM_REG(ra, code[1]).ChangeClosureObjThis(
                    TJS_GET_VM_REG(ra, code[2]).AsObjectNoAddRef());
                code += 3;
                break;

            case VM_GLOBAL:
                TJS_GET_VM_REG(ra, code[1]) = Block->GetTJS()->GetGlobalNoAddRef();
                code += 2;
                break;

            case VM_ADDCI:
                AddClassInstanceInfo(ra, code);
                code+=3;
                break;

            case VM_REGMEMBER:
                RegisterObjectMember(ra[-1].AsObjectNoAddRef());
                code ++;
                break;

            case VM_DEBUGGER:
                TJSNativeDebuggerBreak();
                code ++;
                break;

            // unknown opcode; `code` is not advanced here, so this relies on
            // ThrowInvalidVMCode not returning (presumably it throws - verify)
            default:
                ThrowInvalidVMCode();
            }
        }
    }
    // NOTE(review): the handlers below use `throw e;`, which throws a copy of
    // the caught object as its static type; a bare `throw;` would rethrow the
    // original object. Confirm whether derived exception types can reach here
    // before changing it.
    // eTJSSilent is passed on without adding a trace entry
    catch(eTJSSilent &e)
    {
        throw e;
    }
    // script exceptions/errors: record this context and the failing code
    // offset in the exception's trace, then pass it on
    catch(eTJSScriptException &e)
    {
        e.AddTrace(this, codesave-CodeArea);
        throw e;
    }
    catch(eTJSScriptError &e)
    {
        e.AddTrace(this, codesave-CodeArea);
        throw e;
    }
    // every other exception kind: display the code that generated it and
    // report it through TJS_eTJSScriptError with this context and code offset
    catch(eTJS &e)
    {
        DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
        TJS_eTJSScriptError(e.GetMessage(), this, codesave-CodeArea);
    }
    catch(exception &e)
    {
        DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
        TJS_eTJSScriptError(e.what(), this, codesave-CodeArea);
    }
    catch(const wchar_t *text)
    {
        DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
        TJS_eTJSScriptError(text, this, codesave-CodeArea);
    }
    catch(const char *text)
    {
        DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
        TJS_eTJSScriptError(text, this, codesave-CodeArea);
    }
#ifdef TJS_SUPPORT_VCL
    catch(const EAccessViolation &e)
    {
        DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
        TJS_eTJSScriptError(e.Message.c_str(), this, codesave-CodeArea);
    }
    catch(const Exception &e)
    {
        DisplayExceptionGeneratedCode(codesave - CodeArea, ra_org);
        TJS_eTJSScriptError(e.Message.c_str(), this, codesave-CodeArea);
    }
#endif

    // only reached if one of the handlers above returns normally, i.e. if
    // TJS_eTJSScriptError returns (presumably it throws - verify)
    return codesave-CodeArea;
}


果然还是相当典型且直观的解释器。这种不断读入VM码(在每个case之后决定code的增量),并通过单一的switch语句来完成dispatch的方法,是基本解释器实现里最直观,但通常也是最慢的方式。
对于一个比较小的指令集(例如RISC),threaded code通常是更好的解决方案,因为在每个指令例程的最后添加跳转操作能减少现代CPU的跳转预测失误。其中又有indirect与direct这两大类threading。Anton Ertl有篇关于threaded code的不错的文章。在Virtual Machines: Versatile Platforms for Systems and Processes一书中有更详细的解释。
TJS2 VM现有的实现有不少让人很想吐槽的地方,例如说基于引用计数的GC,还有那难以阅读的代码……那么大量的宏用起来真够难受的。
其实W.Dee氏之所以宁可直接开始实现吉里吉里3的Risse VM而不在现有的吉里吉里2的codebase上修改,恐怕也是因为这codebase太乱了吧。新的Risse VM已经有了不少实质性的改进,例如用Boehm GC来代替原本不太好的引用计数GC;将中间表示(IR)改进为SSA形式,等等。不过就这么把TJS2 VM扔掉也怪可惜的。想慢慢把TJS2 VM中可改进的地方挖出来,看看是否适合给予改进。如果能赶在吉里吉里3的Risse VM完成前对TJS2 VM做些改进,那还算有价值。

不过吉里吉里系列内的VM有个很紧的要求,那就是整个运行时的外表看起来要像一个解释器,即:内部实现是先将文本形式的脚本源代码编译为中间表示,然后再由VM执行(此处的VM又是一个真正的解释器)。这对编译的部分要求比较高,使一些耗时间的优化不太好进行。要是W.Dee氏肯接受真正完整的编译,再交由VM执行,会轻松很多。

(试想一下,写一个程序把Java Compiler与JVM包装起来,像解释脚本一样执行Java源文件。如果你用的是Sun的JDK,恭喜你,编译HelloWorld可能也要半分钟。而且JDK 1.6.0系列还经常诡异地出现NoClassDefFoundError,让我只好对1.6.0系列敬而远之)

附注:吉里吉里2的源代码基于GPL许可证发布
分享到:
评论
4 楼 RednaxelaFX 2007-10-31  
Rhino 1.6R7里,org.mozilla.javascript.Parser类果然只做了parser的建语法树的工作。实际的scan工作交由org.mozilla.javascript.TokenStream完成。真难以想象会看到这样的代码:
// Excerpt (parts elided with "// ..."): maps an identifier string to a
// keyword token id by switching on the string's length and then comparing
// individual characters by hand.
// Returns Token.EOF when id is still 0 at the end, otherwise id & 0xff.
// NOTE(review): the parameter is `name` but the visible body reads `s`, and
// `id` is not declared in the visible lines; both are presumably set up in
// the elided parts - confirm against the full source.
private static int stringToKeyword(String name) {
    // ...
    // Labeled block L0: `break L0` leaves it once a keyword id has been
    // assigned; `break L` only leaves the length switch.
    L0: { id = 0; String X = null; int c;
	L: switch (s.length()) {
		// length 2: decide on the second character first, then check the first
		case 2: c=s.charAt(1);
            if (c=='f') { if (s.charAt(0)=='i') {id=Id_if; break L0;} }
            else if (c=='n') { if (s.charAt(0)=='i') {id=Id_in; break L0;} }
            else if (c=='o') { if (s.charAt(0)=='d') {id=Id_do; break L0;} }
            break L;
        // length 3: switch on the first character, then check the third and
        // second characters
        case 3: switch (s.charAt(0)) {
            case 'f': if (s.charAt(2)=='r' && s.charAt(1)=='o') {id=Id_for; break L0;} break L;
            case 'i': if (s.charAt(2)=='t' && s.charAt(1)=='n') {id=Id_int; break L0;} break L;
            case 'n': if (s.charAt(2)=='w' && s.charAt(1)=='e') {id=Id_new; break L0;} break L;
            case 't': if (s.charAt(2)=='y' && s.charAt(1)=='r') {id=Id_try; break L0;} break L;
            case 'v': if (s.charAt(2)=='r' && s.charAt(1)=='a') {id=Id_var; break L0;} break L;
            } break L;
        // ...
        }
    // ...
    // id == 0 means no keyword matched
    if (id == 0) { return Token.EOF; }
    return id & 0xff;
}

想想也挺合情合理的。我只是很佩服这hardcode的胆识……
3 楼 RednaxelaFX 2007-10-31  
呃,找到了。原来代码生成是在org.mozilla.javascript.optimizer.Codegen.generateCode(String encodedSource)。这Codegen本身又继承于上面的Interpreter。
2 楼 RednaxelaFX 2007-10-31  
在Mozilla的Java版JavaScript实现,Rhino 1.6R7里,语句、表达式通过遍历语法树,生成一定的中间代码(ICode),然后再计算执行。在org.mozilla.javascript.Interpreter.visitExpression(Node node, int contextFlags)中:

// Excerpt (most cases omitted): visits one expression node of the syntax
// tree and emits interpreter code for it, dispatching on the node's token
// type. After the switch it checks that the tracked stack depth is exactly
// one greater than it was on entry.
// NOTE(review): contextFlags is not read in the visible cases; the recursive
// call below passes 0.
private void visitExpression(Node node, int contextFlags)
{
    int type = node.getType();
    Node child = node.getFirstChild();
    // remembered so the stack-depth check at the end can compare against it
    int savedStackDepth = itsStackDepth;
    switch (type) {

      case Token.FUNCTION:
        {
            int fnIndex = node.getExistingIntProp(Node.FUNCTION_PROP);
            FunctionNode fn = scriptOrFn.getFunctionNode(fnIndex);
            // See comments in visitStatement for Token.FUNCTION case
            if (fn.getFunctionType() != FunctionNode.FUNCTION_EXPRESSION) {
                throw Kit.codeBug();
            }
            // emit the closure-expression op with the function's index and
            // record one more stack slot
            addIndexOp(Icode_CLOSURE_EXPR, fnIndex);
            stackChange(1);
        }
        break;

      case Token.LOCAL_LOAD:
        {
            int localIndex = getLocalBlockRef(node);
            addIndexOp(Token.LOCAL_LOAD, localIndex);
            stackChange(1);
        }
        break;

      // rest of cases omitted

      // these three types share one handler: visit the first child, then
      // emit the node's own token
      case Token.DEFAULTNAMESPACE :
      case Token.ESCXMLATTR :
      case Token.ESCXMLTEXT :
        visitExpression(child, 0);
        addToken(type);
        break;

      default:
        throw badTree(node);
    }
    // NOTE(review): unlike the FUNCTION case above, the result of
    // Kit.codeBug() is not thrown here; whether codeBug() itself throws is
    // not visible in this excerpt - confirm.
    if (savedStackDepth + 1 != itsStackDepth) {
        Kit.codeBug();
    }
}

hmm...但是我记得看到过文章说Rhino是部分编译为JVM bytecode执行的。大概我还没找到具体的执行位置吧。再找找看。
1 楼 RednaxelaFX 2007-10-31  
今天小读了一会Bill Blunden的Virtual Machine Design and Implementation in C,C++。然后“高兴”地发现书中提到的HEC的dispatch loop也是"the big switch"...如下

/*
 * Excerpt (most handlers omitted): bytecode execution loop. Runs until the
 * byte at RAM[ R[$IP] ] is HALT, dispatching each instruction through one
 * switch statement.
 *   dbg - copied into the global `debug`; when it equals TRUE the debugger
 *         hooks (readDbgCmd / printDbgInstr) are called around each
 *         instruction and once more after the loop ends
 */
void run(U1 dbg)
{
    U2 tick;
    U8 start_debug_instr;
    
    tick=0;
    debug = dbg; /*set global from dbgcmd.c*/
    
    interruptOn=TRUE;
    
    DBG_RUN0("initiating bytecode execution\n");
    
    while(RAM[ R[$IP] ] != HALT)
    {
        if(debug==TRUE){ readDbgCmd(); }
        /* remember where this instruction starts; printDbgInstr gets it
           after the handler has run */
        start_debug_instr = (U8)R[$IP];
        
        /* the HANDLE_* macros are defined elsewhere; presumably each one
           executes its instruction and advances R[$IP] - verify */
        switch(RAM[ R[$IP] ])
        {
            case LBI:{ HANDLE_LBI(); }break;
            case LWI:{ HANDLE_LWI(); }break;
            case LDI:{ HANDLE_LDI(); }break;
            case LQI:{ HANDLE_LQI(); }break;
            case LF1I:{ HANDLE_LF1I(); }break;
            case LF2I:{ HANDLE_LF2I(); }break;
            // rest of handlers omitted
            /* unknown opcode: write the opcode value and its address to
               errPtr, then raise a level-1 error */
            default:
            {
                xmlBegin();
                fprintf(errPtr,"run(): ");
                fprintf(errPtr,"bad instruction (%d) ",RAM[R[$IP]]);
                fprintf(errPtr,"at address = ");
                fpU8(errPtr,(U8)R[$IP]);
                fprintf(errPtr,"\n");
                xmlEnd();
                ERROR_LVL1("run(): fatal error\n");
            }break;
        }/*end switch*/
        if(debug==TRUE){ printDbgInstr(start_debug_instr); }
        /* per-instruction counter, reset to 0 when it reaches 65535; it is
           not read anywhere else in the visible code */
        tick++;
        if(tick==65535){ tick = 0; }
    }/*end while*/
    
    if(debug==TRUE){ readDbgCmd(); }
    DBG_RUN0("HALT instruction executed\n");
    
    return;
}/*end run*/


Good job.果然这种才是最直观的 T T

相关推荐

Global site tag (gtag.js) - Google Analytics