compiler

Unnamed Compiled Systems Language Project
git clone http://frotz.net/git/compiler.git
Log | Files | Refs

commit 68eb090a8b54fad8667610a77d713ca44188f42b
parent f6aa98fb22a36686f04b8f279c6f9bd5eeff855b
Author: Brian Swetland <swetland@frotz.net>
Date:   Fri, 26 Nov 2021 15:57:53 -0800

compiler2: minor shuffle

Group all of the lexer bits together.

Diffstat:
M src/compiler2.c | 175 +++++++++++++++++++++++++++++++++++++++++--------------------------------------
1 file changed, 91 insertions(+), 84 deletions(-)

diff --git a/src/compiler2.c b/src/compiler2.c @@ -29,90 +29,8 @@ typedef char** args; typedef uint32_t* u32ptr; #endif -// token classes (tok & tcMASK) -enum { - tcRELOP = 0x08, tcADDOP = 0x10, tcMULOP = 0x18, - tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, -}; - -enum { - // EndMarks, Braces, Brackets Parens - tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, - // RelOps (do not reorder) - tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, - // AddOps (do not reorder) - tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, - // MulOps (do not reorder) - tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F, - // AsnOps (do not reorder) - tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, - tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F, - // Various, UnaryNot, LogicalOps, - tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, - tASSIGN, tINC, tDEC, - // Keywords - tTYPE, tFUNC, tSTRUCT, tVAR, tENUM, - tIF, tELSE, tWHILE, - tBREAK, tCONTINUE, tRETURN, - tFOR, tSWITCH, tCASE, - tTRUE, tFALSE, tNIL, - tIDN, tNUM, tSTR, - // used internal to the lexer but never returned - tSPC, tINV, tDQT, tSQT, tMSC, -}; - -str tnames[] = { - "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", - "==", "!=", "<", "<=", ">", ">=", "", "", - "+", "-", "|", "^", "", "", "", "", - "*", "/", "%", "&", "&~", "<<", ">>", "", - "+=", "-=", "|=", "^=", "", "", "", "", - "*=", "/=", "%=", "&=", "&~=", "<<=", ">>=", "", - ";", ":", ".", ",", "~", "&&", "||", "!", - "=", "++", "--", - "type", "func", "struct", "var", "enum", - "if", "else", "while", - "break", "continue", "return", - "for", "switch", "case", - "true", "false", "nil", - "<ID>", "<NUM>", "<STR>", - "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", -}; - -u8 lextab[256] = { - tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, 
tAMP, tSQT, - tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, - tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, - tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, tMSC, - tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, - tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, - tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, - tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, - tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, - tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, -}; +// ------------------------------------------------------------------ +// structures typedef struct StringRec StringRec; typedef struct CtxRec CtxRec; @@ -127,6 +45,7 @@ struct StringRec { }; // ------------------------------------------------------------------ +// compiler global context struct CtxRec { const char* filename; // filename of active source @@ -263,6 +182,94 @@ void ctx_open_source(const char* filename) { ctx.byteoffset = 0; } +// ================================================================ +// lexical scanner + +// token classes (tok & tcMASK) +enum { + tcRELOP = 0x08, 
tcADDOP = 0x10, tcMULOP = 0x18, + tcAEQOP = 0x20, tcMEQOP = 0x28, tcMASK = 0xF8, +}; + +enum { + // EndMarks, Braces, Brackets Parens + tEOF, tEOL, tOBRACE, tCBRACE, tOBRACK, tCBRACK, tOPAREN, tCPAREN, + // RelOps (do not reorder) + tEQ, tNE, tLT, tLE, tGT, tGE, tx0E, tx0F, + // AddOps (do not reorder) + tPLUS, tMINUS, tPIPE, tCARET, tx14, tx15, tx16, tx17, + // MulOps (do not reorder) + tSTAR, tSLASH, tPERCENT, tAMP, tANDNOT, tLEFT, tRIGHT, tx1F, + // AsnOps (do not reorder) + tADDEQ, tSUBEQ, tOREQ, tXOREQ, tx24, tx25, tx26, tx27, + tMULEQ, tDIVEQ, tMODEQ, tANDEQ, tANNEQ, tLSEQ, tRSEQ, t2F, + // Various, UnaryNot, LogicalOps, + tSEMI, tCOLON, tDOT, tCOMMA, tNOT, tAND, tOR, tBANG, + tASSIGN, tINC, tDEC, + // Keywords + tTYPE, tFUNC, tSTRUCT, tVAR, tENUM, + tIF, tELSE, tWHILE, + tBREAK, tCONTINUE, tRETURN, + tFOR, tSWITCH, tCASE, + tTRUE, tFALSE, tNIL, + tIDN, tNUM, tSTR, + // used internal to the lexer but never returned + tSPC, tINV, tDQT, tSQT, tMSC, +}; + +str tnames[] = { + "<EOF>", "<EOL>", "{", "}", "[", "]", "(", ")", + "==", "!=", "<", "<=", ">", ">=", "", "", + "+", "-", "|", "^", "", "", "", "", + "*", "/", "%", "&", "&~", "<<", ">>", "", + "+=", "-=", "|=", "^=", "", "", "", "", + "*=", "/=", "%=", "&=", "&~=", "<<=", ">>=", "", + ";", ":", ".", ",", "~", "&&", "||", "!", + "=", "++", "--", + "type", "func", "struct", "var", "enum", + "if", "else", "while", + "break", "continue", "return", + "for", "switch", "case", + "true", "false", "nil", + "<ID>", "<NUM>", "<STR>", + "<SPC>", "<INV>", "<DQT>", "<SQT>", "<MSC>", +}; + +u8 lextab[256] = { + tEOF, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tSPC, tEOL, tSPC, tINV, tSPC, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tSPC, tBANG, tDQT, tMSC, tMSC, tPERCENT, tAMP, tSQT, + tOPAREN, tCPAREN, tSTAR, tPLUS, tCOMMA, tMINUS, tDOT, tSLASH, + tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, tNUM, + tNUM, tNUM, tCOLON, tSEMI, tLT, tASSIGN, tGT, 
tMSC, + tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tOBRACK, tMSC, tCBRACK, tCARET, tIDN, + tMSC, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, tIDN, + tIDN, tIDN, tIDN, tOBRACE, tPIPE, tCBRACE, tNOT, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, + tINV, tINV, tINV, tINV, tINV, tINV, tINV, tINV, +}; + i32 unhex(u32 ch) { if ((ch >= '0') && (ch <= '9')) { return ch - '0';