From f6e012bf05054eb4e5ce7e8decdf23c0b19eadee Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Sat, 3 Aug 2019 15:36:43 +0900 Subject: [PATCH] Add *, / and () --- main.c | 193 ++++++++++++++++++++++++++++++++++++++++++++++++-------- test.sh | 3 + 2 files changed, 170 insertions(+), 26 deletions(-) diff --git a/main.c b/main.c index de20109..a8ca2ad 100644 --- a/main.c +++ b/main.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -5,6 +6,10 @@ #include #include +// +// Tokenizer +// + typedef enum { TK_RESERVED, // Keywords or punctuators TK_NUM, // Numeric literals @@ -69,13 +74,6 @@ static Token *skip(Token *tok, char *s) { return tok->next; } -// Ensure that the current token is TK_NUM. -static int get_number(Token *tok) { - if (tok->kind != TK_NUM) - error_tok(tok, "expected a number"); - return tok->val; -} - // Create a new token and add it as the next token of `cur`. static Token *new_token(TokenKind kind, Token *cur, char *str, int len) { Token *tok = calloc(1, sizeof(Token)); @@ -108,8 +106,8 @@ static Token *tokenize(void) { continue; } - // Punctuator - if (*p == '+' || *p == '-') { + // Punctuators + if (ispunct(*p)) { cur = new_token(TK_RESERVED, cur, p++, 1); continue; } @@ -121,33 +119,176 @@ static Token *tokenize(void) { return head.next; } -int main(int argc, char **argv) { - if (argc != 2) - error("%s: invalid number of arguments", argv[0]); +// +// Parser +// - current_input = argv[1]; - Token *tok = tokenize(); +typedef enum { + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NUM, // Integer +} NodeKind; - printf(" .globl main\n"); - printf("main:\n"); +// AST node type +typedef struct Node Node; +struct Node { + NodeKind kind; // Node kind + Node *lhs; // Left-hand side + Node *rhs; // Right-hand side + int val; // Used if kind == ND_NUM +}; + +static Node *new_node(NodeKind kind) { + Node *node = calloc(1, sizeof(Node)); + node->kind = kind; + return node; +} + +static Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) { + Node *node = new_node(kind); + node->lhs = lhs; + node->rhs = rhs; + return node; +} - // The first token must be a number - printf(" mov $%d, %%rax\n", get_number(tok)); - tok = tok->next; +static Node *new_num(int val) { + Node *node = new_node(ND_NUM); + node->val = val; + return node; +} + +static Node *expr(Token **rest, Token *tok); +static Node *mul(Token **rest, Token *tok); +static Node *primary(Token **rest, Token *tok); - // ... followed by either `+ ` or `- `. - while (tok->kind != TK_EOF) { +// expr = mul ("+" mul | "-" mul)* +static Node *expr(Token **rest, Token *tok) { + Node *node = mul(&tok, tok); + + for (;;) { if (equal(tok, "+")) { - printf(" add $%d, %%rax\n", get_number(tok->next)); - tok = tok->next->next; + Node *rhs = mul(&tok, tok->next); + node = new_binary(ND_ADD, node, rhs); + continue; + } + + if (equal(tok, "-")) { + Node *rhs = mul(&tok, tok->next); + node = new_binary(ND_SUB, node, rhs); + continue; + } + + *rest = tok; + return node; + } +} + +// mul = primary ("*" primary | "/" primary)* +static Node *mul(Token **rest, Token *tok) { + Node *node = primary(&tok, tok); + + for (;;) { + if (equal(tok, "*")) { + Node *rhs = primary(&tok, tok->next); + node = new_binary(ND_MUL, node, rhs); + continue; + } + + if (equal(tok, "/")) { + Node *rhs = primary(&tok, tok->next); + node = new_binary(ND_DIV, node, rhs); continue; } - tok = skip(tok, "-"); - printf(" sub $%d, %%rax\n", get_number(tok)); - tok = tok->next; + *rest = tok; + return node; } +} + +// primary = "(" expr ")" | num +static Node *primary(Token **rest, Token *tok) { + if (equal(tok, "(")) { + Node *node = expr(&tok, tok->next); + *rest = skip(tok, ")"); + return node; + } + + if (tok->kind == TK_NUM) { + Node *node = new_num(tok->val); + *rest = tok->next; + return node; + } + + error_tok(tok, "expected an expression"); +} + +// +// Code generator +// + +static int depth; + +static void push(void) { + printf(" push %%rax\n"); + depth++; +} + +static void pop(char *arg) { + printf(" pop %s\n", arg); + depth--; +} + +static void gen_expr(Node *node) { + if (node->kind == ND_NUM) { + printf(" mov $%d, %%rax\n", node->val); + return; + } + + gen_expr(node->rhs); + push(); + gen_expr(node->lhs); + pop("%rdi"); + switch (node->kind) { + case ND_ADD: + printf(" add %%rdi, %%rax\n"); + return; + case ND_SUB: + printf(" sub %%rdi, %%rax\n"); + return; + case ND_MUL: + printf(" imul %%rdi, %%rax\n"); + return; + case ND_DIV: + printf(" cqo\n"); + printf(" idiv %%rdi\n"); + return; + } + + error("invalid expression"); +} + +int main(int argc, char **argv) { + if (argc != 2) + error("%s: invalid number of arguments", argv[0]); + + // Tokenize and parse. + current_input = argv[1]; + Token *tok = tokenize(); + Node *node = expr(&tok, tok); + + if (tok->kind != TK_EOF) + error_tok(tok, "extra token"); + + printf(" .globl main\n"); + printf("main:\n"); + + // Traverse the AST to emit assembly. + gen_expr(node); printf(" ret\n"); + + assert(depth == 0); return 0; } diff --git a/test.sh b/test.sh index 876e4e2..d882d77 100755 --- a/test.sh +++ b/test.sh @@ -20,5 +20,8 @@ assert 0 0 assert 42 42 assert 21 '5+20-4' assert 41 ' 12 + 34 - 5 ' +assert 47 '5+6*7' +assert 15 '5*(9-6)' +assert 4 '(3+5)/2' echo OK -- GitLab