From ef6d1791eb2a5ef3af913945ca577ea76d4ff97e Mon Sep 17 00:00:00 2001
From: Rui Ueyama
Date: Sat, 3 Aug 2019 12:36:06 +0900
Subject: [PATCH] Add a tokenizer to allow space characters between tokens

---
 main.c  | 125 +++++++++++++++++++++++++++++++++++++++++++++++++-------
 test.sh |   1 +
 2 files changed, 112 insertions(+), 14 deletions(-)

diff --git a/main.c b/main.c
index 7a28076..ae40deb 100644
--- a/main.c
+++ b/main.c
@@ -1,34 +1,131 @@
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdbool.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
+
+typedef enum {
+  TK_RESERVED, // Keywords or punctuators
+  TK_NUM,      // Integer literals
+  TK_EOF,      // End-of-file markers
+} TokenKind;
+
+// Token type
+typedef struct Token Token;
+struct Token {
+  TokenKind kind; // Token kind
+  Token *next;    // Next token
+  int val;        // If kind is TK_NUM, its value
+  char *str;      // Token string
+};
+
+// Current token
+Token *token;
+
+// Reports an error and exit.
+void error(char *fmt, ...) {
+  va_list ap;
+  va_start(ap, fmt);
+  vfprintf(stderr, fmt, ap);
+  fprintf(stderr, "\n");
+  exit(1);
+}
+
+// Consumes the current token if it matches `op`.
+bool consume(char op) {
+  if (token->kind != TK_RESERVED || token->str[0] != op)
+    return false;
+  token = token->next;
+  return true;
+}
+
+// Ensure that the current token is `op`.
+void expect(char op) {
+  if (token->kind != TK_RESERVED || token->str[0] != op)
+    error("expected '%c'", op);
+  token = token->next;
+}
+
+// Ensure that the current token is TK_NUM.
+int expect_number() {
+  if (token->kind != TK_NUM)
+    error("expected a number");
+  int val = token->val;
+  token = token->next;
+  return val;
+}
+
+bool at_eof() {
+  return token->kind == TK_EOF;
+}
+
+// Create a new token and add it as the next token of `cur`.
+Token *new_token(TokenKind kind, Token *cur, char *str) {
+  Token *tok = calloc(1, sizeof(Token));
+  tok->kind = kind;
+  tok->str = str;
+  cur->next = tok;
+  return tok;
+}
+
+// Tokenize `p` and returns new tokens.
+Token *tokenize(char *p) {
+  Token head;
+  head.next = NULL;
+  Token *cur = &head;
+
+  while (*p) {
+    // Skip whitespace characters.
+    if (isspace(*p)) {
+      p++;
+      continue;
+    }
+
+    // Punctuator
+    if (*p == '+' || *p == '-') {
+      cur = new_token(TK_RESERVED, cur, p++);
+      continue;
+    }
+
+    // Integer literal
+    if (isdigit(*p)) {
+      cur = new_token(TK_NUM, cur, p);
+      cur->val = strtol(p, &p, 10);
+      continue;
+    }
+
+    error("invalid token");
+  }
+
+  new_token(TK_EOF, cur, p);
+  return head.next;
+}
 
 int main(int argc, char **argv) {
   if (argc != 2) {
-    fprintf(stderr, "%s: invalid number of arguments\n", argv[0]);
+    error("%s: invalid number of arguments", argv[0]);
     return 1;
   }
 
-  char *p = argv[1];
+  token = tokenize(argv[1]);
 
   printf(".intel_syntax noprefix\n");
   printf(".global main\n");
   printf("main:\n");
-  printf(" mov rax, %ld\n", strtol(p, &p, 10));
 
-  while (*p) {
-    if (*p == '+') {
-      p++;
-      printf(" add rax, %ld\n", strtol(p, &p, 10));
-      continue;
-    }
+  // The first token must be a number
+  printf(" mov rax, %d\n", expect_number());
 
-    if (*p == '-') {
-      p++;
-      printf(" sub rax, %ld\n", strtol(p, &p, 10));
+  // ... followed by either `+ <number>` or `- <number>`.
+  while (!at_eof()) {
+    if (consume('+')) {
+      printf(" add rax, %d\n", expect_number());
       continue;
     }
 
-    fprintf(stderr, "unexpected character: '%c'\n", *p);
-    return 1;
+    expect('-');
+    printf(" sub rax, %d\n", expect_number());
   }
 
   printf(" ret\n");
diff --git a/test.sh b/test.sh
index ca477d9..823139e 100755
--- a/test.sh
+++ b/test.sh
@@ -19,5 +19,6 @@ assert() {
 assert 0 0
 assert 42 42
 assert 21 '5+20-4'
+assert 41 ' 12 + 34 - 5 '
 
 echo OK
-- 
GitLab