From 725badfb494544b7c7f1d4c4690b9bc033c6d051 Mon Sep 17 00:00:00 2001 From: Rui Ueyama Date: Wed, 7 Oct 2020 20:11:16 +0900 Subject: [PATCH] Split main.c into multiple small files --- Makefile | 8 +- chibicc.h | 68 +++++++++ codegen.c | 75 ++++++++++ main.c | 411 +---------------------------------------------------- parse.c | 165 +++++++++++++++++++++ tokenize.c | 107 ++++++++++++++ 6 files changed, 425 insertions(+), 409 deletions(-) create mode 100644 chibicc.h create mode 100644 codegen.c create mode 100644 parse.c create mode 100644 tokenize.c diff --git a/Makefile b/Makefile index e6a3398..b733d58 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,11 @@ CFLAGS=-std=c11 -g -fno-common +SRCS=$(wildcard *.c) +OBJS=$(SRCS:.c=.o) -chibicc: main.o - $(CC) -o chibicc main.o $(LDFLAGS) +chibicc: $(OBJS) + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) + +$(OBJS): chibicc.h test: chibicc ./test.sh diff --git a/chibicc.h b/chibicc.h new file mode 100644 index 0000000..56763b3 --- /dev/null +++ b/chibicc.h @@ -0,0 +1,68 @@ +#include <assert.h> +#include <ctype.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +// +// tokenize.c +// + +typedef enum { + TK_PUNCT, // Keywords or punctuators + TK_NUM, // Numeric literals + TK_EOF, // End-of-file markers +} TokenKind; + +// Token type +typedef struct Token Token; +struct Token { + TokenKind kind; // Token kind + Token *next; // Next token + int val; // If kind is TK_NUM, its value + char *loc; // Token location + int len; // Token length +}; + +void error(char *fmt, ...); +void error_at(char *loc, char *fmt, ...); +void error_tok(Token *tok, char *fmt, ...); +bool equal(Token *tok, char *op); +Token *skip(Token *tok, char *op); +Token *tokenize(char *input); + +// +// parse.c +// + +typedef enum { + ND_ADD, // + + ND_SUB, // - + ND_MUL, // * + ND_DIV, // / + ND_NEG, // unary - + ND_EQ, // == + ND_NE, // != + ND_LT, // < + ND_LE, // <= + ND_NUM, // Integer +} NodeKind; + +// AST node type +typedef struct Node Node; +struct Node { + NodeKind kind; // Node
kind + Node *lhs; // Left-hand side + Node *rhs; // Right-hand side + int val; // Used if kind == ND_NUM +}; + +Node *parse(Token *tok); + +// +// codegen.c +// + +void codegen(Node *node); diff --git a/codegen.c b/codegen.c new file mode 100644 index 0000000..e7f792b --- /dev/null +++ b/codegen.c @@ -0,0 +1,75 @@ +#include "chibicc.h" + +static int depth; + +static void push(void) { + printf(" push %%rax\n"); + depth++; +} + +static void pop(char *arg) { + printf(" pop %s\n", arg); + depth--; +} + +static void gen_expr(Node *node) { + switch (node->kind) { + case ND_NUM: + printf(" mov $%d, %%rax\n", node->val); + return; + case ND_NEG: + gen_expr(node->lhs); + printf(" neg %%rax\n"); + return; + } + + gen_expr(node->rhs); + push(); + gen_expr(node->lhs); + pop("%rdi"); + + switch (node->kind) { + case ND_ADD: + printf(" add %%rdi, %%rax\n"); + return; + case ND_SUB: + printf(" sub %%rdi, %%rax\n"); + return; + case ND_MUL: + printf(" imul %%rdi, %%rax\n"); + return; + case ND_DIV: + printf(" cqo\n"); + printf(" idiv %%rdi\n"); + return; + case ND_EQ: + case ND_NE: + case ND_LT: + case ND_LE: + printf(" cmp %%rdi, %%rax\n"); + + if (node->kind == ND_EQ) + printf(" sete %%al\n"); + else if (node->kind == ND_NE) + printf(" setne %%al\n"); + else if (node->kind == ND_LT) + printf(" setl %%al\n"); + else if (node->kind == ND_LE) + printf(" setle %%al\n"); + + printf(" movzb %%al, %%rax\n"); + return; + } + + error("invalid expression"); +} + +void codegen(Node *node) { + printf(" .globl main\n"); + printf("main:\n"); + + gen_expr(node); + printf(" ret\n"); + + assert(depth == 0); +} diff --git a/main.c b/main.c index 0be4717..adb525c 100644 --- a/main.c +++ b/main.c @@ -1,414 +1,11 @@ -#include <assert.h> -#include <ctype.h> -#include <stdarg.h> -#include <stdbool.h> -#include <stdio.h> -#include <stdlib.h> -#include <string.h> - -// -// Tokenizer -// - -typedef enum { - TK_PUNCT, // Punctuators - TK_NUM, // Numeric literals - TK_EOF, // End-of-file markers -} TokenKind; - -// Token type -typedef struct Token Token; -struct Token { -
TokenKind kind; // Token kind - Token *next; // Next token - int val; // If kind is TK_NUM, its value - char *loc; // Token location - int len; // Token length -}; - -// Input string -static char *current_input; - -// Reports an error and exit. -static void error(char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - exit(1); -} - -// Reports an error location and exit. -static void verror_at(char *loc, char *fmt, va_list ap) { - int pos = loc - current_input; - fprintf(stderr, "%s\n", current_input); - fprintf(stderr, "%*s", pos, ""); // print pos spaces. - fprintf(stderr, "^ "); - vfprintf(stderr, fmt, ap); - fprintf(stderr, "\n"); - exit(1); -} - -static void error_at(char *loc, char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - verror_at(loc, fmt, ap); -} - -static void error_tok(Token *tok, char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - verror_at(tok->loc, fmt, ap); -} - -// Consumes the current token if it matches `s`. -static bool equal(Token *tok, char *op) { - return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0'; -} - -// Ensure that the current token is `s`. -static Token *skip(Token *tok, char *s) { - if (!equal(tok, s)) - error_tok(tok, "expected '%s'", s); - return tok->next; -} - -// Ensure that the current token is TK_NUM. -static int get_number(Token *tok) { - if (tok->kind != TK_NUM) - error_tok(tok, "expected a number"); - return tok->val; -} - -// Create a new token. -static Token *new_token(TokenKind kind, char *start, char *end) { - Token *tok = calloc(1, sizeof(Token)); - tok->kind = kind; - tok->loc = start; - tok->len = end - start; - return tok; -} - -static bool startswith(char *p, char *q) { - return strncmp(p, q, strlen(q)) == 0; -} - -// Read a punctuator token from p and returns its length. -static int read_punct(char *p) { - if (startswith(p, "==") || startswith(p, "!=") || - startswith(p, "<=") || startswith(p, ">=")) - return 2; - - return ispunct(*p) ? 
1 : 0; -} - -// Tokenize `current_input` and returns new tokens. -static Token *tokenize(void) { - char *p = current_input; - Token head = {}; - Token *cur = &head; - - while (*p) { - // Skip whitespace characters. - if (isspace(*p)) { - p++; - continue; - } - - // Numeric literal - if (isdigit(*p)) { - cur = cur->next = new_token(TK_NUM, p, p); - char *q = p; - cur->val = strtoul(p, &p, 10); - cur->len = p - q; - continue; - } - - // Punctuators - int punct_len = read_punct(p); - if (punct_len) { - cur = cur->next = new_token(TK_PUNCT, p, p + punct_len); - p += cur->len; - continue; - } - - error_at(p, "invalid token"); - } - - cur = cur->next = new_token(TK_EOF, p, p); - return head.next; -} - -// -// Parser -// - -typedef enum { - ND_ADD, // + - ND_SUB, // - - ND_MUL, // * - ND_DIV, // / - ND_NEG, // unary - - ND_EQ, // == - ND_NE, // != - ND_LT, // < - ND_LE, // <= - ND_NUM, // Integer -} NodeKind; - -// AST node type -typedef struct Node Node; -struct Node { - NodeKind kind; // Node kind - Node *lhs; // Left-hand side - Node *rhs; // Right-hand side - int val; // Used if kind == ND_NUM -}; - -static Node *new_node(NodeKind kind) { - Node *node = calloc(1, sizeof(Node)); - node->kind = kind; - return node; -} - -static Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) { - Node *node = new_node(kind); - node->lhs = lhs; - node->rhs = rhs; - return node; -} - -static Node *new_unary(NodeKind kind, Node *expr) { - Node *node = new_node(kind); - node->lhs = expr; - return node; -} - -static Node *new_num(int val) { - Node *node = new_node(ND_NUM); - node->val = val; - return node; -} - -static Node *expr(Token **rest, Token *tok); -static Node *equality(Token **rest, Token *tok); -static Node *relational(Token **rest, Token *tok); -static Node *add(Token **rest, Token *tok); -static Node *mul(Token **rest, Token *tok); -static Node *unary(Token **rest, Token *tok); -static Node *primary(Token **rest, Token *tok); - -// expr = equality -static Node *expr(Token 
**rest, Token *tok) { - return equality(rest, tok); -} - -// equality = relational ("==" relational | "!=" relational)* -static Node *equality(Token **rest, Token *tok) { - Node *node = relational(&tok, tok); - - for (;;) { - if (equal(tok, "==")) { - node = new_binary(ND_EQ, node, relational(&tok, tok->next)); - continue; - } - - if (equal(tok, "!=")) { - node = new_binary(ND_NE, node, relational(&tok, tok->next)); - continue; - } - - *rest = tok; - return node; - } -} - -// relational = add ("<" add | "<=" add | ">" add | ">=" add)* -static Node *relational(Token **rest, Token *tok) { - Node *node = add(&tok, tok); - - for (;;) { - if (equal(tok, "<")) { - node = new_binary(ND_LT, node, add(&tok, tok->next)); - continue; - } - - if (equal(tok, "<=")) { - node = new_binary(ND_LE, node, add(&tok, tok->next)); - continue; - } - - if (equal(tok, ">")) { - node = new_binary(ND_LT, add(&tok, tok->next), node); - continue; - } - - if (equal(tok, ">=")) { - node = new_binary(ND_LE, add(&tok, tok->next), node); - continue; - } - - *rest = tok; - return node; - } -} - -// add = mul ("+" mul | "-" mul)* -static Node *add(Token **rest, Token *tok) { - Node *node = mul(&tok, tok); - - for (;;) { - if (equal(tok, "+")) { - node = new_binary(ND_ADD, node, mul(&tok, tok->next)); - continue; - } - - if (equal(tok, "-")) { - node = new_binary(ND_SUB, node, mul(&tok, tok->next)); - continue; - } - - *rest = tok; - return node; - } -} - -// mul = unary ("*" unary | "/" unary)* -static Node *mul(Token **rest, Token *tok) { - Node *node = unary(&tok, tok); - - for (;;) { - if (equal(tok, "*")) { - node = new_binary(ND_MUL, node, unary(&tok, tok->next)); - continue; - } - - if (equal(tok, "/")) { - node = new_binary(ND_DIV, node, unary(&tok, tok->next)); - continue; - } - - *rest = tok; - return node; - } -} - -// unary = ("+" | "-") unary -// | primary -static Node *unary(Token **rest, Token *tok) { - if (equal(tok, "+")) - return unary(rest, tok->next); - - if (equal(tok, "-")) - 
return new_unary(ND_NEG, unary(rest, tok->next)); - - return primary(rest, tok); -} - -// primary = "(" expr ")" | num -static Node *primary(Token **rest, Token *tok) { - if (equal(tok, "(")) { - Node *node = expr(&tok, tok->next); - *rest = skip(tok, ")"); - return node; - } - - if (tok->kind == TK_NUM) { - Node *node = new_num(tok->val); - *rest = tok->next; - return node; - } - - error_tok(tok, "expected an expression"); -} - -// -// Code generator -// - -static int depth; - -static void push(void) { - printf(" push %%rax\n"); - depth++; -} - -static void pop(char *arg) { - printf(" pop %s\n", arg); - depth--; -} - -static void gen_expr(Node *node) { - switch (node->kind) { - case ND_NUM: - printf(" mov $%d, %%rax\n", node->val); - return; - case ND_NEG: - gen_expr(node->lhs); - printf(" neg %%rax\n"); - return; - } - - gen_expr(node->rhs); - push(); - gen_expr(node->lhs); - pop("%rdi"); - - switch (node->kind) { - case ND_ADD: - printf(" add %%rdi, %%rax\n"); - return; - case ND_SUB: - printf(" sub %%rdi, %%rax\n"); - return; - case ND_MUL: - printf(" imul %%rdi, %%rax\n"); - return; - case ND_DIV: - printf(" cqo\n"); - printf(" idiv %%rdi\n"); - return; - case ND_EQ: - case ND_NE: - case ND_LT: - case ND_LE: - printf(" cmp %%rdi, %%rax\n"); - - if (node->kind == ND_EQ) - printf(" sete %%al\n"); - else if (node->kind == ND_NE) - printf(" setne %%al\n"); - else if (node->kind == ND_LT) - printf(" setl %%al\n"); - else if (node->kind == ND_LE) - printf(" setle %%al\n"); - - printf(" movzb %%al, %%rax\n"); - return; - } - - error("invalid expression"); -} +#include "chibicc.h" int main(int argc, char **argv) { if (argc != 2) error("%s: invalid number of arguments", argv[0]); - // Tokenize and parse. - current_input = argv[1]; - Token *tok = tokenize(); - Node *node = expr(&tok, tok); - - if (tok->kind != TK_EOF) - error_tok(tok, "extra token"); - - printf(" .globl main\n"); - printf("main:\n"); - - // Traverse the AST to emit assembly. 
- gen_expr(node); - printf(" ret\n"); - - assert(depth == 0); + Token *tok = tokenize(argv[1]); + Node *node = parse(tok); + codegen(node); return 0; } diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..e855d9c --- /dev/null +++ b/parse.c @@ -0,0 +1,165 @@ +#include "chibicc.h" + +static Node *expr(Token **rest, Token *tok); +static Node *equality(Token **rest, Token *tok); +static Node *relational(Token **rest, Token *tok); +static Node *add(Token **rest, Token *tok); +static Node *mul(Token **rest, Token *tok); +static Node *unary(Token **rest, Token *tok); +static Node *primary(Token **rest, Token *tok); + +static Node *new_node(NodeKind kind) { + Node *node = calloc(1, sizeof(Node)); + node->kind = kind; + return node; +} + +static Node *new_binary(NodeKind kind, Node *lhs, Node *rhs) { + Node *node = new_node(kind); + node->lhs = lhs; + node->rhs = rhs; + return node; +} + +static Node *new_unary(NodeKind kind, Node *expr) { + Node *node = new_node(kind); + node->lhs = expr; + return node; +} + +static Node *new_num(int val) { + Node *node = new_node(ND_NUM); + node->val = val; + return node; +} + +// expr = equality +static Node *expr(Token **rest, Token *tok) { + return equality(rest, tok); +} + +// equality = relational ("==" relational | "!=" relational)* +static Node *equality(Token **rest, Token *tok) { + Node *node = relational(&tok, tok); + + for (;;) { + if (equal(tok, "==")) { + node = new_binary(ND_EQ, node, relational(&tok, tok->next)); + continue; + } + + if (equal(tok, "!=")) { + node = new_binary(ND_NE, node, relational(&tok, tok->next)); + continue; + } + + *rest = tok; + return node; + } +} + +// relational = add ("<" add | "<=" add | ">" add | ">=" add)* +static Node *relational(Token **rest, Token *tok) { + Node *node = add(&tok, tok); + + for (;;) { + if (equal(tok, "<")) { + node = new_binary(ND_LT, node, add(&tok, tok->next)); + continue; + } + + if (equal(tok, "<=")) { + node = new_binary(ND_LE, node, add(&tok, 
tok->next)); + continue; + } + + if (equal(tok, ">")) { + node = new_binary(ND_LT, add(&tok, tok->next), node); + continue; + } + + if (equal(tok, ">=")) { + node = new_binary(ND_LE, add(&tok, tok->next), node); + continue; + } + + *rest = tok; + return node; + } +} + +// add = mul ("+" mul | "-" mul)* +static Node *add(Token **rest, Token *tok) { + Node *node = mul(&tok, tok); + + for (;;) { + if (equal(tok, "+")) { + node = new_binary(ND_ADD, node, mul(&tok, tok->next)); + continue; + } + + if (equal(tok, "-")) { + node = new_binary(ND_SUB, node, mul(&tok, tok->next)); + continue; + } + + *rest = tok; + return node; + } +} + +// mul = unary ("*" unary | "/" unary)* +static Node *mul(Token **rest, Token *tok) { + Node *node = unary(&tok, tok); + + for (;;) { + if (equal(tok, "*")) { + node = new_binary(ND_MUL, node, unary(&tok, tok->next)); + continue; + } + + if (equal(tok, "/")) { + node = new_binary(ND_DIV, node, unary(&tok, tok->next)); + continue; + } + + *rest = tok; + return node; + } +} + +// unary = ("+" | "-") unary +// | primary +static Node *unary(Token **rest, Token *tok) { + if (equal(tok, "+")) + return unary(rest, tok->next); + + if (equal(tok, "-")) + return new_unary(ND_NEG, unary(rest, tok->next)); + + return primary(rest, tok); +} + +// primary = "(" expr ")" | num +static Node *primary(Token **rest, Token *tok) { + if (equal(tok, "(")) { + Node *node = expr(&tok, tok->next); + *rest = skip(tok, ")"); + return node; + } + + if (tok->kind == TK_NUM) { + Node *node = new_num(tok->val); + *rest = tok->next; + return node; + } + + error_tok(tok, "expected an expression"); +} + +Node *parse(Token *tok) { + Node *node = expr(&tok, tok); + if (tok->kind != TK_EOF) + error_tok(tok, "extra token"); + return node; +} diff --git a/tokenize.c b/tokenize.c new file mode 100644 index 0000000..cc05255 --- /dev/null +++ b/tokenize.c @@ -0,0 +1,107 @@ +#include "chibicc.h" + +// Input string +static char *current_input; + +// Reports an error and exit. 
+void error(char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + exit(1); +} + +// Reports an error at the given location and exits. +static void verror_at(char *loc, char *fmt, va_list ap) { + int pos = loc - current_input; + fprintf(stderr, "%s\n", current_input); + fprintf(stderr, "%*s", pos, ""); // print pos spaces. + fprintf(stderr, "^ "); + vfprintf(stderr, fmt, ap); + fprintf(stderr, "\n"); + exit(1); +} + +void error_at(char *loc, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + verror_at(loc, fmt, ap); +} + +void error_tok(Token *tok, char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + verror_at(tok->loc, fmt, ap); +} + +// Returns true if the current token matches `op`. +bool equal(Token *tok, char *op) { + return memcmp(tok->loc, op, tok->len) == 0 && op[tok->len] == '\0'; +} + +// Ensures that the current token is `op` and returns the next token. +Token *skip(Token *tok, char *op) { + if (!equal(tok, op)) + error_tok(tok, "expected '%s'", op); + return tok->next; +} + +// Creates a new token. +static Token *new_token(TokenKind kind, char *start, char *end) { + Token *tok = calloc(1, sizeof(Token)); + tok->kind = kind; + tok->loc = start; + tok->len = end - start; + return tok; +} + +static bool startswith(char *p, char *q) { + return strncmp(p, q, strlen(q)) == 0; +} + +// Reads a punctuator token from p and returns its length. +static int read_punct(char *p) { + if (startswith(p, "==") || startswith(p, "!=") || + startswith(p, "<=") || startswith(p, ">=")) + return 2; + + return ispunct(*p) ? 1 : 0; +} + +// Tokenizes the given string and returns new tokens. +Token *tokenize(char *p) { + current_input = p; + Token head = {}; + Token *cur = &head; + + while (*p) { + // Skip whitespace characters.
+ if (isspace(*p)) { + p++; + continue; + } + + // Numeric literal + if (isdigit(*p)) { + cur = cur->next = new_token(TK_NUM, p, p); + char *q = p; + cur->val = strtoul(p, &p, 10); + cur->len = p - q; + continue; + } + + // Punctuators + int punct_len = read_punct(p); + if (punct_len) { + cur = cur->next = new_token(TK_PUNCT, p, p + punct_len); + p += cur->len; + continue; + } + + error_at(p, "invalid token"); + } + + cur = cur->next = new_token(TK_EOF, p, p); + return head.next; +} -- GitLab