From c95fb822772bee1b872cb23d3c58e760616ec6b8 Mon Sep 17 00:00:00 2001
From: Rui Ueyama
Date: Mon, 6 Jul 2020 18:59:25 +0900
Subject: [PATCH] Add UTF-32 character literal

---
 test/unicode.c | 7 +++++++
 tokenize.c     | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/test/unicode.c b/test/unicode.c
index 6d04653..9081eee 100644
--- a/test/unicode.c
+++ b/test/unicode.c
@@ -21,6 +21,13 @@ int main() {
   ASSERT(12354, u'あ');
   ASSERT(62307, u'🍣');
 
+  ASSERT(4, sizeof(U'\0'));
+  ASSERT(1, U'\xffffffff'>>31);
+  ASSERT(97, U'a');
+  ASSERT(946, U'β');
+  ASSERT(12354, U'あ');
+  ASSERT(127843, U'🍣');
+
   printf("OK\n");
   return 0;
 }
diff --git a/tokenize.c b/tokenize.c
index 5905f91..783e0fd 100644
--- a/tokenize.c
+++ b/tokenize.c
@@ -470,6 +470,13 @@ Token *tokenize(File *file) {
       continue;
     }
 
+    // UTF-32 character literal
+    if (startswith(p, "U'")) {
+      cur = read_char_literal(cur, p + 1, ty_uint);
+      p += cur->len + 1;
+      continue;
+    }
+
     // Identifier or keyword
     if (is_ident1(*p)) {
       char *q = p++;
-- 
GitLab