diff --git a/test/unicode.c b/test/unicode.c index 6d04653c8292840a8646fb92163e21f4b34bab0b..9081eeec89140ddc6fdc0ce618e19b91fa50e543 100644 --- a/test/unicode.c +++ b/test/unicode.c @@ -21,6 +21,13 @@ int main() { ASSERT(12354, u'あ'); ASSERT(62307, u'🍣'); + ASSERT(4, sizeof(U'\0')); + ASSERT(1, U'\xffffffff'>>31); + ASSERT(97, U'a'); + ASSERT(946, U'β'); + ASSERT(12354, U'あ'); + ASSERT(127843, U'🍣'); + printf("OK\n"); return 0; } diff --git a/tokenize.c b/tokenize.c index 5905f91fe7d32a244ec228e31a7c9c9c7faf7bce..783e0fd7eb6f5ac7f4d74896dd21a61c2c530d44 100644 --- a/tokenize.c +++ b/tokenize.c @@ -470,6 +470,13 @@ Token *tokenize(File *file) { continue; } + // UTF-32 character literal + if (startswith(p, "U'")) { + cur = read_char_literal(cur, p + 1, ty_uint); + p += cur->len + 1; + continue; + } + // Identifier or keyword if (is_ident1(*p)) { char *q = p++;