aboutsummaryrefslogtreecommitdiff
path: root/src/build/cmeta.c
diff options
context:
space:
mode:
authorGravatar Michael Smith <mikesmiffy128@gmail.com> 2025-03-10 02:37:19 +0000
committerGravatar Michael Smith <mikesmiffy128@gmail.com> 2025-04-06 16:41:13 +0100
commit244fea664121acf12871ab5858a5fe95a2606b52 (patch)
treee42b1990ef97adc0f0ab48b9be7e11de7fee0558 /src/build/cmeta.c
parentd86b7b41453c69b3854baa7cdc05a79a3cdfe092 (diff)
downloadsst-244fea664121acf12871ab5858a5fe95a2606b52.tar.gz
sst-244fea664121acf12871ab5858a5fe95a2606b52.zip
Rewrite and redesign codegen and feature system
Also switch to somewhat proper C23 flags while we're at it. This is a huge change. It took me forever, in between being really busy. Sorry about that. But the good news is I'm now free to start integrating the various patches that have accumulated since last release. Well, at least in between still being really busy. Gotta manage expectations. The main benefit of introducing GAMESPECIFIC() is that features that don't apply to a particular game no longer show up *at all*, and less time is wasted on init. It also enables a cool optimisation wherein unnecessary REQUIRE_GAMEDATA() checks can elided at compile time whenever the gamedata is known up-front to always exist in supported games. The DEF_FEAT_CVAR macro family meanwhile makes it easier to manage the lifecycle of cvars/ccmds, with less manual registering, unhiding and such. Originally I was going to try and just hack these features into the existing codegen abomination, but it just got too terrible. This rewrite should make it easier to continue tweaking codegen behaviour in future. It also has slightly better error messages.
Diffstat (limited to 'src/build/cmeta.c')
-rw-r--r--src/build/cmeta.c454
1 files changed, 156 insertions, 298 deletions
diff --git a/src/build/cmeta.c b/src/build/cmeta.c
index 8a2416d..1903e84 100644
--- a/src/build/cmeta.c
+++ b/src/build/cmeta.c
@@ -1,5 +1,5 @@
/*
- * Copyright © 2024 Michael Smith <mikesmiffy128@gmail.com>
+ * Copyright © 2025 Michael Smith <mikesmiffy128@gmail.com>
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -15,22 +15,12 @@
*/
#include <stdio.h>
-#include <string.h>
+#include <stdlib.h>
#include "../intdefs.h"
+#include "../langext.h"
#include "../os.h"
#include "cmeta.h"
-#include "vec.h"
-
-/*
- * This file does C metadata parsing/scraping for the build system. This
- * facilitates tasks ranging from determining header dependencies to searching
- * for certain magic macros (for example cvar/command declarations) to generate
- * other code.
- *
- * It's a bit of a mess since it's kind of just hacked together for use at build
- * time. Don't worry about it too much.
- */
// lazy inlined 3rd party stuff {{{
// too lazy to write a C tokenizer at the moment, or indeed probably ever, so
@@ -56,350 +46,218 @@ Type *ty_double = &(Type){TY_DOUBLE, 8, 8};
Type *ty_ldouble = &(Type){TY_LDOUBLE, 16, 16};
// inline just a couple more things, super lazy, but whatever
static Type *new_type(TypeKind kind, int size, int align) {
- Type *ty = calloc(1, sizeof(Type));
- ty->kind = kind;
- ty->size = size;
- ty->align = align;
- return ty;
+ Type *ty = calloc(1, sizeof(Type));
+ ty->kind = kind;
+ ty->size = size;
+ ty->align = align;
+ return ty;
}
Type *array_of(Type *base, int len) {
- Type *ty = new_type(TY_ARRAY, base->size * len, base->align);
- ty->base = base;
- ty->array_len = len;
- return ty;
+ Type *ty = new_type(TY_ARRAY, base->size * len, base->align);
+ ty->base = base;
+ ty->array_len = len;
+ return ty;
}
#include "../3p/chibicc/hashmap.c"
#include "../3p/chibicc/strings.c"
#include "../3p/chibicc/tokenize.c"
-// one more copypaste from preprocess.c for #include <filename> and then I'm
-// done I promise
-static char *join_tokens(const Token *tok, const Token *end) {
- int len = 1;
- for (const Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) {
- if (t != tok && t->has_space)
- len++;
- len += t->len;
- }
- char *buf = calloc(1, len);
- int pos = 0;
- for (const Token *t = tok; t != end && t->kind != TK_EOF; t = t->next) {
- if (t != tok && t->has_space)
- buf[pos++] = ' ';
- strncpy(buf + pos, t->loc, t->len);
- pos += t->len;
- }
- buf[pos] = '\0';
- return buf;
-}
// }}}
#ifdef _WIN32
#include "../3p/openbsd/asprintf.c" // missing from libc; plonked here for now
#endif
-static void die(const char *s) {
+static noreturn die(int status, const char *s) {
fprintf(stderr, "cmeta: fatal: %s\n", s);
- exit(100);
+ exit(status);
}
-static char *readsource(const os_char *path) {
+struct cmeta cmeta_loadfile(const os_char *path) {
int f = os_open_read(path);
- if (f == -1) return 0;
- uint bufsz = 8192;
- char *buf = malloc(bufsz);
- if (!buf) die("couldn't allocate memory");
- int nread;
- int off = 0;
- while ((nread = os_read(f, buf + off, bufsz - off)) > 0) {
- off += nread;
- if (off == bufsz) {
- bufsz *= 2;
- // somewhat arbitrary cutoff
- if (bufsz == 1 << 30) die("input file is too large");
- buf = realloc(buf, bufsz);
- if (!buf) die("couldn't reallocate memory");
- }
- }
- if (nread == -1) die("couldn't read file");
- buf[off] = 0;
+ if (f == -1) die(100, "couldn't open file");
+ vlong len = os_fsize(f);
+ if (len > 1u << 30 - 1) die(2, "input file is far too large");
+ struct cmeta ret;
+ ret.sbase = malloc(len + 1);
+ ret.sbase[len] = '\0'; // chibicc needs a null terminator
+ if (!ret.sbase) die(100, "couldn't allocate memory");
+ if (os_read(f, ret.sbase, len) != len) die(100, "couldn't read file");
+ int maxitems = len / 4; // shortest word is "END"
+ ret.nitems = 0;
+ // eventual overall memory requirement: file size * 6. seems fine to me.
+ // current memory requirement: file size * 10, + all the chibicc linked list
+ // crap. not as good but we'll continue tolerating it... probably for years!
+ //ret.itemoffs = malloc(maxitems * sizeof(*ret.itemoffs));
+ //if (!ret.itemoffs) die(100, "couldn't allocate memory");
+ ret.itemtoks = malloc(maxitems * sizeof(*ret.itemtoks));
+ if (!ret.itemtoks) die(100, "couldn't allocate memory");
+ ret.itemtypes = malloc(maxitems * sizeof(*ret.itemtypes));
+ if (!ret.itemtypes) die(100, "couldn't allocate memory");
os_close(f);
- return buf;
-}
-
-// as per cmeta.h this is totally opaque; it's actually just a Token in disguise
-struct cmeta;
-
-const struct cmeta *cmeta_loadfile(const os_char *path) {
- char *buf = readsource(path);
- if (!buf) return 0;
#ifdef _WIN32
char *realname = malloc(wcslen(path) + 1);
- if (!realname) die("couldn't allocate memory");
+ if (!realname) die(100, "couldn't allocate memory");
// XXX: being lazy about Unicode right now; a general purpose tool should
// implement WTF8 or something. SST itself doesn't have any unicode paths
- // though, so don't really care as much.
+ // though, so we don't really care as much. this code still sucks though.
*realname = *path;
for (const ushort *p = path + 1; p[-1]; ++p) realname[p - path] = *p;
#else
const char *realname = f;
#endif
- return (const struct cmeta *)tokenize_buf(realname, buf);
-}
-
-// NOTE: we don't care about conditional includes, nor do we expand macros. We
-// just parse the minimum info to get what we need for SST. Also, there's not
-// too much in the way of syntax checking; if an error gets ignored the compiler
-// picks it up anyway, and gives far better diagnostics.
-void cmeta_includes(const struct cmeta *cm,
- void (*cb)(const char *f, bool issys, void *ctxt), void *ctxt) {
- const Token *tp = (const Token *)cm;
- if (!tp || !tp->next || !tp->next->next) return; // #, include, "string"
- while (tp) {
- if (!tp->at_bol || !equal(tp, "#")) { tp = tp->next; continue; }
- if (!equal(tp->next, "include")) { tp = tp->next->next; continue; }
- tp = tp->next->next;
- if (!tp) break;
- if (tp->at_bol) tp = tp->next;
- if (!tp) break;
- if (tp->kind == TK_STR) {
- // include strings are a special case; they don't have \escapes.
- char *copy = malloc(tp->len - 1);
- if (!copy) die("couldn't allocate memory");
- memcpy(copy, tp->loc + 1, tp->len - 2);
- copy[tp->len - 2] = '\0';
- cb(copy, false, ctxt);
- //free(copy); // ??????
+ struct Token *t = tokenize_buf(realname, ret.sbase);
+ // everything is THING() or THING {} so we need at least 3 tokens ahead - if
+ // we have fewer tokens left in the file we can bail
+ if (t && t->next) while (t->next->next) {
+ if (!t->at_bol) {
+ t = t->next;
+ continue;
}
- else if (equal(tp, "<")) {
- tp = tp->next;
- if (!tp) break;
- const Token *end = tp;
- while (!equal(end, ">")) {
- end = end->next;
- if (!end) return; // shouldn't happen in valid source obviously
- if (end->at_bol) break; // ??????
- }
- char *joined = join_tokens(tp, end); // just use func from chibicc
- cb(joined, true, ctxt);
- //free(joined); // ??????
+ int type;
+ if ((equal(t, "DEF_CVAR") || equal(t, "DEF_CVAR_MIN") ||
+ equal(t, "DEF_CVAR_MAX") || equal(t, "DEF_CVAR_MINMAX") ||
+ equal(t, "DEF_CVAR_UNREG") || equal(t, "DEF_CVAR_MIN_UNREG") ||
+ equal(t, "DEF_CVAR_MAX_UNREG") ||
+ equal(t, "DEF_CVAR_MINMAX_UNREG") ||
+ equal(t, "DEF_FEAT_CVAR") || equal(t, "DEF_FEAT_CVAR_MIN") ||
+ equal(t, "DEF_FEAT_CVAR_MAX") ||
+ equal(t, "DEF_FEAT_CVAR_MINMAX")) && equal(t->next, "(")) {
+ type = CMETA_ITEM_DEF_CVAR;
}
- // get to the next line (standard allows extra tokens because)
- while (!tp->at_bol) {
- tp = tp->next;
- if (!tp) return;
+ else if ((equal(t, "DEF_CCMD") || equal(t, "DEF_CCMD_HERE") ||
+ equal(t, "DEF_CCMD_UNREG") || equal(t, "DEF_CCMD_HERE_UNREG") ||
+ equal(t, "DEF_CCMD_PLUSMINUS") ||
+ equal(t, "DEF_CCMD_PLUSMINUS_UNREG") ||
+ equal(t, "DEF_FEAT_CCMD") || equal(t, "DEF_FEAT_CCMD_HERE") ||
+ equal(t, "DEF_FEAT_CCMD_PLUSMINUS")) && equal(t->next, "(")) {
+ type = CMETA_ITEM_DEF_CCMD;
}
- }
-}
-
-// AGAIN, NOTE: this doesn't *perfectly* match top level decls only in the event
-// that someone writes something weird, but we just don't really care because
-// we're not writing something weird. Don't write something weird!
-void cmeta_conmacros(const struct cmeta *cm,
- void (*cb)(const char *, bool, bool)) {
- const Token *tp = (const Token *)cm;
- if (!tp || !tp->next || !tp->next->next) return; // DEF_xyz, (, name
- while (tp) {
- bool isplusminus = false, isvar = false;
- bool unreg = false;
- // this is like the worst thing ever, but oh well it's just build time
- // XXX: tidy this up some day, though, probably
- if (equal(tp, "DEF_CCMD_PLUSMINUS")) {
- isplusminus = true;
+ else if ((equal(t, "DEF_EVENT") || equal(t, "DEF_PREDICATE")) &&
+ equal(t->next, "(")) {
+ type = CMETA_ITEM_DEF_EVENT;
+ }
+ else if (equal(t, "HANDLE_EVENT") && equal(t->next, "(")) {
+ type = CMETA_ITEM_HANDLE_EVENT;
}
- else if (equal(tp, "DEF_CCMD_PLUSMINUS_UNREG")) {
- isplusminus = true;
- unreg = true;
+ else if (equal(t, "FEATURE") && equal(t->next, "(")) {
+ type = CMETA_ITEM_FEATURE;
}
- else if (equal(tp, "DEF_CVAR") || equal(tp, "DEF_CVAR_MIN") ||
- equal(tp, "DEF_CVAR_MAX") || equal(tp, "DEF_CVAR_MINMAX")) {
- isvar = true;
+ else if ((equal(t, "REQUIRE") || equal(t, "REQUIRE_GAMEDATA") ||
+ equal(t, "REQUIRE_GLOBAL") || equal(t, "REQUEST")) &&
+ equal(t->next, "(")) {
+ type = CMETA_ITEM_REQUIRE;
}
- else if (equal(tp, "DEF_CVAR_UNREG") ||
- equal(tp, "DEF_CVAR_MIN_UNREG") ||
- equal(tp, "DEF_CVAR_MAX_UNREG") ||
- equal(tp, "DEF_CVAR_MINMAX_UNREG")) {
- isvar = true;
- unreg = true;
+ else if (equal(t, "GAMESPECIFIC") && equal(t->next, "(")) {
+ type = CMETA_ITEM_GAMESPECIFIC;
}
- else if (equal(tp, "DEF_CCMD_UNREG") ||
- equal(tp, "DEF_CCMD_HERE_UNREG")) {
- unreg = true;
+ else if (equal(t, "PREINIT") && equal(t->next, "{")) {
+ type = CMETA_ITEM_PREINIT;
}
- else if (!equal(tp, "DEF_CCMD") && !equal(tp, "DEF_CCMD_HERE")) {
- tp = tp->next; continue;
+ else if (equal(t, "INIT") && equal(t->next, "{")) {
+ type = CMETA_ITEM_INIT;
}
- if (!equal(tp->next, "(")) { tp = tp->next->next; continue; }
- tp = tp->next->next;
- if (isplusminus) {
- // XXX: this is stupid but whatever
- char *plusname = malloc(sizeof("PLUS_") + tp->len);
- if (!plusname) die("couldn't allocate memory");
- memcpy(plusname, "PLUS_", 5);
- memcpy(plusname + sizeof("PLUS_") - 1, tp->loc, tp->len);
- plusname[sizeof("PLUS_") - 1 + tp->len] = '\0';
- cb(plusname, false, unreg);
- char *minusname = malloc(sizeof("MINUS_") + tp->len);
- if (!minusname) die("couldn't allocate memory");
- memcpy(minusname, "MINUS_", 5);
- memcpy(minusname + sizeof("MINUS_") - 1, tp->loc, tp->len);
- minusname[sizeof("MINUS_") - 1 + tp->len] = '\0';
- cb(minusname, false, unreg);
+ else if (equal(t, "END") && equal(t->next, "{")) {
+ type = CMETA_ITEM_END;
}
else {
- char *name = malloc(tp->len + 1);
- if (!name) die("couldn't allocate memory");
- memcpy(name, tp->loc, tp->len);
- name[tp->len] = '\0';
- cb(name, isvar, unreg);
+ t = t->next;
+ continue;
}
- tp = tp->next;
+ ret.itemtoks[ret.nitems] = t;
+ ret.itemtypes[ret.nitems] = type;
+ ++ret.nitems;
+ // this is kind of inefficient; in most cases we can skip more stuff,
+ // but then also, we're always scanning for something specific, so who
+ // cares actually, this will do for now.
+ t = t->next->next;
}
+ return ret;
}
-const char *cmeta_findfeatmacro(const struct cmeta *cm) {
- const Token *tp = (const Token *)cm;
- if (!tp || !tp->next) return 0; // FEATURE, (
- while (tp) {
- if (equal(tp, "FEATURE") && equal(tp->next, "(")) {
- if (equal(tp->next->next, ")")) return ""; // no arg = no desc
- if (!tp->next->next || tp->next->next->kind != TK_STR) {
- return 0; // it's invalid, whatever, just return...
- }
- return tp->next->next->str;
- }
- tp = tp->next;
+int cmeta_flags_cvar(const struct cmeta *cm, u32 i) {
+ struct Token *t = cm->itemtoks[i];
+ switch_exhaust (t->len) {
+ // It JUST so happens all of the possible tokens here have a unique
+ // length. I swear this wasn't planned. But it IS convenient!
+ case 8: case 12: case 15: return 0;
+ case 14: case 18: case 21: return CMETA_CVAR_UNREG;
+ case 13: case 17: case 20: return CMETA_CVAR_FEAT;
}
- return 0;
}
-void cmeta_featinfomacros(const struct cmeta *cm, void (*cb)(
- enum cmeta_featmacro type, const char *param, void *ctxt), void *ctxt) {
- const Token *tp = (const Token *)cm;
- if (!tp || !tp->next) return;
- while (tp) {
- int type = -1;
- if (equal(tp, "PREINIT")) {
- type = CMETA_FEAT_PREINIT;
- }
- else if (equal(tp, "INIT")) {
- type = CMETA_FEAT_INIT;
- }
- else if (equal(tp, "END")) {
- type = CMETA_FEAT_END;
- }
- if (type != - 1) {
- if (equal(tp->next, "{")) {
- cb(type, 0, ctxt);
- tp = tp->next;
- }
- tp = tp->next;
- continue;
- }
- if (equal(tp, "REQUIRE")) {
- type = CMETA_FEAT_REQUIRE;
- }
- else if (equal(tp, "REQUIRE_GAMEDATA")) {
- type = CMETA_FEAT_REQUIREGD;
- }
- else if (equal(tp, "REQUIRE_GLOBAL")) {
- type = CMETA_FEAT_REQUIREGLOBAL;
- }
- else if (equal(tp, "REQUEST")) {
- type = CMETA_FEAT_REQUEST;
- }
- if (type != -1) {
- if (equal(tp->next, "(") && tp->next->next) {
- tp = tp->next->next;
- char *param = malloc(tp->len + 1);
- if (!param) die("couldn't allocate memory");
- memcpy(param, tp->loc, tp->len);
- param[tp->len] = '\0';
- cb(type, param, ctxt);
- tp = tp->next;
- }
- }
- tp = tp->next;
+int cmeta_flags_ccmd(const struct cmeta *cm, u32 i) {
+ struct Token *t = cm->itemtoks[i];
+ switch_exhaust (t->len) {
+ case 13: if (t->loc[4] == 'F') return CMETA_CCMD_FEAT;
+ case 8: return 0;
+ case 18: if (t->loc[4] == 'F') return CMETA_CCMD_FEAT;
+ return CMETA_CCMD_PLUSMINUS;
+ case 14: case 19: return CMETA_CCMD_UNREG;
+ case 23: return CMETA_CCMD_FEAT | CMETA_CCMD_PLUSMINUS;
+ case 24: return CMETA_CCMD_UNREG | CMETA_CCMD_PLUSMINUS;
}
}
-struct vec_str VEC(const char *);
+int cmeta_flags_event(const struct cmeta *cm, u32 i) {
+ // assuming CMETA_EVENT_ISPREDICATE remains 1, the ternary should
+ // optimise out
+ return cm->itemtoks[i]->len == 13 ? CMETA_EVENT_ISPREDICATE : 0;
+}
-static void pushmacroarg(const Token *last, const char *start,
- struct vec_str *list) {
- int len = last->loc - start + last->len;
- char *dup = malloc(len + 1);
- if (!dup) die("couldn't allocate memory");
- memcpy(dup, start, len);
- dup[len] = '\0';
- if (!vec_push(list, dup)) die("couldn't append to array");
+int cmeta_flags_require(const struct cmeta *cm, u32 i) {
+ struct Token *t = cm->itemtoks[i];
+ // NOTE: this is somewhat more flexible to enable REQUEST_GAMEDATA or
+ // something in future, although that's kind of useless currently
+ int optflag = t->loc[4] == 'E'; // REQU[E]ST
+ switch_exhaust (t->len) {
+ case 7: return optflag;
+ case 16: return optflag | CMETA_REQUIRE_GAMEDATA;
+ case 14: return optflag | CMETA_REQUIRE_GLOBAL;
+ };
}
-// XXX: maybe this should be used for the other functions too. it'd be less ugly
-// and handle closing parentheses better, but alloc for tokens we don't care
-// about. probably a worthy tradeoff?
-static const Token *macroargs(const Token *t, struct vec_str *list) {
- int paren = 1;
- const Token *last; // avoids copying extra ws/comments in
- for (const char *start = t->loc; t; last = t, t = t->next) {
- if (equal(t, "(")) {
- ++paren;
- }
- else if (equal(t, ")")) {
- if (!--paren) {
- pushmacroarg(last, start, list);
- return t->next;
- }
- }
- else if (paren == 1 && equal(t, ",")) {
- pushmacroarg(last, start, list);
- t = t->next;
- if (t) start = t->loc; // slightly annoying...
- }
+int cmeta_nparams(const struct cmeta *cm, u32 i) {
+ int argc = 1, nest = 0;
+ struct Token *t = cm->itemtoks[i]->next->next;
+ if (equal(t, ")")) return 0; // XXX: stupid special case, surely improvable?
+ for (; t; t = t->next) {
+ if (equal(t, "(")) { ++nest; continue; }
+ if (!nest && equal(t, ",")) ++argc;
+ else if (equal(t, ")") && !nest--) break;
}
- // I guess we handle this here.
- fprintf(stderr, "cmeta: fatal: unexpected EOF in %s\n", t->filename);
- exit(2);
+ if (nest != -1) return 0; // XXX: any need to do anything better here?
+ return argc;
+}
+
+struct cmeta_param_iter cmeta_param_iter_init(const struct cmeta *cm, u32 i) {
+ return (struct cmeta_param_iter){cm->itemtoks[i]->next->next};
}
-void cmeta_evdefmacros(const struct cmeta *cm, void (*cb)(const char *name,
- const char *const *params, int nparams, bool predicate)) {
- const Token *tp = (const Token *)cm;
- if (!tp || !tp->next || !tp->next->next) return; // DEF_EVENT, (, name
- while (tp) {
- bool predicate = true;
- if (equal(tp, "DEF_EVENT") && equal(tp->next, "(")) {
- predicate = false;
+struct cmeta_slice cmeta_param_iter(struct cmeta_param_iter *it) {
+ int nest = 0;
+ const char *start = it->cur->loc;
+ for (struct Token *last = 0; it->cur;
+ last = it->cur, it->cur = it->cur->next) {
+ if (equal(it->cur, "(")) { ++nest; continue; }
+ if (!nest && equal(it->cur, ",")) {
+ if (!last) { // , immediately after (, for some reason. treat as ""
+ return (struct cmeta_slice){start, 0};
+ }
+ it->cur = it->cur->next;
}
- else if (!equal(tp, "DEF_PREDICATE") || !equal(tp->next, "(")) {
- tp = tp->next;
- continue;
+ else if (equal(it->cur, ")") && !nest--) {
+ if (!last) break;
}
- tp = tp->next->next;
- struct vec_str args = {0};
- tp = macroargs(tp, &args);
- if (args.sz == 0) {
- fprintf(stderr, "cmeta: fatal: missing event parameters in %s\n",
- tp->filename);
- exit(2);
+ else {
+ continue;
}
- cb(args.data[0], args.data + 1, args.sz - 1, predicate);
+ return (struct cmeta_slice){start, last->loc - start + last->len};
}
+ return (struct cmeta_slice){0, 0};
}
-void cmeta_evhandlermacros(const struct cmeta *cm, const char *modname,
- void (*cb_handler)(const char *evname, const char *modname)) {
- const Token *tp = (const Token *)cm;
- while (tp) {
- if (equal(tp, "HANDLE_EVENT") && equal(tp->next, "(")) {
- tp = tp->next->next;
- char *name = malloc(tp->len + 1);
- if (!name) die("couldn't allocate memory");
- memcpy(name, tp->loc, tp->len);
- name[tp->len] = '\0';
- cb_handler(name, modname);
- }
- tp = tp->next;
- }
+u32 cmeta_line(const struct cmeta *cm, u32 i) {
+ return cm->itemtoks[i]->line_no;
}
// vi: sw=4 ts=4 noet tw=80 cc=80 fdm=marker