src/build/cmeta.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263

/*
 * Copyright © Michael Smith <mikesmiffy128@gmail.com>
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED “AS IS” AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

#include <stdio.h>
#include <stdlib.h>

#include "../intdefs.h"
#include "../langext.h"
#include "../os.h"
#include "cmeta.h"

// lazy inlined 3rd party stuff {{{
// too lazy to write a C tokenizer at the moment, or indeed probably ever, so
// let's just yoink some code from a hacked-up copy of chibicc, a nice minimal C
// compiler with code that's pretty easy to work with. it does leak memory by
// design, but build stuff is all one-shot so that's fine.
#include "../3p/chibicc/chibicc.h"
#include "../3p/chibicc/unicode.c"
// type sentinels from type.c (don't bring in the rest of type.c because it
// circularly depends on other stuff and we really only want tokenize here)
Type *ty_void = &(Type){TY_VOID, 1, 1};
Type *ty_bool = &(Type){TY_BOOL, 1, 1};
Type *ty_char = &(Type){TY_CHAR, 1, 1};
Type *ty_short = &(Type){TY_SHORT, 2, 2};
Type *ty_int = &(Type){TY_INT, 4, 4};
Type *ty_long = &(Type){TY_LONG, 8, 8};
Type *ty_uchar = &(Type){TY_CHAR, 1, 1, true};
Type *ty_ushort = &(Type){TY_SHORT, 2, 2, true};
Type *ty_uint = &(Type){TY_INT, 4, 4, true};
Type *ty_ulong = &(Type){TY_LONG, 8, 8, true};
Type *ty_float = &(Type){TY_FLOAT, 4, 4};
Type *ty_double = &(Type){TY_DOUBLE, 8, 8};
Type *ty_ldouble = &(Type){TY_LDOUBLE, 16, 16};
// inline just a couple more things, super lazy, but whatever
static Type *new_type(TypeKind kind, int size, int align) {
	Type *ty = calloc(1, sizeof(Type));
	ty->kind = kind;
	ty->size = size;
	ty->align = align;
	return ty;
}
Type *array_of(Type *base, int len) {
	Type *ty = new_type(TY_ARRAY, base->size * len, base->align);
	ty->base = base;
	ty->array_len = len;
	return ty;
}
#include "../3p/chibicc/hashmap.c"
#include "../3p/chibicc/strings.c"
#include "../3p/chibicc/tokenize.c"
// }}}

#ifdef _WIN32
#include "../3p/openbsd/asprintf.c" // missing from libc; plonked here for now
#endif

static noreturn die(int status, const char *s) {
	fprintf(stderr, "cmeta: fatal: %s\n", s);
	exit(status);
}

struct cmeta cmeta_loadfile(const os_char *path) {
	int f = os_open_read(path);
	if (f == -1) die(100, "couldn't open file");
	vlong len = os_fsize(f);
	if (len > 1u << 30 - 1) die(2, "input file is far too large");
	struct cmeta ret;
	ret.sbase = malloc(len + 1);
	ret.sbase[len] = '\0'; // chibicc needs a null terminator
	if (!ret.sbase) die(100, "couldn't allocate memory");
	if (os_read(f, ret.sbase, len) != len) die(100, "couldn't read file");
	int maxitems = len / 4; // shortest word is "END"
	ret.nitems = 0;
	// eventual overall memory requirement: file size * 6. seems fine to me.
	// current memory requirement: file size * 10, + all the chibicc linked list
	// crap. not as good but we'll continue tolerating it... probably for years!
	//ret.itemoffs = malloc(maxitems * sizeof(*ret.itemoffs));
	//if (!ret.itemoffs) die(100, "couldn't allocate memory");
	ret.itemtoks = malloc(maxitems * sizeof(*ret.itemtoks));
	if (!ret.itemtoks) die(100, "couldn't allocate memory");
	ret.itemtypes = malloc(maxitems * sizeof(*ret.itemtypes));
	if (!ret.itemtypes) die(100, "couldn't allocate memory");
	os_close(f);
#ifdef _WIN32
	char *realname = malloc(wcslen(path) + 1);
	if (!realname) die(100, "couldn't allocate memory");
	// XXX: being lazy about Unicode right now; a general purpose tool should
	// implement WTF8 or something. SST itself doesn't have any unicode paths
	// though, so we don't really care as much. this code still sucks though.
	*realname = *path;
	for (const ushort *p = path + 1; p[-1]; ++p) realname[p - path] = *p;
#else
	const char *realname = f;
#endif
	struct Token *t = tokenize_buf(realname, ret.sbase);
	// everything is THING() or THING {} so we need at least 3 tokens ahead - if
	// we have fewer tokens left in the file we can bail
	if (t && t->next) while (t->next->next) {
		if (!t->at_bol) {
			t = t->next;
			continue;
		}
		int type;
		if ((equal(t, "DEF_CVAR") || equal(t, "DEF_CVAR_MIN") ||
				equal(t, "DEF_CVAR_MAX") || equal(t, "DEF_CVAR_MINMAX") ||
				equal(t, "DEF_CVAR_UNREG") || equal(t, "DEF_CVAR_MIN_UNREG") ||
				equal(t, "DEF_CVAR_MAX_UNREG") ||
				equal(t, "DEF_CVAR_MINMAX_UNREG") ||
				equal(t, "DEF_FEAT_CVAR") || equal(t, "DEF_FEAT_CVAR_MIN") ||
				equal(t, "DEF_FEAT_CVAR_MAX") ||
				equal(t, "DEF_FEAT_CVAR_MINMAX")) && equal(t->next, "(")) {
			type = CMETA_ITEM_DEF_CVAR;
		}
		else if ((equal(t, "DEF_CCMD") || equal(t, "DEF_CCMD_HERE") ||
				equal(t, "DEF_CCMD_UNREG") || equal(t, "DEF_CCMD_HERE_UNREG") ||
				equal(t, "DEF_CCMD_PLUSMINUS") ||
				equal(t, "DEF_CCMD_PLUSMINUS_UNREG") ||
				equal(t, "DEF_FEAT_CCMD") || equal(t, "DEF_FEAT_CCMD_HERE") ||
				equal(t, "DEF_FEAT_CCMD_PLUSMINUS")) && equal(t->next, "(")) {
			type = CMETA_ITEM_DEF_CCMD;
		}
		else if ((equal(t, "DEF_EVENT") || equal(t, "DEF_PREDICATE")) &&
				equal(t->next, "(")) {
			type = CMETA_ITEM_DEF_EVENT;
		}
		else if (equal(t, "HANDLE_EVENT") && equal(t->next, "(")) {
			type = CMETA_ITEM_HANDLE_EVENT;
		}
		else if (equal(t, "FEATURE") && equal(t->next, "(")) {
			type = CMETA_ITEM_FEATURE;
		}
		else if ((equal(t, "REQUIRE") || equal(t, "REQUIRE_GAMEDATA") ||
				equal(t, "REQUIRE_GLOBAL") || equal(t, "REQUEST")) &&
				equal(t->next, "(")) {
			type = CMETA_ITEM_REQUIRE;
		}
		else if (equal(t, "GAMESPECIFIC") && equal(t->next, "(")) {
			type = CMETA_ITEM_GAMESPECIFIC;
		}
		else if (equal(t, "PREINIT") && equal(t->next, "{")) {
			type = CMETA_ITEM_PREINIT;
		}
		else if (equal(t, "INIT") && equal(t->next, "{")) {
			type = CMETA_ITEM_INIT;
		}
		else if (equal(t, "END") && equal(t->next, "{")) {
			type = CMETA_ITEM_END;
		}
		else {
			t = t->next;
			continue;
		}
		ret.itemtoks[ret.nitems] = t;
		ret.itemtypes[ret.nitems] = type;
		++ret.nitems;
		// this is kind of inefficient; in most cases we can skip more stuff,
		// but then also, we're always scanning for something specific, so who
		// cares actually, this will do for now.
		t = t->next->next;
	}
	return ret;
}

int cmeta_flags_cvar(const struct cmeta *cm, u32 i) {
	struct Token *t = cm->itemtoks[i];
	switch_exhaust (t->len) {
		// It JUST so happens all of the possible tokens here have a unique
		// length. I swear this wasn't planned. But it IS convenient!
		case 8: case 12: case 15: return 0;
		case 14: case 18: case 21: return CMETA_CVAR_UNREG;
		case 13: case 17: case 20: return CMETA_CVAR_FEAT;
	}
}

int cmeta_flags_ccmd(const struct cmeta *cm, u32 i) {
	struct Token *t = cm->itemtoks[i];
	switch_exhaust (t->len) {
		case 13: if (t->loc[4] == 'F') return CMETA_CCMD_FEAT;
		case 8: return 0;
		case 18: if (t->loc[4] == 'F') return CMETA_CCMD_FEAT;
			return CMETA_CCMD_PLUSMINUS;
		case 14: case 19: return CMETA_CCMD_UNREG;
		case 23: return CMETA_CCMD_FEAT | CMETA_CCMD_PLUSMINUS;
		case 24: return CMETA_CCMD_UNREG | CMETA_CCMD_PLUSMINUS;
	}
}

int cmeta_flags_event(const struct cmeta *cm, u32 i) {
	// assuming CMETA_EVENT_ISPREDICATE remains 1, the ternary should
	// optimise out
	return cm->itemtoks[i]->len == 13 ? CMETA_EVENT_ISPREDICATE : 0;
}

int cmeta_flags_require(const struct cmeta *cm, u32 i) {
	struct Token *t = cm->itemtoks[i];
	// NOTE: this is somewhat more flexible to enable REQUEST_GAMEDATA or
	// something in future, although that's kind of useless currently
	int optflag = t->loc[4] == 'E'; // REQU[E]ST
	switch_exhaust (t->len) {
		case 7: return optflag;
		case 16: return optflag | CMETA_REQUIRE_GAMEDATA;
		case 14: return optflag | CMETA_REQUIRE_GLOBAL;
	};
}

int cmeta_nparams(const struct cmeta *cm, u32 i) {
	int argc = 1, nest = 0;
	struct Token *t = cm->itemtoks[i]->next->next;
	if (equal(t, ")")) return 0; // XXX: stupid special case, surely improvable?
	for (; t; t = t->next) {
		if (equal(t, "(")) { ++nest; continue; }
		if (!nest && equal(t, ",")) ++argc;
		else if (equal(t, ")") && !nest--) break;
	}
	if (nest != -1) return 0; // XXX: any need to do anything better here?
	return argc;
}

struct cmeta_param_iter cmeta_param_iter_init(const struct cmeta *cm, u32 i) {
	return (struct cmeta_param_iter){cm->itemtoks[i]->next->next};
}

struct cmeta_slice cmeta_param_iter(struct cmeta_param_iter *it) {
	int nest = 0;
	const char *start = it->cur->loc;
	for (struct Token *last = 0; it->cur;
			last = it->cur, it->cur = it->cur->next) {
		if (equal(it->cur, "(")) { ++nest; continue; }
		if (!nest && equal(it->cur, ",")) {
			if (!last) { // , immediately after (, for some reason. treat as ""
				return (struct cmeta_slice){start, 0};
			}
			it->cur = it->cur->next;
		}
		else if (equal(it->cur, ")") && !nest--) {
			if (!last) break;
		}
		else {
			continue;
		}
		return (struct cmeta_slice){start, last->loc - start + last->len};
	}
	return (struct cmeta_slice){0, 0};
}

u32 cmeta_line(const struct cmeta *cm, u32 i) {
	return cm->itemtoks[i]->line_no;
}

// vi: sw=4 ts=4 noet tw=80 cc=80 fdm=marker