diff options
| author | 2022-02-24 00:47:05 +0000 | |
|---|---|---|
| committer | 2022-03-19 03:51:45 +0000 | |
| commit | 6818b362a776f0cc5a6068ed119dc2ebcbc5a9cc (patch) | |
| tree | d2f32f226229cdfce0c61540396f4a7d3a4a8ced /src | |
| parent | 98378138a521fa52758f1ed3501900e6c323c474 (diff) | |
| download | sst-6818b362a776f0cc5a6068ed119dc2ebcbc5a9cc.tar.gz sst-6818b362a776f0cc5a6068ed119dc2ebcbc5a9cc.zip | |
Fix some old KV parser issues
- Implement conditionals in the lexer and reject or ignore them in
  callbacks. This will allow something to use them later if needed.
- Make error handling less stupid (return a bool instead of using the
  state struct).
Diffstat (limited to 'src')
| -rw-r--r-- | src/build/mkgamedata.c | 11 | ||||
| -rw-r--r-- | src/gameinfo.c | 12 | ||||
| -rw-r--r-- | src/kv.c | 169 | ||||
| -rw-r--r-- | src/kv.h | 27 | 
4 files changed, 142 insertions, 77 deletions
| diff --git a/src/build/mkgamedata.c b/src/build/mkgamedata.c index ca2e130..e2e59ff 100644 --- a/src/build/mkgamedata.c +++ b/src/build/mkgamedata.c @@ -1,5 +1,5 @@  /* - * Copyright © 2021 Michael Smith <mikesmiffy128@gmail.com> + * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>   *   * Permission to use, copy, modify, and/or distribute this software for any   * purpose with or without fee is hereby granted, provided that the above @@ -144,6 +144,9 @@ static void kv_cb(enum kv_token type, const char *p, uint len, void *ctxt) {  				*ents_tail = e;  				ents_tail = &e->next;  			} +			break; +		case KV_COND_PREFIX: case KV_COND_SUFFIX: +			badparse(state, "unexpected conditional");  	}  } @@ -165,11 +168,9 @@ int OS_MAIN(int argc, os_char *argv[]) {  		int nread;  		while (nread = read(fd, buf, sizeof(buf))) {  			if (nread == -1) die("couldn't read file"); -			kv_parser_feed(&kv, buf, nread, &kv_cb, &state); -			if (kv.state == KV_PARSER_ERROR) goto ep; +			if (!kv_parser_feed(&kv, buf, nread, &kv_cb, &state)) goto ep;  		} -		kv_parser_done(&kv); -		if (kv.state == KV_PARSER_ERROR) { +		if (!kv_parser_done(&kv)) {  ep:			fprintf(stderr, "mkgamedata: %" fS ":%d:%d: bad syntax: %s\n",  					*argv, kv.line, kv.col, kv.errmsg);  			exit(1); diff --git a/src/gameinfo.c b/src/gameinfo.c index a5f1a42..4af5df7 100644 --- a/src/gameinfo.c +++ b/src/gameinfo.c @@ -1,5 +1,5 @@  /* - * Copyright © 2021 Michael Smith <mikesmiffy128@gmail.com> + * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>   *   * Permission to use, copy, modify, and/or distribute this software for any   * purpose with or without fee is hereby granted, provided that the above @@ -228,6 +228,10 @@ static void kv_cb(enum kv_token type, const char *p, uint len, void *_ctxt) {  			break;  		case KV_NEST_END:  			if (ctxt->dontcarelvl) --ctxt->dontcarelvl; else --ctxt->nestlvl; +			break; +		case KV_COND_PREFIX: case KV_COND_SUFFIX: +			con_warn("gameinfo: warning: just ignoring 
conditional \"%.*s\"", +					len, p);  	}  	#undef MATCH  } @@ -353,11 +357,9 @@ bool gameinfo_init(void) {  					strerror(errno));  			goto e;  		} -		kv_parser_feed(&kvp, buf, nread, &kv_cb, &ctxt); -		if (kvp.state == KV_PARSER_ERROR) goto ep; +		if (!kv_parser_feed(&kvp, buf, nread, &kv_cb, &ctxt)) goto ep;  	} -	kv_parser_done(&kvp); -	if (kvp.state == KV_PARSER_ERROR) goto ep; +	if (!kv_parser_done(&kvp)) goto ep;  	close(fd);  	return true; @@ -1,5 +1,5 @@  /* - * Copyright © 2021 Michael Smith <mikesmiffy128@gmail.com> + * Copyright © 2022 Michael Smith <mikesmiffy128@gmail.com>   *   * Permission to use, copy, modify, and/or distribute this software for any   * purpose with or without fee is hereby granted, provided that the above @@ -18,10 +18,22 @@  #include "intdefs.h"  #include "kv.h" +#include "unreachable.h"  #define EOF -1 -void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, +// parser states, implemented by STATE() macros in kv_parser_feed() below. +// needs to be kept in sync! +enum { +	ok, ok_slash, +	ident, ident_slash, identq, +	sep, sep_slash, condsep, condsep_slash, +	cond_prefix, +	val, val_slash, valq, afterval, afterval_slash, +	cond_suffix +}; + +bool kv_parser_feed(struct kv_parser *this, const char *in, uint sz,  		kv_parser_cb cb, void *ctxt) {  	const char *p = in;  	short c; @@ -34,9 +46,8 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,  	#define INCCOL() (*p == '\n' ? (++this->line, this->col = 0) : ++this->col)  	#define READ() (p == in + sz ? 
EOF : (INCCOL(), *p++))  	#define ERROR(s) do { \ -		this->state = KV_PARSER_ERROR; \  		this->errmsg = s; \ -		return; \ +		return false; \  	} while (0)  	#define OUT(c) do { \  		if (this->outp - this->tokbuf == KV_TOKEN_MAX) { \ @@ -48,7 +59,7 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,  	// note: multi-eval  	#define IS_WS(c) ((c) == ' ' || (c) == '\t' || (c) == '\n' || (c) == '\r')  	#define STATE(s) case s: s -	#define HANDLE_EOF() do { case EOF: return; } while (0) +	#define HANDLE_EOF() do { case EOF: return true; } while (0)  	#define SKIP_COMMENT(next) do { \  		this->state = next; \  		this->incomment = true; \ @@ -59,29 +70,31 @@ void kv_parser_feed(struct kv_parser *this, const char *in, uint sz,  		cb(type, this->tokbuf, this->outp - this->tokbuf, ctxt); \  		this->outp = this->tokbuf; \  	} while (0) - -	// parser states, implemented by STATE() macros below -	enum { -		ok, -		ok_slash, -		ident, -		ident_slash, -		identq, -		sep, -		sep_slash, -		val, -		val_slash, -		valq -	}; +	// prefix and suffix conditions are more or less the same, just in different +	// contexts, because very good syntax yes. 
+	#define CONDSTATE(name, type, next) do { \ +		STATE(name): \ +			switch (c = READ()) { \ +				HANDLE_EOF(); \ +				CASE_WS: ERROR("unexpected whitespace in conditional"); \ +				case '[': ERROR("unexpected opening bracket in conditional"); \ +				case '{': case '}': ERROR("unexpected brace in conditional"); \ +				case '/': ERROR("unexpected slash in conditional"); \ +				case ']': CB(type); GOTO(next); \ +				default: OUT(c); goto name; \ +			} \ +	} while (0)  start: // special spaghetti so we don't have a million different comment states -	if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return; +	if (this->incomment) while ((c = READ()) != '\n') if (c == EOF) return true;  	this->incomment = false;  switch (this->state) {  STATE(ok): -	switch (c = READ()) { +	c = READ(); +ident_postread: +	switch (c) {  		HANDLE_EOF();  		CASE_WS: goto ok;  		case '#': ERROR("kv macros not supported"); @@ -94,6 +107,7 @@ STATE(ok):  			goto ok;  		case '"': GOTO(identq);  		case '/': GOTO(ok_slash); +		case '[': case ']': ERROR("unexpected conditional bracket");  		default: GOTO(ident);  	} @@ -101,7 +115,7 @@ STATE(ok_slash):  	switch (c = READ()) {  		HANDLE_EOF();  		case '/': SKIP_COMMENT(ok); -		default: OUT('/'); GOTO(ident); +		default: GOTO(ident);  	}  ident: @@ -115,10 +129,12 @@ case ident: // continue here  			char c_ = c;  			cb(KV_NEST_START, &c_, 1, ctxt);  			GOTO(ok); -		case '}': case '"': ERROR("unexpected control character"); -		CASE_WS: -			CB(KV_IDENT); -			GOTO(sep); +		// XXX: assuming [ is a token break; haven't checked Valve's code +		case '[': CB(KV_IDENT); GOTO(cond_prefix); +		case '}': ERROR("unexpected closing brace"); +		case ']': ERROR("unexpected closing bracket"); +		case '"': ERROR("unexpected quote mark"); +		CASE_WS: CB(KV_IDENT); GOTO(sep);  		case '/': GOTO(ident_slash);  		default: goto ident;  	} @@ -126,18 +142,14 @@ case ident: // continue here  STATE(ident_slash):  	switch (c = READ()) {  		HANDLE_EOF(); -		case '/': 
-			CB(KV_IDENT); -			SKIP_COMMENT(sep); -		default: OUT('/'); GOTO(ident); +		case '/': CB(KV_IDENT); SKIP_COMMENT(sep); +		default: GOTO(ident);  	}  STATE(identq):  	switch (c = READ()) {  		HANDLE_EOF(); -		case '"': -			CB(KV_IDENT_QUOTED); -			GOTO(sep); +		case '"': CB(KV_IDENT_QUOTED); GOTO(sep);  		default: OUT(c); goto identq;  	} @@ -145,14 +157,15 @@ STATE(sep):  	do c = READ(); while (IS_WS(c));  	switch (c) {  		HANDLE_EOF(); -		case '[': ERROR("conditionals not supported");  		case '{':;  			char c_ = c;  			++this->nestlvl;  			cb(KV_NEST_START, &c_, 1, ctxt);  			GOTO(ok); +		case '[': GOTO(cond_prefix);  		case '"': GOTO(valq); -		case '}': ERROR("unexpected control character"); +		case '}': ERROR("unexpected closing brace"); +		case ']': ERROR("unexpected closing bracket");  		case '/': GOTO(sep_slash);  		default: GOTO(val);  	} @@ -161,7 +174,33 @@ STATE(sep_slash):  	switch (c = READ()) {  		HANDLE_EOF();  		case '/': SKIP_COMMENT(sep); -		default: OUT('/'); GOTO(val); +		default: GOTO(val); +	} + +CONDSTATE(cond_prefix, KV_COND_PREFIX, condsep); + +STATE(condsep): +	do c = READ(); while (IS_WS(c)); +	switch (c) { +		HANDLE_EOF(); +		case '{':; +			char c_ = c; +			++this->nestlvl; +			cb(KV_NEST_START, &c_, 1, ctxt); +			GOTO(ok); +		case '}': ERROR("unexpected closing brace"); +		case '[': ERROR("unexpected opening bracket"); +		case ']': ERROR("unexpected closing bracket"); +		case '/': GOTO(condsep_slash); +		// these conditions only go before braces because very good syntax +		default: ERROR("unexpected string value after prefix condition"); +	} + +STATE(condsep_slash): +	switch (c = READ()) { +		HANDLE_EOF(); +		case '/': SKIP_COMMENT(condsep); +		default: ERROR("unexpected string value after prefix condition");  	}  val: @@ -169,17 +208,18 @@ val:  case val: // continue here  	switch (c = READ()) {  		HANDLE_EOF(); -		case '{': case '"': ERROR("unexpected control character"); -		// might get } with no whitespace +		case '{': 
ERROR("unexpected opening brace"); +		case ']': ERROR("unexpected closing bracket"); +		case '"': ERROR("unexpected quotation mark"); +		// might get [ or } with no whitespace  		case '}':  			CB(KV_VAL);  			--this->nestlvl;  			char c_ = c;  			cb(KV_NEST_END, &c_, 1, ctxt); -			GOTO(ok); -		CASE_WS: -			CB(KV_VAL); -			GOTO(ok); +			GOTO(afterval); +		case '[': CB(KV_VAL); GOTO(cond_suffix); +		CASE_WS: CB(KV_VAL); GOTO(afterval);  		case '/': GOTO(val_slash);  		default: goto val;  	} @@ -187,23 +227,41 @@ case val: // continue here  STATE(val_slash):  	switch (c = READ()) {  		HANDLE_EOF(); -		case '/': -			CB(KV_VAL); -			SKIP_COMMENT(ok); -		default: OUT('/'); GOTO(val); +		case '/': CB(KV_VAL); SKIP_COMMENT(afterval); +		default: GOTO(val);  	}  STATE(valq):  	switch (c = READ()) {  		HANDLE_EOF(); -		case '"': -			CB(KV_VAL_QUOTED); -			GOTO(ok); +		case '"': CB(KV_VAL_QUOTED); GOTO(afterval);  		default: OUT(c); goto valq;  	} +STATE(afterval): +	switch (c = READ()) { +		HANDLE_EOF(); +		CASE_WS: goto afterval; +		case '[': GOTO(cond_suffix); +		case '/': GOTO(afterval_slash); +		// mildly dumb hack: if no conditional, we can just use the regular +		// starting state handler to get next transition correct - just avoid +		// double-reading the character +		default: goto ident_postread; +	} + +STATE(afterval_slash): +	switch (c = READ()) { +		HANDLE_EOF(); +		case '/': SKIP_COMMENT(afterval); +		default: GOTO(ident); +	} + +CONDSTATE(cond_suffix, KV_COND_SUFFIX, ok); +  } +	#undef CONDSTATE  	#undef CB  	#undef GOTO  	#undef SKIP_COMMENT @@ -215,17 +273,20 @@ STATE(valq):  	#undef ERROR  	#undef READ  	#undef INCCOL + +	unreachable; // pretty sure!  
} -void kv_parser_done(struct kv_parser *this) { -	if (this->state > 0) { -		this->state = -1; +bool kv_parser_done(struct kv_parser *this) { +	if (this->state != ok && this->state != afterval) {  		this->errmsg = "unexpected end of input"; +		return false;  	} -	else if (this->state == 0 && this->nestlvl != 0) { -		this->state = -1; +	if (this->nestlvl != 0) {  		this->errmsg = "unterminated object (unbalanced braces)"; +		return false;  	} +	return true;  }  // vi: sw=4 ts=4 noet tw=80 cc=80 @@ -34,8 +34,8 @@   */  struct kv_parser {  	ushort line, col;	/* the current line and column in the text */ -	schar state;		/* internal, shouldn't usually be touched directly */ -	bool incomment;		/* internal */ +	char state : 7;		/* internal, shouldn't usually be touched directly */ +	bool incomment : 1;	/* internal */  	ushort nestlvl;		/* internal */  	const char *errmsg; /* the error message, *IF* parsing just failed */ @@ -46,8 +46,6 @@ struct kv_parser {  	char tokbuf[KV_TOKEN_MAX];  }; -#define KV_PARSER_ERROR -1 -  /*   * These are the tokens that can be received by a kv_parser_cb (below).   * The x-macro and string descriptions are given to allow for easy debug @@ -61,6 +59,8 @@ struct kv_parser {  	X(KV_IDENT_QUOTED, "quoted-ident") \  	X(KV_VAL, "value") \  	X(KV_VAL_QUOTED, "quoted-value") \ +	X(KV_COND_PREFIX, "cond-prefix") \ +	X(KV_COND_SUFFIX, "cond-suffix") \  	X(KV_NEST_START, "object-start") \  	X(KV_NEST_END, "object-end") @@ -76,20 +76,21 @@ typedef void (*kv_parser_cb)(enum kv_token type, const char *p, uint len,   * read in from a file.   *   * The lexer is reentrant and can be fed arbitrarily sized blocks of data at a - * time. The function may return early in the event of an error; you must check - * if parser->state == KV_PARSER_ERROR between calls! Continuing to try parsing - * after an error is undefined. + * time. 
The function may return early in the event of an error; a return value + * of false indicates that this has happened, otherwise true is returned. + * + * In the event of an error, the errmsg, line and col fields of the parser + * struct can be used for diagnostics.   */ -// FIXME: revise API usage so errors aren't passed through "state" value -void kv_parser_feed(struct kv_parser *this, const char *in, uint sz, +bool kv_parser_feed(struct kv_parser *this, const char *in, uint sz,  		kv_parser_cb cb, void *ctxt);  /* - * This indicates that parsing is done; if the state is midway through a token - * this will be converted into an error state which can be checked in the same - * way as noted above. + * This indicates that parsing is done; if this is called at an unexpected time, + * a parsing error will result; this is indicated in the return value as with + * kv_parser_feed.   */ -void kv_parser_done(struct kv_parser *this); +bool kv_parser_done(struct kv_parser *this);  #endif | 
