/*
 * strgen/strgen.c
 * author truelight
 * Mon, 09 Aug 2004 17:04:08 +0000
 * changeset 0 29654efe3188
 * child 181 0b95567f2d0f
 * permissions -rw-r--r--
 * (svn r1) Import of revision 975 of old (crashed) SVN
 */
#include "../stdafx.h"
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#if !defined(WIN32) || defined(__CYGWIN__)
#include <unistd.h>
#endif

#ifdef WITH_REV
extern char _openttd_revision[];
#endif

/* Compiles a list of strings into a compiled string list */

/* Number of elements in a true array (not a pointer). The argument is
 * parenthesized so that expression arguments expand correctly. */
#define lengthof(x) (sizeof(x)/sizeof((x)[0]))

/* Emitter callback for one {COMMAND}: `buf` is the parameter text that
 * followed the command name (empty string when there was none) and `value`
 * is the per-command constant taken from the command table. */
typedef void (*ParseCmdProc)(char *buf, int value);

/* Header written verbatim at the start of a compiled language file
 * (see write_langfile). */
typedef struct {
	uint32 ident;				// file magic; write_langfile stores TO_LE32('GNAL')
	uint32 version;			// 32-bits of auto generated version info which is basically a hash of strings.h
	char name[32];			// the international name of this language
	char own_name[32];	// the localized name of this language
	uint16 offsets[32];	// filled by write_langfile with count_inuse() of each of the 32 string groups
} LanguagePackHeader;

/* One entry of the {COMMAND} table. */
typedef struct CmdStruct {
	const char *cmd;		// command name as written between '{' and '}' (or the first space)
	ParseCmdProc proc;	// emitter invoked when the command is encountered
	long value;					// constant handed to proc (narrowed to int at the call)
} CmdStruct;

/* Hash-table node mapping a string name to its numeric string id. */
typedef struct LineName {
	struct LineName *hash_next;	// next node in the same bucket, NULL at the end
	int value;									// string id associated with the name
	char str[1];								// NUL-terminated name; hash_add over-allocates the node so the full name fits here
} LineName; 

int _cur_line;		// line number of the line being parsed; prefixed to warning()/error() output
bool _warnings;		// set by warning(); main() exits with status 1 when it is still set after parsing

uint32 _hash;			// version hash computed by parse_file() over the master file
char _lang_name[32], _lang_ownname[32];	// set by the "##name" / "##ownname" pragmas

// Number of buckets in the name -> id hash table.
#define HASH_SIZE 1023
// Bucket heads; the nodes of a bucket are chained through LineName::hash_next.
LineName *_hash_head[HASH_SIZE];
/* Map a NUL-terminated string to a bucket index in [0, HASH_SIZE). */
unsigned int hash_str(const char *s) {
	unsigned int acc = 0;
	const char *p = s;

	while (*p != 0) {
		/* shift/mix step (a 3-bit left rotate when unsigned int is 32 bits
		 * wide), then fold in the next character */
		acc = ((acc << 3) | (acc >> 29)) ^ *p;
		p++;
	}
	return acc % HASH_SIZE;
}
void hash_add(const char *s, int value) {
	unsigned int len = strlen(s);
	LineName *ln = (LineName*)malloc(sizeof(LineName) + len);
	unsigned int hash = hash_str(s);
	ln->hash_next = _hash_head[hash];
	_hash_head[hash] = ln;
	ln->value = value;
	ln->str[len] = 0;
	memcpy(ln->str, s, len);
}

/* Look up the string id registered for name `s`.
 * Returns the id, or -1 when the name is unknown. */
int hash_find(const char *s) {
	const LineName *node;

	for (node = _hash_head[hash_str(s)]; node != NULL; node = node->hash_next) {
		if (strcmp(node->str, s) == 0)
			return node->value;
	}
	return -1;
}

/* Report a recoverable problem on stderr, prefixed with the current line
 * number, and remember it in _warnings.
 * `s` is a printf-style format; messages longer than the internal buffer are
 * truncated instead of overflowing it. */
void warning(const char *s, ...) {
	char buf[1024];
	va_list va;
	va_start(va, s);
	vsnprintf(buf, sizeof(buf), s, va);
	va_end(va);
	fprintf(stderr, "%d: ERROR: %s\n", _cur_line, buf);
	_warnings = true;
}

/* Report an unrecoverable problem on stderr, prefixed with the current line
 * number, and terminate the program with exit status 1.
 * `s` is a printf-style format; messages longer than the internal buffer are
 * truncated instead of overflowing it. */
void NORETURN error(const char *s, ...) {
	char buf[1024];
	va_list va;
	va_start(va, s);
	vsnprintf(buf, sizeof(buf), s, va);
	va_end(va);
	fprintf(stderr, "%d: FATAL: %s\n", _cur_line, buf);
	exit(1);
}

/* Bounded string copy: writes at most `len` bytes to `dst`, including the
 * terminating NUL, so `dst` is always terminated. `len` must be > 0. */
void ttd_strlcpy(char *dst, const char *src, size_t len)
{
	size_t room;

	assert(len > 0);

	/* one byte is reserved for the terminator */
	for (room = len - 1; room != 0 && *src != '\0'; room--) {
		*dst = *src;
		dst++;
		src++;
	}
	*dst = '\0';
}


// String table indexed by string id; NULL means the id is unused.
// Entries created by handle_string() are laid out as:
//   byte 0    : non-zero while the text comes from the master (english) file,
//               0 once a translation file supplied or confirmed the text
//   from byte 1: the NUL-terminated name of the string
//   after that : the NUL-terminated string contents
// Entries created by emit_buf() use a different layout: a uint16 length
// followed by that many raw bytes.
char *allstr[65536];

byte _put_buf[4096];		// output bytes of the string currently being encoded
int _put_pos;						// number of bytes currently held in _put_buf
int _next_string_id;		// id given to the next master string; set by the "##id" pragma

/* Append one byte to the put buffer; a full buffer is a fatal error. */
void put_byte(byte c) {
	if (_put_pos == lengthof(_put_buf)) {
		error("Put buffer too small");
	}

	_put_buf[_put_pos] = c;
	_put_pos++;
}

void emit_buf(int ent) {
	char *s;

	if (ent < 0 || ent >= 0x10000) {
		warning("Invalid string ID %d\n", ent);
		return;
	}

	if (allstr[ent] != 0) {
		warning("Duplicate string ID %d\n", ent);
		return;
	}

	// Allocate the string, and put the uint16 before with the length.
	s = (char*)malloc(sizeof(uint16) + _put_pos);
	*((uint16*)s) = _put_pos;
	memcpy(s + sizeof(uint16), _put_buf, _put_pos);

	allstr[ent] = s;

	_put_pos = 0;
}

/* Emitter for commands encoded as one byte: writes the low byte of `value`.
 * The command takes no parameter, so a non-empty `buf` is only warned about. */
void EmitSingleByte(char *buf, int value) {
	if (buf[0] != '\0') {
		warning("Ignoring trailing letters in command");
	}

	put_byte((byte)value);
}

void EmitEscapedByte(char *buf, int value) {
	if (*buf != 0)
		warning("Ignoring trailing letters in command");
	put_byte((byte)0x85);
	put_byte((byte)value);
}


/* Emitter for an inline string reference: writes 0x81 followed by the
 * referenced string id as two bytes, low byte first.
 * `buf` is either a number (parsed by strtol with base auto-detection) or the
 * name of a string already registered in the hash table. `value` is unused.
 * An invalid reference produces a warning and emits nothing. */
void EmitStringInl(char *buf, int value) {
	int id;

	if (*buf>='0' && *buf <= '9') {
		/* Range-check the full strtol result before narrowing it to int, so
		 * that an oversized number cannot wrap into the valid range. */
		long num = strtol(buf, NULL, 0);
		if (num < 0 || num >= 0x10000) {
			warning("Invalid inline num %s\n", buf);
			return;
		}
		id = (int)num;
	} else {
		id = hash_find(buf);
		if (id == -1) {
			warning("Invalid inline string '%s'", buf);
			return;
		}
	}

	put_byte(0x81);
	put_byte((byte)(id & 0xFF));
	put_byte((byte)(id >> 8));
}

/* Emitter for SETX: writes byte 1 followed by the low byte of the numeric
 * parameter in `buf`. Text after the number is a fatal error. `value` is
 * unused. */
void EmitSetX(char *buf, int value) {
	char *end;
	int x;

	x = strtol(buf, &end, 0);
	if (*end != '\0') {
		error("SetX param invalid");
	}

	put_byte(1);
	put_byte((byte)x);
}

void EmitSetXY(char *buf, int value) {
	char *err;
	int x = strtol(buf, &err, 0), y;
	if (*err != 0) error("SetXY param invalid");
	y = strtol(err+1, &err, 0);
	if (*err != 0) error("SetXY param invalid");

	put_byte(0x1F);
	put_byte((byte)x);
	put_byte((byte)y);
}

// Table of all {COMMAND}s understood in string definitions.
// Each entry is: command name, emitter function, value passed to the emitter.
// EmitSetX, EmitSetXY and EmitStringInl ignore the value column and emit
// their own fixed opcode bytes.
// find_cmd() searches this table linearly; parse_file() mixes the index of an
// entry into the version hash, so reordering entries changes that hash.
static const CmdStruct _cmd_structs[] = {
	// Update position
	{"SETX", EmitSetX, 1},
	{"SETXY", EmitSetXY, 2},

	// Font size
	{"TINYFONT", EmitSingleByte, 8},
	{"BIGFONT", EmitSingleByte, 9},

	// New line (the empty command "{}")
	{"", EmitSingleByte, 10},

	// Colors
	{"BLUE", EmitSingleByte, 15},		
	{"SILVER", EmitSingleByte, 16},
	{"GOLD", EmitSingleByte, 17},
	{"RED", EmitSingleByte, 18},
	{"PURPLE", EmitSingleByte, 19},
	{"LTBROWN", EmitSingleByte, 20},
	{"ORANGE", EmitSingleByte, 21},
	{"GREEN", EmitSingleByte, 22},
	{"YELLOW", EmitSingleByte, 23},
	{"DKGREEN", EmitSingleByte, 24},
	{"CREAM", EmitSingleByte, 25},
	{"BROWN", EmitSingleByte, 26},
	{"WHITE", EmitSingleByte, 27},
	{"LTBLUE", EmitSingleByte, 28},
	{"GRAY", EmitSingleByte, 29},
	{"DKBLUE", EmitSingleByte, 30},
	{"BLACK", EmitSingleByte, 31},
	
	// 0x7B=123 is the LAST special character we may use.

	// Numbers
	{"COMMA32", EmitSingleByte, 0x7B},
	{"COMMA16", EmitSingleByte, 0x7C},
	{"COMMA8", EmitSingleByte, 0x7D},
	{"NUMU16", EmitSingleByte, 0x7E},

	{"CURRENCY", EmitSingleByte, 0x7F},
	
	// Escaped commands: emitted as the prefix byte 0x85 followed by the value.
	{"CURRCOMPACT", EmitEscapedByte, 0}, // compact currency
	{"INT32", EmitEscapedByte, 1}, // NOTE(review): was labelled "compact currency" (copy-paste); presumably a 32-bit integer - confirm
	{"REV", EmitEscapedByte, 2}, // openttd revision string
	{"SHORTCARGO", EmitEscapedByte, 3}, // short cargo description, only ### tons, or ### litres
	
	{"STRINL", EmitStringInl, 0x81},

	{"DATE_LONG", EmitSingleByte, 0x82},
	{"DATE_SHORT", EmitSingleByte, 0x83},

	{"VELOCITY", EmitSingleByte, 0x84},
	{"SKIP16", EmitSingleByte, 0x85}, // NOTE(review): same byte as the prefix written by EmitEscapedByte - confirm this is intended
	{"SKIP", EmitSingleByte, 0x86},
	{"VOLUME", EmitSingleByte, 0x87},

	{"STRING", EmitSingleByte, 0x88},

	{"CARGO", EmitSingleByte, 0x99},
	{"STATION", EmitSingleByte, 0x9A},
	{"TOWN", EmitSingleByte, 0x9B},
	{"CURRENCY64", EmitSingleByte, 0x9C},
	{"CHECKPOINT", EmitSingleByte, 0x9D}, // checkpoint name
	// 0x9E=158 is the LAST special character we may use.

	{"UPARROW", EmitSingleByte, 0xA0},
	{"POUNDSIGN", EmitSingleByte, 0xA3},
	{"YENSIGN", EmitSingleByte, 0xA5},
	{"COPYRIGHT", EmitSingleByte, 0xA9},
	{"DOWNARROW", EmitSingleByte, 0xAA},
	{"CHECKMARK", EmitSingleByte, 0xAC},
	{"CROSS", EmitSingleByte, 0xAD},
	{"RIGHTARROW", EmitSingleByte, 0xAF},

	{"SMALLUPARROW", EmitSingleByte, 0xBC},
	{"SMALLDOWNARROW", EmitSingleByte, 0xBD},
	{"THREE_FOURTH", EmitSingleByte, 0xBE},
};

/* Find the command whose name is exactly the first `len` characters of `s`.
 * Returns the table entry, or NULL when no command has that name. */
const CmdStruct *find_cmd(const char *s, int len) {
	const CmdStruct *entry = _cmd_structs;
	const CmdStruct *const end = _cmd_structs + lengthof(_cmd_structs);

	for (; entry != end; entry++) {
		/* the prefix has to match ... */
		if (strncmp(entry->cmd, s, len) != 0) continue;
		/* ... and the table name must not be longer than the prefix */
		if (entry->cmd[len] == 0) return entry;
	}
	return NULL;
}


// Parse the next element of a string definition, starting at *str.
//
// Return value:
//  - NULL at the end of the string; *str is advanced past the terminator
//  - NULL on a malformed or unknown command; *str is left unchanged and, when
//    errortext is non-NULL, a warning mentioning errortext is issued
//  - a pointer whose numeric value is 1-255 for a literal byte; *str is
//    advanced by one character
//  - otherwise the command table entry for a {COMMAND}; *str is advanced past
//    the closing '}' and param receives the NUL-terminated parameter text
//    (empty when the command had no parameter)
//
// param is written without any bounds check, so the caller must supply a
// buffer large enough for the longest parameter in its input.
const CmdStruct *parse_command_string(char **str, char *param, const char *errortext)
{
	char *s = *str, *start;
	byte c = *s++;
	const CmdStruct *cmd;

	if (c != '{') {
		*str = s;
		// encode the literal byte in the pointer value itself
		return (const CmdStruct*)(size_t)c;
	}

	// parse command name
	start = s;
	for(;;) {
		c = *s++;
		if (c == '}' || c == ' ')
			break;
		if (c == 0) {
			if (errortext) warning("Missing } from command '%s' in '%s'", start, errortext);
			return NULL;
		}
	}

	cmd = find_cmd(start, s - start - 1);
	if (cmd == NULL) {
		// the precision argument of "%.*s" has to be an int, not a ptrdiff_t
		if (errortext) warning("Undefined command '%.*s' in '%s'", (int)(s - start - 1), start, errortext);
		return NULL;
	}

	if (c == ' ') {
		// copy params
		start = s;
		for(;;) {
			c = *s++;
			if (c == '}') break;
			if (c == 0) {
				if (errortext) warning("Missing } from command '%s' in '%s'", start, errortext);
				return NULL;
			}
			*param++ = c;
		}

	}
	*param = 0;

	*str = s;

	return cmd;
}


void handle_pragma(char *str)
{
	if (!memcmp(str, "id ", 3)) {
		_next_string_id = strtoul(str + 3, NULL, 0);
	} else if (!memcmp(str, "name ", 5)) {
		ttd_strlcpy(_lang_name, str + 5, sizeof(_lang_name));
	} else if (!memcmp(str, "ownname ", 8)) {
		ttd_strlcpy(_lang_ownname, str + 8, sizeof(_lang_ownname));
	} else {
		error("unknown pragma '%s'", str);
	}
}

/* Check that strings `a` and `b` contain the same sequence of {COMMAND}s.
 * Literal characters are skipped and command parameters are not compared.
 * Returns true when both sequences are identical. */
bool check_commands_match(char *a, char *b)
{
	char scratch[100];
	const CmdStruct *cmd_a, *cmd_b;

	for (;;) {
		// skip literal bytes of a until its next command or its end
		for (;;) {
			cmd_a = parse_command_string(&a, scratch, NULL);
			if (cmd_a == NULL || (unsigned long)cmd_a > 255) break;
		}

		// same for b
		for (;;) {
			cmd_b = parse_command_string(&b, scratch, NULL);
			if (cmd_b == NULL || (unsigned long)cmd_b > 255) break;
		}

		// both must stop at the same command, or both at the end
		if (cmd_a != cmd_b) return false;
		if (cmd_a == NULL) return true;
	}
}

/* Process one input line (already stripped of its line ending).
 *
 * Line syntax:
 *   "##<pragma>"            pragma, forwarded to handle_pragma()
 *   "#...", ";...", " ...", ""  comment or blank line, ignored
 *   "NAME   :text"          string definition; blanks before ':' are trimmed
 *
 * master == true  (master file): NAME gets the next string id and is added to
 *   the name hash; a repeated name or an occupied id is reported as a warning.
 * master == false (translation): NAME must exist in the master file. The text
 *   ":" (i.e. the line "NAME::") keeps the master text and marks it as
 *   translated; any other text must use the same commands as the master text
 *   and then replaces it. Problems in a translation are reported but do not
 *   change _warnings.
 *
 * The line buffer `str` is modified in place. */
void handle_string(char *str, bool master) {
	char *s,*t,*r;
	size_t name_len;
	int ent;

	if (*str == '#') {
		if (str[1] == '#' && str[2] != '#')
			handle_pragma(str + 2);
		return;
	}

	// Ignore comments & blank lines
	if (*str == ';' || *str == ' ' || *str == 0)
		return;

	s = strchr(str, ':');
	if (s == NULL) {
		warning("Line has no ':' delimiter");
		return;
	}

	// Trim spaces
	for(t=s;t > str && (t[-1]==' ' || t[-1]=='\t'); t--);
	*t = 0;

	ent = hash_find(str);

	s++; // skip :

	// Validate first and allocate afterwards, so that rejected lines do not
	// leak their entry.
	if (master) {
		if (ent != -1) {
			warning("String name '%s' is used multiple times", str);
			return;
		}

		// the id is used as an index into allstr[65536]
		if (_next_string_id < 0 || _next_string_id >= 0x10000) {
			error("String ID 0x%X for '%s' is out of range", (unsigned int)_next_string_id, str);
		}

		ent = _next_string_id++;
		if (allstr[ent]) {
			warning("String ID 0x%X for '%s' already in use by '%s'", ent, str, allstr[ent] + 1);
			return;
		}
	} else {
		// Problems below are non-fatal: restore the previous state of the
		// flag instead of clearing it, so earlier fatal warnings survive.
		const bool had_warnings = _warnings;

		if (ent == -1) {
			warning("String name '%s' does not exist in master file", str);
			_warnings = had_warnings; // non-fatal
			return;
		}

		if (!allstr[ent][0]) {
			warning("String name '%s' is used multiple times", str);
			_warnings = had_warnings; // non-fatal
			return;
		}

		if (s[0] == ':' && s[1] == 0) {
			// Use the string from the master file legitimately. This has to
			// come before the command check: ":" contains no commands and
			// would never match a master string that has some.
			allstr[ent][0] = 0;
			return;
		}

		// check that the commands match
		if (!check_commands_match(s, allstr[ent] + 2 + strlen(allstr[ent] + 1))) {
			fprintf(stderr, "Warning: String name '%s' does not match the layout of the master string\n", str);
			return; // non-fatal, _warnings was not touched
		}
	}

	// allocate string entry: flag byte, name + NUL, text + NUL
	name_len = strlen(str);
	r = malloc(name_len + strlen(s) + 3);
	if (r == NULL) {
		error("Out of memory");
	}
	*r = master;
	memcpy(r + 1, str, name_len + 1);
	strcpy(r + 2 + name_len, s);

	if (master) {
		allstr[ent] = r;

		// add to hash table
		hash_add(str, ent);
	} else {
		// the translation replaces the master text
		free(allstr[ent]);
		allstr[ent] = r;
	}
}

/* Mix the NUL-terminated string `s` into the running hash `hash` and return
 * the new hash value. */
uint32 my_hash_str(uint32 hash, const char *s)
{
	while (*s != 0) {
		hash = ((hash << 3) | (hash >> 29)) ^ *s;
		/* shift right by one; when the dropped bit was set, xor in the constant */
		hash = (hash & 1) ? ((hash >> 1) ^ 0xDEADBEEF) : (hash >> 1);
		s++;
	}
	return hash;
}

/* Read `file` line by line and hand every line to handle_string().
 * Line endings and trailing spaces are stripped first. `english` tells
 * whether this is the master file; for the master file the version hash
 * (_hash) is computed afterwards from the ids, the names and the commands of
 * all strings. Failing to open the file is a fatal error.
 * Lines longer than the internal 2048-byte buffer are split by fgets. */
void parse_file(const char *file, bool english) {
	char buf[2048];
	int i;
	FILE *in;

	in = fopen(file, "r");
	if (in == NULL) { error("Cannot open file '%s'", file); }

	_cur_line = 1;
	while (fgets(buf, sizeof(buf),in) != NULL) {
		i = strlen(buf);
		while (i>0 && (buf[i-1]=='\r' || buf[i-1]=='\n' || buf[i-1] == ' ')) i--;
		buf[i] = 0;

		handle_string(buf, english);
		_cur_line++;
	}

	fclose(in);

	// make a hash of the file to get a unique "version number"
	if (english) {
		uint32 hash = 0;
		char *s;
		const CmdStruct *cs;
		for(i = 0; i!=65536; i++) {
			if ((s=allstr[i]) != NULL) {
				// Multiply in unsigned arithmetic: the product exceeds INT_MAX
				// for most ids and signed overflow is undefined behaviour.
				hash ^= (uint32)i * 0x717239;
				if (hash & 1) hash = (hash>>1) ^ 0xDEADBEEF; else hash >>= 1;
				hash = my_hash_str(hash, s + 1);

				// buf is reused as scratch space for the command parameters
				s = s + 2 + strlen(s + 1);
				while ((cs = parse_command_string(&s, buf, NULL)) != NULL) {
					// values up to 255 are literal bytes, not table entries;
					// compare at full width so the pointer is not truncated
					if ((unsigned long)cs > 255) {
						hash ^= (uint32)(cs - _cmd_structs) * 0x1234567;
						if (hash & 1) hash = (hash>>1) ^ 0xF00BAA4; else hash >>= 1;
					}
				}
			}
		}
		_hash = hash;
	}
}

/* Return the number of slots of string group `grp` that have to be written:
 * one more than the index of the highest used slot among the group's 0x800
 * slots, or 0 when the whole group is unused. */
int count_inuse(int grp) {
	const int base = grp << 11;
	int idx = 0x800;

	/* scan downwards for the highest slot that holds a string */
	while (--idx >= 0 && allstr[base + idx] == NULL) {
	}
	return idx + 1;
}

/* Give every name in the hash table that has no entry in allstr an empty
 * string (via emit_buf with an empty put buffer). The number of warnings
 * printed for this is capped by the num_warn counter. */
void check_all_used() {
	int bucket;
	int num_warn = 10;

	for (bucket = 0; bucket != HASH_SIZE; bucket++) {
		LineName *ln = _hash_head[bucket];

		while (ln != NULL) {
			if (allstr[ln->value] == 0) {
				num_warn++;
				if (num_warn < 50) {
					warning("String %s has no definition. Using NULL value", ln->str);
				}
				_put_pos = 0;
				emit_buf(ln->value);
			}
			ln = ln->hash_next;
		}
	}
}

/* Write a string length to `f` in the variable-size encoding:
 *   below 0xC0   -> one byte holding the length
 *   below 0x4000 -> two bytes: (high byte | 0xC0), then the low byte
 * Longer strings are a fatal error. */
void write_length(FILE *f, uint length)
{
	if (length >= 0x4000) {
		error("string too long");
	} else if (length >= 0xC0) {
		fputc((length >> 8) | 0xC0, f);
		fputc(length & 0xFF, f);
	} else {
		fputc(length, f);
	}
}

void gen_output(FILE *f) {
	uint16 in_use[32];
	uint16 in_use_file[32];
	int i,j;
	int tot_str = 0;

	check_all_used();

	for(i=0; i!=32; i++) {
		int n = count_inuse(i);
		in_use[i] = n;
		in_use_file[i] = TO_LE16(n);
		tot_str += n;
	}

	fwrite(in_use_file, 32*sizeof(uint16), 1, f);

	for(i=0; i!=32; i++) {
		for(j=0; j!=in_use[i]; j++) {
			char *s = allstr[(i<<11)+j];
			if (s == NULL) error("Internal error, s==NULL");
			
			write_length(f, *(uint16*)s);
			fwrite(s + sizeof(uint16), *(uint16*)s , 1, f);
			tot_str--;
		}
	}

	fputc(0, f); // write trailing nul character.

	if (tot_str != 0) {
		error("Internal error, tot_str != 0");
	}
}

/* Compare two files byte by byte.
 * Returns true when the contents are identical. Returns false when they
 * differ or when `n2` cannot be opened. Failing to open `n1` is fatal. */
bool compare_files(const char *n1, const char *n2)
{
	FILE *f1, *f2;
	char b1[4096];
	char b2[4096];
	size_t l1, l2;
	bool same = true;

	f2 = fopen(n2, "rb");
	if (f2 == NULL) return false;

	f1 = fopen(n1, "rb");
	if (f1 == NULL) error("can't open %s", n1);

	for (;;) {
		l1 = fread(b1, 1, sizeof(b1), f1);
		l2 = fread(b2, 1, sizeof(b2), f2);

		if (l1 != l2 || memcmp(b1, b2, l1) != 0) {
			same = false;
			break;
		}
		/* both files ended at the same point */
		if (l1 == 0) break;
	}

	fclose(f2);
	fclose(f1);
	return same;
}

/* Generate the C header with one enum constant per string.
 * The header is first written to "tmp.xxx". `filename` is only replaced when
 * its contents would change, otherwise the temporary file is removed. */
void write_strings_h(const char *filename)
{
	FILE *out;
	int id;
	int expected = -1;	// id that may be emitted without an explicit value
	int cur_grp = 0;

	out = fopen("tmp.xxx", "w");
	if (out == NULL) { error("can't open tmp.xxx"); }

	fprintf(out, "enum {");

	for (id = 0; id != 65536; id++) {
		const int grp = id >> 11;

		if (!allstr[id]) continue;

		// every string group gets its own enum
		if (cur_grp != grp) {
			cur_grp = grp;
			fprintf(out, "};\n\nenum {");
		}

		if (expected == id) {
			// directly follows the previous constant
			fprintf(out, "%s,\n", allstr[id] + 1);
		} else {
			fprintf(out, "\n%s = 0x%X,\n", allstr[id] + 1, id);
		}
		expected = id + 1;
	}

	fprintf(out, "};\n");

	fprintf(out,
		"\nenum {\n"
		"\tLANGUAGE_PACK_IDENT = 'GNAL',\n"
		"\tLANGUAGE_PACK_VERSION = 0x%X,\n"
		"};\n", (unsigned int)_hash);

	fclose(out);

	if (!compare_files("tmp.xxx", filename)) {
		// contents changed: rename tmp.xxx into filename
#if defined(WIN32)
		unlink(filename);
#endif
		if (rename("tmp.xxx", filename) == -1) error("rename() failed");
	} else {
		// files are equal. tmp.xxx is not needed
		unlink("tmp.xxx");
	}
}

/* Write the compiled language file `filename`: the LanguagePackHeader, then
 * every string slot of every group as a length (see write_length) plus its
 * encoded bytes, then a single 0 byte. Unused slots are written with length 0.
 *
 * show_todo controls strings whose flag byte is still non-zero (text taken
 * from the master file):
 *   0 - nothing special
 *   1 - the encoded string is prefixed with "<TODO> "
 *   2 - a warning is printed to stderr for each such string
 */
void write_langfile(const char *filename, int show_todo)
{
	FILE *f;
	int in_use[32];
	LanguagePackHeader hdr;
	int grp, idx;
	const CmdStruct *cs;
	char param[128];

	f = fopen(filename, "wb");
	if (f == NULL) error("can't open %s", filename);

	memset(&hdr, 0, sizeof(hdr));
	for (grp = 0; grp != 32; grp++) {
		int n = count_inuse(grp);
		in_use[grp] = n;
		hdr.offsets[grp] = TO_LE16(n);
	}

	hdr.ident = TO_LE32('GNAL');
	hdr.version = TO_LE32(_hash);
	strcpy(hdr.name, _lang_name);
	strcpy(hdr.own_name, _lang_ownname);

	fwrite(&hdr, sizeof(hdr), 1, f);

	for (grp = 0; grp != 32; grp++) {
		for (idx = 0; idx != in_use[grp]; idx++) {
			char *entry = allstr[(grp<<11)+idx];
			char *text;
			const char *origin;

			if (entry == NULL) {
				write_length(f, 0);
				continue;
			}

			// the text follows the flag byte and the NUL-terminated name
			text = entry + 2 + strlen(entry + 1);

			if (show_todo && entry[0]) {
				if (show_todo == 2) {
					fprintf(stderr, "Warning:%s: String '%s' is untranslated\n", filename, entry + 1);
				} else {
					const char *todo = "<TODO> ";
					for (; *todo; todo++) put_byte(*todo);
				}
			}

			// file name used in warnings about this string
			origin = entry[0] ? "english.lng" : filename;

			for (;;) {
				cs = parse_command_string(&text, param, origin);
				if (cs == NULL) break;
				if ((unsigned long)cs > 255) {
					cs->proc(param, cs->value);
				} else {
					// literal byte encoded in the pointer value
					put_byte((byte)(int)cs);
				}
			}

			write_length(f, _put_pos);
			fwrite(_put_buf, 1, _put_pos, f);
			_put_pos = 0;
		}
	}

	fputc(0, f);

	fclose(f);
}

/* Entry point.
 *   strgen -v | --version   print the revision (when built WITH_REV) and exit
 *   strgen                  compile lang/english.txt into lang/english.lng
 *                           and regenerate table/strings.h
 *   strgen [-t|-w] FILE     compile translation FILE against the master file;
 *                           the output name is FILE with ".txt" replaced by
 *                           ".lng" (or ".lng" appended)
 * -t prefixes untranslated strings with "<TODO> ", -w warns about them.
 * Returns 0 on success and 1 on warnings, over-long file names or invalid
 * arguments. */
int main(int argc, char* argv[])
{
	char *r;
	char buf[256];
	int show_todo = 0;

	if (argc > 1 && (!strcmp(argv[1], "-v") || !strcmp(argv[1], "--version"))) {
#ifdef WITH_REV
		puts((char*)_openttd_revision);
#endif
		return 0;
	}

	if (argc > 1 && !strcmp(argv[1], "-t")) {
		show_todo = 1;
		argc--, argv++;
	}

	if (argc > 1 && !strcmp(argv[1], "-w")) {
		show_todo = 2;
		argc--, argv++;
	}


	if (argc == 1) {
		// parse master file
		parse_file("lang/english.txt", true);
		if (_warnings) return 1;

		// write english.lng and strings.h

		write_langfile("lang/english.lng", 0);
		write_strings_h("table/strings.h");

	} else if (argc == 2) {
		parse_file("lang/english.txt", true);
		parse_file(argv[1], false);
		if (_warnings) return 1;

		// Worst case ".lng" is appended to the whole name, so the name, the
		// extension and the terminator all have to fit into buf.
		if (strlen(argv[1]) + sizeof(".lng") > sizeof(buf)) {
			fprintf(stderr, "file name too long: %s\n", argv[1]);
			return 1;
		}
		strcpy(buf, argv[1]);
		r = strrchr(buf, '.');
		if (!r || strcmp(r, ".txt")) r = strchr(buf, 0);
		strcpy(r, ".lng");
		write_langfile(buf, show_todo);
	} else {
		fprintf(stderr, "invalid arguments\n");
		return 1; // report the usage error to the caller
	}

	return 0;
}