(svn r1929) Feature: [namegen] Support for dynamic generation of the Czech town names.
author pasky
Sat, 05 Mar 2005 21:00:13 +0000
changeset 1425 d566470aa3ca
parent 1424 c6d120592e98
child 1426 0a215fc32f96
(svn r1929) Feature: [namegen] Support for dynamic generation of the Czech town names.

The static names are still used in 1/4 of cases. I think the tables for
dynamic generation must look pretty spectacular. :-) New stems are still
needed and there can be occasional glitches, please let me know.

I guess that this method of dynamic generation could be used for at least
Slovak town names, too. And possibly other Slavic languages?
namegen.c
table/namegen.h
--- a/namegen.c	Sat Mar 05 18:44:26 2005 +0000
+++ b/namegen.c	Sat Mar 05 21:00:13 2005 +0000
@@ -318,7 +318,160 @@
 
 static byte MakeCzechTownName(char *buf, uint32 seed)
 {
-	strcpy(buf, name_czech_real[SeedChance(0, lengthof(name_czech_real), seed)]);
+	/* Probability of prefixes/suffixes */
+	/* Intended split: 0..11 prefix, 12..13 prefix+suffix, 14..17 suffix, 18..31 nothing. NOTE: the tests below actually give 0..11 prefix, 12..16 suffix, 17..31 nothing. */
+	int prob_tails;
+	bool do_prefix, do_suffix, dynamic_subst;
+	/* IDs of the respective parts */
+	int prefix = 0, stem = 0, postfix = 0, ending = 0, suffix = 0;
+	/* The select criteria. */
+	enum CzechGender gender;
+	enum CzechChoose choose;
+	enum CzechAllow allow;
+
+	// 1:3 chance to use a real name.
+	if (SeedChance(0, 4, seed) == 0) {
+		strcpy(buf, name_czech_real[SeedChance(1, lengthof(name_czech_real), seed)]);
+		return 0;
+	}
+
+	// NUL terminates the string for strcat()
+	strcpy(buf, "");
+
+	prob_tails = SeedChance(2, 32, seed);
+	do_prefix = prob_tails < 12;
+	do_suffix = prob_tails > 11 && prob_tails < 17;
+
+	if (do_prefix) prefix = SeedChance(5, lengthof(name_czech_adj), seed);
+	if (do_suffix) suffix = SeedChance(7, lengthof(name_czech_suffix), seed);
+	// Each stem is weighted 3:1 against each full substantive when picking
+	stem = SeedChance(9, lengthof(name_czech_subst_full)
+	                     + 3 * lengthof(name_czech_subst_stem),
+	                   seed);
+	if (stem < (int) lengthof(name_czech_subst_full)) {
+		// That was easy!
+		dynamic_subst = false;
+		gender = name_czech_subst_full[stem].gender;
+		choose = name_czech_subst_full[stem].choose;
+		allow = name_czech_subst_full[stem].allow;
+
+	} else {
+		unsigned int map[lengthof(name_czech_subst_ending)];
+		int ending_start = -1, ending_stop = -1;
+		int i;
+
+		// Load the substantive
+		dynamic_subst = true;
+		stem -= lengthof(name_czech_subst_full);
+		stem %= lengthof(name_czech_subst_stem);
+		gender = name_czech_subst_stem[stem].gender;
+		choose = name_czech_subst_stem[stem].choose;
+		allow = name_czech_subst_stem[stem].allow;
+
+		// Load the postfix (1:1 chance that a postfix will be inserted)
+		postfix = SeedChance(14, lengthof(name_czech_subst_postfix) * 2, seed);
+
+		if (choose & CZC_POSTFIX) {
+			// Always get a real postfix.
+			postfix %= lengthof(name_czech_subst_postfix);
+		}
+		if (choose & CZC_NOPOSTFIX) {
+			// Always drop a postfix.
+			postfix += lengthof(name_czech_subst_postfix);
+		}
+		if (postfix < (int) lengthof(name_czech_subst_postfix))
+			choose |= CZC_POSTFIX;
+		else
+			choose |= CZC_NOPOSTFIX;
+
+		// Locate the contiguous array segment of endings with a suitable gender
+		for (ending = 0; ending < (int) lengthof(name_czech_subst_ending); ending++) {
+			const struct CzechNameSubst *e = &name_czech_subst_ending[ending];
+
+			if (gender == CZG_FREE
+			    || (gender == CZG_NFREE && e->gender != CZG_SNEUT && e->gender != CZG_PNEUT)
+			    || (gender == e->gender)) {
+				if (ending_start < 0)
+					ending_start = ending;
+
+			} else if (ending_start >= 0) {
+				ending_stop = ending - 1;
+				break;
+			}
+		}
+		if (ending_stop < 0) {
+			// Whoa. All the endings matched.
+			ending_stop = ending - 1;
+		}
+
+		// Build a dense list of the segment's endings whose choose/allow masks match
+		i = 0;
+		for (ending = ending_start; ending <= ending_stop; ending++) {
+			const struct CzechNameSubst *e = &name_czech_subst_ending[ending];
+
+			if ((e->choose & choose) == choose && (e->allow & allow) != 0)
+				map[i++] = ending;
+		}
+		assert(i > 0);
+
+		// Load the ending
+		ending = map[SeedChance(16, i, seed)];
+		// Override a possible CZG_*FREE; this must be a real gender, otherwise
+		// indexing name_czech_patmod[] overflows when modifying the adjective.
+		gender = name_czech_subst_ending[ending].gender;
+		assert(gender != CZG_FREE && gender != CZG_NFREE);
+	}
+
+	if (do_prefix && (name_czech_adj[prefix].choose & choose) != choose) {
+		// Throw away non-matching prefix.
+		do_prefix = false;
+	}
+
+	// Now finally construct the name
+
+	if (do_prefix) {
+		enum CzechPattern pattern = name_czech_adj[prefix].pattern;
+		int endpos;
+
+		strcat(buf, name_czech_adj[prefix].name);
+		endpos = strlen(buf) - 1;
+		if (gender == CZG_SMASC && pattern == CZP_PRIVL) {
+			/* -ovX -> -uv */
+			buf[endpos - 2] = 'u';
+			assert(buf[endpos - 1] == 'v');
+			buf[endpos] = '\0';
+		} else {
+			buf[endpos] = name_czech_patmod[gender][pattern];
+		}
+
+		strcat(buf, " ");
+	}
+
+	if (dynamic_subst) {
+		strcat(buf, name_czech_subst_stem[stem].name);
+		if (postfix < (int) lengthof(name_czech_subst_postfix)) {
+			int postlen, endlen;
+
+			postlen = strlen(name_czech_subst_postfix[postfix]);
+			endlen = strlen(name_czech_subst_ending[ending].name);
+			// Kill the "avava" and "Jananna"-like cases
+			if (2 > postlen || postlen > endlen
+			    || (name_czech_subst_postfix[postfix][1]
+			           != name_czech_subst_ending[ending].name[1]
+			        && name_czech_subst_postfix[postfix][2]
+			           != name_czech_subst_ending[ending].name[1]))
+				strcat(buf, name_czech_subst_postfix[postfix]);
+		}
+		strcat(buf, name_czech_subst_ending[ending].name);
+	} else {
+		strcat(buf, name_czech_subst_full[stem].name);
+	}
+
+	if (do_suffix) {
+		strcat(buf, " ");
+		strcat(buf, name_czech_suffix[suffix]);
+	}
+
 	return 0;
 }
 
--- a/table/namegen.h	Sat Mar 05 18:44:26 2005 +0000
+++ b/table/namegen.h	Sat Mar 05 21:00:13 2005 +0000
@@ -1659,6 +1659,228 @@
 	"Znojmo"
 };
 
+
+/* The advanced hyperintelligent Czech town names generator! */
+
+// Grammatical genders: singular (CZG_S*), then plural (CZG_P*).
+enum CzechGender {
+	CZG_SMASC,
+	CZG_SFEM,
+	CZG_SNEUT,
+	CZG_PMASC,
+	CZG_PFEM,
+	CZG_PNEUT,
+	// Special for substantive stems - the ending chooses the gender.
+	CZG_FREE,
+	// Like CZG_FREE, but disallow the neuter genders (CZG_SNEUT, CZG_PNEUT).
+	CZG_NFREE
+};
+enum CzechPattern {
+	CZP_JARNI,
+	CZP_MLADY,
+	CZP_PRIVL
+};
+/* [CzechGender][CzechPattern] - replaces the last character of the adjective
+ * by this. */
+// XXX: [CZG_SMASC][CZP_PRIVL] needs special handling: -ovX -> -uv.
+static const char name_czech_patmod[6][3] = {
+	/* CZG_SMASC */ { 'í', 'ý', 'X' },
+	/* CZG_SFEM */  { 'í', 'á', 'a' },
+	/* CZG_SNEUT */ { 'í', 'é', 'o' },
+	/* CZG_PMASC */ { 'í', 'é', 'y' },
+	/* CZG_PFEM */  { 'í', 'é', 'y' },
+	/* CZG_PNEUT */ { 'í', 'á', 'a' }
+};
+
+// This way the substantives can choose only some adjectives/endings:
+// At least one of these flags must be satisfied:
+enum CzechAllow {
+	CZA_SHORT = 1,
+	CZA_MIDDLE = 2,
+	CZA_LONG = 4,
+	CZA_ALL = ~0
+};
+// All these flags must be satisfied (in the stem->others direction):
+enum CzechChoose {
+	CZC_NORMAL = 1,
+	CZC_COLOR = 2,
+	CZC_POSTFIX = 4, // Matched if postfix was inserted.
+	CZC_NOPOSTFIX = 8, // Matched if no postfix was inserted.
+	CZC_ANY = ~0
+};
+
+struct CzechNameSubst {
+	enum CzechGender gender;
+	enum CzechAllow allow;
+	enum CzechChoose choose;
+	const char *name;
+};
+
+struct CzechNameAdj {
+	enum CzechPattern pattern;
+	enum CzechChoose choose;
+	const char *name;
+};
+
+// Some items which should be more common are listed twice.
+static const struct CzechNameAdj name_czech_adj[] = {
+	{ CZP_JARNI, CZC_ANY, "Horní" },
+	{ CZP_JARNI, CZC_ANY, "Horní" },
+	{ CZP_JARNI, CZC_ANY, "Dolní" },
+	{ CZP_JARNI, CZC_ANY, "Dolní" },
+	{ CZP_JARNI, CZC_ANY, "Prední" },
+	{ CZP_JARNI, CZC_ANY, "Zadní" },
+	{ CZP_JARNI, CZC_ANY, "Kostelní" },
+	{ CZP_JARNI, CZC_ANY, "Havraní" },
+	{ CZP_JARNI, CZC_ANY, "Rícní" },
+	{ CZP_MLADY, CZC_ANY, "Velký" },
+	{ CZP_MLADY, CZC_ANY, "Velký" },
+	{ CZP_MLADY, CZC_ANY, "Malý" },
+	{ CZP_MLADY, CZC_ANY, "Malý" },
+	{ CZP_MLADY, CZC_ANY, "Vysoký" },
+	{ CZP_MLADY, CZC_ANY, "Ceský" },
+	{ CZP_MLADY, CZC_ANY, "Moravský" },
+	{ CZP_MLADY, CZC_ANY, "Slovácký" },
+	{ CZP_MLADY, CZC_ANY, "Uherský" },
+	{ CZP_MLADY, CZC_ANY, "Starý" },
+	{ CZP_MLADY, CZC_ANY, "Starý" },
+	{ CZP_MLADY, CZC_ANY, "Nový" },
+	{ CZP_MLADY, CZC_ANY, "Nový" },
+	{ CZP_MLADY, CZC_ANY, "Mladý" },
+	{ CZP_MLADY, CZC_ANY, "Královský" },
+	{ CZP_MLADY, CZC_ANY, "Kamenný" },
+	{ CZP_MLADY, CZC_ANY, "Cihlový" },
+	{ CZP_MLADY, CZC_ANY, "Divný" },
+	{ CZP_MLADY, CZC_COLOR, "Cervená" },
+	{ CZP_MLADY, CZC_COLOR, "Cervená" },
+	{ CZP_MLADY, CZC_COLOR, "Zelená" },
+	{ CZP_MLADY, CZC_COLOR, "Zlutá" },
+	{ CZP_MLADY, CZC_COLOR, "Sivá" },
+	{ CZP_MLADY, CZC_COLOR, "Sedá" },
+	{ CZP_MLADY, CZC_COLOR, "Bílá" },
+	{ CZP_MLADY, CZC_COLOR, "Modrá" },
+	{ CZP_MLADY, CZC_COLOR, "Ruzová" },
+	{ CZP_MLADY, CZC_COLOR, "Cerná" },
+	{ CZP_PRIVL, CZC_ANY, "Králova" },
+	{ CZP_PRIVL, CZC_ANY, "Janova" },
+	{ CZP_PRIVL, CZC_ANY, "Karlova" },
+	{ CZP_PRIVL, CZC_ANY, "Jiríkova" },
+	{ CZP_PRIVL, CZC_ANY, "Petrova" },
+	{ CZP_PRIVL, CZC_ANY, "Sudovo" },
+};
+
+// Considered a stem for choose/allow matching purposes.
+static const struct CzechNameSubst name_czech_subst_full[] = {
+	{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Sedlec" },
+	{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
+	{ CZG_SMASC, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Brod" },
+	{ CZG_SMASC, CZA_ALL, CZC_NORMAL, "Úval" },
+	{ CZG_SFEM,  CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hora" },
+	{ CZG_SFEM,  CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
+	{ CZG_SFEM,  CZA_ALL, CZC_NORMAL | CZC_COLOR, "Lhota" },
+	{ CZG_SFEM,  CZA_ALL, CZC_NORMAL | CZC_COLOR, "Hlava" },
+	{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
+	{ CZG_SNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Zdár" },
+	{ CZG_PMASC, CZA_ALL, CZC_NORMAL, "Úvaly" },
+	{ CZG_PFEM,  CZA_ALL, CZC_NORMAL | CZC_COLOR, "Luka" },
+	{ CZG_PNEUT, CZA_ALL, CZC_NORMAL | CZC_COLOR, "Pole" },
+};
+
+// TODO: More stems needed. --pasky
+static const struct CzechNameSubst name_czech_subst_stem[] = {
+	{ CZG_SMASC,             CZA_MIDDLE,            CZC_NORMAL | CZC_COLOR, "Kostel" },
+	{ CZG_SMASC,             CZA_MIDDLE,            CZC_NORMAL | CZC_COLOR, "Kláster" },
+	{ CZG_SMASC, CZA_SHORT,                         CZC_NORMAL | CZC_COLOR, "Lhot" },
+	{ CZG_SFEM,  CZA_SHORT,                         CZC_NORMAL | CZC_COLOR, "Lhot" },
+	{ CZG_SFEM,  CZA_SHORT,                         CZC_NORMAL | CZC_COLOR, "Hur" },
+	{ CZG_FREE,              CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Sedl" },
+	{ CZG_FREE,  CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hrad" },
+	{ CZG_NFREE,             CZA_MIDDLE,            CZC_NORMAL, "Pras" },
+	{ CZG_NFREE,             CZA_MIDDLE,            CZC_NORMAL, "Baz" },
+	{ CZG_NFREE,             CZA_MIDDLE,            CZC_NORMAL, "Tes" },
+	{ CZG_NFREE,             CZA_MIDDLE,            CZC_NORMAL, "Uz" },
+	{ CZG_NFREE,             CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Br" },
+	{ CZG_NFREE,             CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Vod" },
+	{ CZG_NFREE,             CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Jan" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Prach" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Kunr" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Strak" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Vit" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Vys" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Zat" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Zer" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Stred" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Harv" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Pruh" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Tach" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Písn" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Jin" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Jes" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Jar" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Sok" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Hod" },
+	{ CZG_NFREE,                          CZA_LONG, CZC_NORMAL, "Net" },
+	{ CZG_FREE,                           CZA_LONG, CZC_NORMAL, "Praz" },
+	{ CZG_FREE,                           CZA_LONG, CZC_NORMAL, "Nerat" },
+	{ CZG_FREE,                           CZA_LONG, CZC_NORMAL, "Kral" },
+	{ CZG_FREE,                           CZA_LONG, CZC_NORMAL | CZC_NOPOSTFIX, "Pan" },
+	{ CZG_FREE,  CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL, "Odstred" },
+	{ CZG_FREE,  CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_NORMAL | CZC_COLOR, "Mrat" },
+	{ CZG_FREE,                           CZA_LONG, CZC_NORMAL | CZC_COLOR, "Hlav" },
+	{ CZG_FREE,  CZA_SHORT | CZA_MIDDLE,            CZC_NORMAL, "Mer" },
+};
+
+// Optional postfix inserted between stem and ending.
+static const char *name_czech_subst_postfix[] = {
+	"av", "an", "at",
+	"ov", "on", "ot",
+	"ev", "en", "et",
+};
+
+// Entries of one gender must be adjacent, and both neuter genders must come last!
+static const struct CzechNameSubst name_czech_subst_ending[] = {
+	{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE,            CZC_ANY, "ec" },
+	{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE,            CZC_ANY, "ín" },
+	{ CZG_SMASC, CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ov" },
+	{ CZG_SMASC, CZA_SHORT       |        CZA_LONG, CZC_ANY, "kov" },
+	{ CZG_SMASC,                          CZA_LONG, CZC_POSTFIX, "ín" },
+	{ CZG_SMASC,                          CZA_LONG, CZC_POSTFIX, "ník" },
+	{ CZG_SFEM,  CZA_SHORT,                         CZC_ANY, "ka" },
+	{ CZG_SFEM,              CZA_MIDDLE,            CZC_ANY, "inka" },
+	{ CZG_SFEM,              CZA_MIDDLE,            CZC_NOPOSTFIX, "na" },
+	{ CZG_SFEM,              CZA_MIDDLE,            CZC_ANY, "ná" },
+	{ CZG_SFEM,                           CZA_LONG, CZC_ANY, "ava" },
+	{ CZG_PMASC,                          CZA_LONG, CZC_ANY, "íky" },
+	{ CZG_PMASC,                          CZA_LONG, CZC_ANY, "upy" },
+	{ CZG_PFEM,                           CZA_LONG, CZC_ANY, "avy" },
+	{ CZG_PFEM,  CZA_SHORT | CZA_MIDDLE | CZA_LONG, CZC_ANY, "ice" },
+	{ CZG_PNEUT, CZA_SHORT | CZA_MIDDLE,            CZC_ANY, "na" },
+	{ CZG_SNEUT, CZA_SHORT | CZA_MIDDLE,            CZC_ANY, "no" },
+	{ CZG_SNEUT,                          CZA_LONG, CZC_ANY, "iste" },
+};
+
+static const char *name_czech_suffix[] = {
+	"nad Cydlinou",
+	"nad Dyjí",
+	"nad Jihlavou",
+	"nad Labem",
+	"nad Lesy",
+	"nad Moravou",
+	"nad Nisou",
+	"nad Odrou",
+	"nad Ostravicí",
+	"nad Sázavou",
+	"nad Vltavou",
+	"pod Pradedem",
+	"pod Radhostem",
+	"pod Rípem",
+	"pod Snezkou",
+	"pod Spicákem",
+	"pod Sedlem",
+};
+
+
+
 static const char *name_romanian_real[]= {
 	"Adjud",
 	"Alba Iulia",