/*
 * text.c --- pseudo text generator for use in DBGEN 2.0
 *
 * Defined Routines:
 * dbg_text() -- select and translate a sentence form
 */
7 | |
8 | #ifdef TEXT_TEST |
9 | #define DECLARER |
10 | #endif /* TEST */ |
11 | |
12 | #include "config.h" |
13 | |
14 | #include <stdlib.h> |
15 | #if (defined(_POSIX_) || !defined(WIN32)) /* Change for Windows NT */ |
16 | #include <unistd.h> |
17 | #endif /* WIN32 */ |
18 | #include <ctype.h> |
19 | #include <errno.h> |
20 | #include <limits.h> |
21 | #include <math.h> |
22 | #include <signal.h> |
23 | #include <stdio.h> /* */ |
24 | #include <string.h> |
25 | #ifdef HP |
26 | #include <strings.h> |
27 | #endif |
28 | #if (defined(WIN32) && !defined(_POSIX_)) |
29 | #include <process.h> |
30 | #pragma warning(disable : 4201) |
31 | #pragma warning(disable : 4214) |
32 | #pragma warning(disable : 4514) |
33 | #define WIN32_LEAN_AND_MEAN |
34 | #define NOATOM |
35 | #define NOGDICAPMASKS |
36 | #define NOMETAFILE |
37 | #define NOMINMAX |
38 | #define NOMSG |
39 | #define NOOPENFILE |
40 | #define NORASTEROPS |
41 | #define NOSCROLL |
42 | #define NOSOUND |
43 | #define NOSYSMETRICS |
44 | #define NOTEXTMETRIC |
45 | #define NOWH |
46 | #define NOCOMM |
47 | #define NOKANJI |
48 | #define NOMCX |
49 | #include <windows.h> |
50 | #pragma warning(default : 4201) |
51 | #pragma warning(default : 4214) |
52 | #endif |
53 | |
54 | #define TEXT_POOL_SIZE (300 * 1024 * 1024) /* 300MiB */ |
55 | |
56 | #include "dss.h" |
57 | #include "dsstypes.h" |
58 | |
59 | /* |
60 | * txt_vp() -- |
61 | * generate a verb phrase by |
62 | * 1) selecting a verb phrase form |
63 | * 2) parsing it to select parts of speech |
64 | * 3) selecting appropriate words |
65 | * 4) adding punctuation as required |
66 | * |
67 | * Returns: length of generated phrase |
68 | * Called By: txt_sentence() |
69 | * Calls: pick_str() |
70 | */ |
71 | static int txt_vp(char *dest, int sd) { |
72 | char syntax[MAX_GRAMMAR_LEN + 1], *cptr, *parse_target; |
73 | distribution *src; |
74 | int i, res = 0; |
75 | |
76 | pick_str(&vp, sd, &syntax[0]); |
77 | parse_target = syntax; |
78 | while ((cptr = strtok(parse_target, " " )) != NULL) { |
79 | src = NULL; |
80 | switch (*cptr) { |
81 | case 'D': |
82 | src = &adverbs; |
83 | break; |
84 | case 'V': |
85 | src = &verbs; |
86 | break; |
87 | case 'X': |
88 | src = &auxillaries; |
89 | break; |
90 | } /* end of POS switch statement */ |
91 | i = pick_str(src, sd, dest); |
92 | i = (int)strlen(DIST_MEMBER(src, i)); |
93 | dest += i; |
94 | res += i; |
95 | if (*(++cptr)) /* miscelaneous fillagree, like punctuation */ |
96 | { |
97 | dest += 1; |
98 | res += 1; |
99 | *dest = *cptr; |
100 | } |
101 | *dest = ' '; |
102 | dest++; |
103 | res++; |
104 | parse_target = NULL; |
105 | } /* end of while loop */ |
106 | |
107 | return (res); |
108 | } |
109 | |
110 | /* |
111 | * txt_np() -- |
112 | * generate a noun phrase by |
113 | * 1) selecting a noun phrase form |
114 | * 2) parsing it to select parts of speech |
115 | * 3) selecting appropriate words |
116 | * 4) adding punctuation as required |
117 | * |
118 | * Returns: length of generated phrase |
119 | * Called By: txt_sentence() |
120 | * Calls: pick_str(), |
121 | */ |
122 | static int txt_np(char *dest, int sd) { |
123 | char syntax[MAX_GRAMMAR_LEN + 1], *cptr, *parse_target; |
124 | distribution *src; |
125 | int i, res = 0; |
126 | |
127 | pick_str(&np, sd, &syntax[0]); |
128 | parse_target = syntax; |
129 | while ((cptr = strtok(parse_target, " " )) != NULL) { |
130 | src = NULL; |
131 | switch (*cptr) { |
132 | case 'A': |
133 | src = &articles; |
134 | break; |
135 | case 'J': |
136 | src = &adjectives; |
137 | break; |
138 | case 'D': |
139 | src = &adverbs; |
140 | break; |
141 | case 'N': |
142 | src = &nouns; |
143 | break; |
144 | } /* end of POS switch statement */ |
145 | i = pick_str(src, sd, dest); |
146 | i = (int)strlen(DIST_MEMBER(src, i)); |
147 | dest += i; |
148 | res += i; |
149 | if (*(++cptr)) /* miscelaneous fillagree, like punctuation */ |
150 | { |
151 | *dest = *cptr; |
152 | dest += 1; |
153 | res += 1; |
154 | } |
155 | *dest = ' '; |
156 | dest++; |
157 | res++; |
158 | parse_target = NULL; |
159 | } /* end of while loop */ |
160 | |
161 | return (res); |
162 | } |
163 | |
164 | /* |
165 | * txt_sentence() -- |
166 | * generate a sentence by |
167 | * 1) selecting a sentence form |
168 | * 2) parsing it to select parts of speech or phrase types |
169 | * 3) selecting appropriate words |
170 | * 4) adding punctuation as required |
171 | * |
172 | * Returns: length of generated sentence |
173 | * Called By: dbg_text() |
174 | * Calls: pick_str(), txt_np(), txt_vp() |
175 | */ |
176 | static int txt_sentence(char *dest, int sd) { |
177 | char syntax[MAX_GRAMMAR_LEN + 1], *cptr; |
178 | int i, res = 0, len = 0; |
179 | |
180 | pick_str(&grammar, sd, syntax); |
181 | cptr = syntax; |
182 | |
183 | next_token: /* I hate goto's, but can't seem to have parent and child use strtok() */ |
184 | while (*cptr && *cptr == ' ') |
185 | cptr++; |
186 | if (*cptr == '\0') |
187 | goto done; |
188 | switch (*cptr) { |
189 | case 'V': |
190 | len = txt_vp(dest, sd); |
191 | break; |
192 | case 'N': |
193 | len = txt_np(dest, sd); |
194 | break; |
195 | case 'P': |
196 | i = pick_str(&prepositions, sd, dest); |
197 | len = (int)strlen(DIST_MEMBER(&prepositions, i)); |
198 | strcpy((dest + len), " the " ); |
199 | len += 5; |
200 | len += txt_np(dest + len, sd); |
201 | break; |
202 | case 'T': |
203 | i = pick_str(&terminators, sd, --dest); /*terminators should abut previous word */ |
204 | len = (int)strlen(DIST_MEMBER(&terminators, i)); |
205 | break; |
206 | } /* end of POS switch statement */ |
207 | dest += len; |
208 | res += len; |
209 | cptr++; |
210 | if (*cptr && *cptr != ' ') /* miscelaneous fillagree, like punctuation */ |
211 | { |
212 | dest += 1; |
213 | res += 1; |
214 | *dest = *cptr; |
215 | } |
216 | goto next_token; |
217 | done: |
218 | *dest = '\0'; |
219 | return (--res); |
220 | } |
221 | |
222 | static char *gen_text(char *dest, int sd, distribution *s) { |
223 | long i = 0; |
224 | DSS_HUGE j; |
225 | |
226 | RANDOM(j, 1, s->list[s->count - 1].weight, sd); |
227 | while (s->list[i].weight < j) |
228 | i++; |
229 | char *src = s->list[i].text; |
230 | int ind = 0; |
231 | while (src[ind]) { |
232 | dest[ind] = src[ind]; |
233 | ind++; |
234 | } |
235 | dest[ind] = ' '; |
236 | return dest + ind + 1; |
237 | } |
238 | |
/*
 * Size bounds for the weight -> text lookup tables below. gen_index() stores
 * one pointer for every weight from 0 up to the weight of a distribution's
 * last entry, so each constant must be at least that value.
 * NOTE(review): the numbers presumably mirror the final weights in the
 * distribution data file -- confirm whenever that file changes.
 */
#define NOUN_MAX_WEIGHT         340
#define ADJECTIVES_MAX_WEIGHT   289
#define ADVERBS_MAX_WEIGHT      262
#define AUXILLARIES_MAX_WEIGHT  18
#define VERBS_MAX_WEIGHT        174
#define PREPOSITIONS_MAX_WEIGHT 456

/* One slot per possible weight (0..MAX inclusive); filled by gen_index() on
 * the first call to dbg_text() and read by gen_text_index(). */
static char *noun_index[NOUN_MAX_WEIGHT + 1];
static char *adjectives_index[ADJECTIVES_MAX_WEIGHT + 1];
static char *adverbs_index[ADVERBS_MAX_WEIGHT + 1];
static char *auxillaries_index[AUXILLARIES_MAX_WEIGHT + 1];
static char *verbs_index[VERBS_MAX_WEIGHT + 1];
static char *prepositions_index[PREPOSITIONS_MAX_WEIGHT + 1];
252 | |
253 | // generate a lookup table for weight -> str |
254 | static void gen_index(char **index, distribution *s) { |
255 | for (size_t w = 0; w <= s->list[s->count - 1].weight; w++) { |
256 | long i = 0; |
257 | while (s->list[i].weight < w) |
258 | i++; |
259 | index[w] = s->list[i].text; |
260 | } |
261 | } |
262 | |
263 | static char *gen_text_index(char *dest, int sd, char **index, distribution *s) { |
264 | long i = 0; |
265 | DSS_HUGE j; |
266 | |
267 | RANDOM(j, 1, s->list[s->count - 1].weight, sd); |
268 | char *src = index[j]; |
269 | int ind = 0; |
270 | while (src[ind]) { |
271 | dest[ind] = src[ind]; |
272 | ind++; |
273 | } |
274 | dest[ind] = ' '; |
275 | return dest + ind + 1; |
276 | } |
277 | |
278 | static char *gen_vp(char *dest, int sd) { |
279 | DSS_HUGE j; |
280 | RANDOM(j, 1, vp.list[vp.count - 1].weight, sd); |
281 | int index = 0; |
282 | index += vp.list[0].weight < j; |
283 | index += vp.list[1].weight < j; |
284 | index += vp.list[2].weight < j; |
285 | |
286 | if (index == 0) { |
287 | dest = gen_text_index(dest, sd, verbs_index, &verbs); |
288 | } else if (index == 1) { |
289 | dest = gen_text_index(dest, sd, auxillaries_index, &auxillaries); |
290 | dest = gen_text_index(dest, sd, verbs_index, &verbs); |
291 | } else if (index == 2) { |
292 | dest = gen_text_index(dest, sd, verbs_index, &verbs); |
293 | dest = gen_text_index(dest, sd, adverbs_index, &adverbs); |
294 | } else { |
295 | dest = gen_text_index(dest, sd, auxillaries_index, &auxillaries); |
296 | dest = gen_text_index(dest, sd, verbs_index, &verbs); |
297 | dest = gen_text_index(dest, sd, adverbs_index, &adverbs); |
298 | } |
299 | return dest; |
300 | } |
301 | |
302 | static char *gen_np(char *dest, int sd) { |
303 | DSS_HUGE j; |
304 | RANDOM(j, 1, np.list[np.count - 1].weight, sd); |
305 | int index = 0; |
306 | index += np.list[0].weight < j; |
307 | index += np.list[1].weight < j; |
308 | index += np.list[2].weight < j; |
309 | |
310 | if (index == 0) { |
311 | dest = gen_text_index(dest, sd, noun_index, &nouns); |
312 | } else if (index == 1) { |
313 | dest = gen_text_index(dest, sd, adjectives_index, &adjectives); |
314 | dest = gen_text_index(dest, sd, noun_index, &nouns); |
315 | } else if (index == 2) { |
316 | dest = gen_text_index(dest, sd, adjectives_index, &adjectives); |
317 | dest[-1] = ','; |
318 | *(dest++) = ' '; |
319 | dest = gen_text_index(dest, sd, adjectives_index, &adjectives); |
320 | dest = gen_text_index(dest, sd, noun_index, &nouns); |
321 | } else { |
322 | dest = gen_text_index(dest, sd, adverbs_index, &adverbs); |
323 | dest = gen_text_index(dest, sd, adjectives_index, &adjectives); |
324 | dest = gen_text_index(dest, sd, noun_index, &nouns); |
325 | } |
326 | return dest; |
327 | } |
328 | |
329 | static char *gen_preposition(char *dest, int sd) { |
330 | dest = gen_text_index(dest, sd, prepositions_index, &prepositions); |
331 | *(dest++) = 't'; |
332 | *(dest++) = 'h'; |
333 | *(dest++) = 'e'; |
334 | *(dest++) = ' '; |
335 | return gen_np(dest, sd); |
336 | } |
337 | |
338 | static char *gen_terminator(char *dest, int sd) { |
339 | dest = gen_text(--dest, sd, &terminators); |
340 | return dest - 1; |
341 | } |
342 | |
343 | static char *gen_sentence(char *dest, int sd) { |
344 | const char *cptr; |
345 | int i; |
346 | |
347 | DSS_HUGE j; |
348 | RANDOM(j, 1, grammar.list[grammar.count - 1].weight, sd); |
349 | int index = 0; |
350 | index += grammar.list[0].weight < j; |
351 | index += grammar.list[1].weight < j; |
352 | index += grammar.list[2].weight < j; |
353 | index += grammar.list[3].weight < j; |
354 | cptr = grammar.list[index].text; |
355 | |
356 | if (index == 0) { |
357 | dest = gen_np(dest, sd); |
358 | dest = gen_vp(dest, sd); |
359 | dest = gen_terminator(dest, sd); |
360 | } else if (index == 1) { |
361 | dest = gen_np(dest, sd); |
362 | dest = gen_vp(dest, sd); |
363 | dest = gen_preposition(dest, sd); |
364 | dest = gen_terminator(dest, sd); |
365 | } else if (index == 2) { |
366 | dest = gen_np(dest, sd); |
367 | dest = gen_vp(dest, sd); |
368 | dest = gen_np(dest, sd); |
369 | dest = gen_terminator(dest, sd); |
370 | } else if (index == 3) { |
371 | dest = gen_np(dest, sd); |
372 | dest = gen_preposition(dest, sd); |
373 | dest = gen_vp(dest, sd); |
374 | dest = gen_np(dest, sd); |
375 | dest = gen_terminator(dest, sd); |
376 | } else { |
377 | dest = gen_np(dest, sd); |
378 | dest = gen_preposition(dest, sd); |
379 | dest = gen_vp(dest, sd); |
380 | dest = gen_preposition(dest, sd); |
381 | dest = gen_terminator(dest, sd); |
382 | } |
383 | *dest = ' '; |
384 | return dest + 1; |
385 | } |
386 | |
387 | /* |
388 | * dbg_text() -- |
389 | * produce ELIZA-like text of random, bounded length, truncating the last |
390 | * generated sentence as required |
391 | */ |
392 | void dbg_text(char *tgt, int min, int max, int sd) { |
393 | DSS_HUGE hgLength = 0, hgOffset, wordlen = 0, s_len, needed; |
394 | char sentence[MAX_SENT_LEN + 1], *cp; |
395 | static char szTextPool[TEXT_POOL_SIZE + 1 + 100]; |
396 | static int bInit = 0; |
397 | |
398 | if (!bInit) { |
399 | gen_index(noun_index, &nouns); |
400 | gen_index(adjectives_index, &adjectives); |
401 | gen_index(adverbs_index, &adverbs); |
402 | gen_index(auxillaries_index, &auxillaries); |
403 | gen_index(verbs_index, &verbs); |
404 | gen_index(prepositions_index, &prepositions); |
405 | |
406 | char *ptr = szTextPool; |
407 | char *endptr = szTextPool + TEXT_POOL_SIZE + 1; |
408 | while (ptr < endptr) { |
409 | ptr = gen_sentence(ptr, 5); |
410 | } |
411 | szTextPool[TEXT_POOL_SIZE] = '\0'; |
412 | |
413 | bInit = 1; |
414 | } |
415 | |
416 | RANDOM(hgOffset, 0, TEXT_POOL_SIZE - max, sd); |
417 | RANDOM(hgLength, min, max, sd); |
418 | strncpy(&tgt[0], &szTextPool[hgOffset], (int)hgLength); |
419 | tgt[hgLength] = '\0'; |
420 | |
421 | return; |
422 | } |
423 | |
424 | #ifdef TEXT_TEST |
/* Objects defined here only for the standalone TEXT_TEST build. */
tdef tdefs[1] = {NULL};
/* Word and grammar distributions; main() below loads each one with read_dist(). */
distribution nouns, verbs, adjectives, adverbs, auxillaries, terminators, articles, prepositions, grammar, np, vp;
427 | |
428 | int main() { |
429 | char prattle[401]; |
430 | |
431 | verbose = 1; |
432 | |
433 | read_dist(env_config(DIST_TAG, DIST_DFLT), "nouns" , &nouns); |
434 | read_dist(env_config(DIST_TAG, DIST_DFLT), "verbs" , &verbs); |
435 | read_dist(env_config(DIST_TAG, DIST_DFLT), "adjectives" , &adjectives); |
436 | read_dist(env_config(DIST_TAG, DIST_DFLT), "adverbs" , &adverbs); |
437 | read_dist(env_config(DIST_TAG, DIST_DFLT), "auxillaries" , &auxillaries); |
438 | read_dist(env_config(DIST_TAG, DIST_DFLT), "terminators" , &terminators); |
439 | read_dist(env_config(DIST_TAG, DIST_DFLT), "articles" , &articles); |
440 | read_dist(env_config(DIST_TAG, DIST_DFLT), "prepositions" , &prepositions); |
441 | read_dist(env_config(DIST_TAG, DIST_DFLT), "grammar" , &grammar); |
442 | read_dist(env_config(DIST_TAG, DIST_DFLT), "np" , &np); |
443 | read_dist(env_config(DIST_TAG, DIST_DFLT), "vp" , &vp); |
444 | |
445 | while (1) { |
446 | dbg_text(&prattle[0], 300, 400, 0); |
447 | printf("<%s>\n" , prattle); |
448 | } |
449 | |
450 | return (0); |
451 | } |
452 | #endif /* TEST */ |
453 | |