/*
 * text.c --- pseudo text generator for use in DBGEN 2.0
 *
 * Defined Routines:
 * dbg_text() -- select and translate a sentence form
 */
7
8#ifdef TEXT_TEST
9#define DECLARER
10#endif /* TEST */
11
12#include "config.h"
13
14#include <stdlib.h>
15#if (defined(_POSIX_) || !defined(WIN32)) /* Change for Windows NT */
16#include <unistd.h>
17#endif /* WIN32 */
18#include <ctype.h>
19#include <errno.h>
20#include <limits.h>
21#include <math.h>
22#include <signal.h>
23#include <stdio.h> /* */
24#include <string.h>
25#ifdef HP
26#include <strings.h>
27#endif
28#if (defined(WIN32) && !defined(_POSIX_))
29#include <process.h>
30#pragma warning(disable : 4201)
31#pragma warning(disable : 4214)
32#pragma warning(disable : 4514)
33#define WIN32_LEAN_AND_MEAN
34#define NOATOM
35#define NOGDICAPMASKS
36#define NOMETAFILE
37#define NOMINMAX
38#define NOMSG
39#define NOOPENFILE
40#define NORASTEROPS
41#define NOSCROLL
42#define NOSOUND
43#define NOSYSMETRICS
44#define NOTEXTMETRIC
45#define NOWH
46#define NOCOMM
47#define NOKANJI
48#define NOMCX
49#include <windows.h>
50#pragma warning(default : 4201)
51#pragma warning(default : 4214)
52#endif
53
54#define TEXT_POOL_SIZE (300 * 1024 * 1024) /* 300MiB */
55
56#include "dss.h"
57#include "dsstypes.h"
58
59/*
60 * txt_vp() --
61 * generate a verb phrase by
62 * 1) selecting a verb phrase form
63 * 2) parsing it to select parts of speech
64 * 3) selecting appropriate words
65 * 4) adding punctuation as required
66 *
67 * Returns: length of generated phrase
68 * Called By: txt_sentence()
69 * Calls: pick_str()
70 */
71static int txt_vp(char *dest, int sd) {
72 char syntax[MAX_GRAMMAR_LEN + 1], *cptr, *parse_target;
73 distribution *src;
74 int i, res = 0;
75
76 pick_str(&vp, sd, &syntax[0]);
77 parse_target = syntax;
78 while ((cptr = strtok(parse_target, " ")) != NULL) {
79 src = NULL;
80 switch (*cptr) {
81 case 'D':
82 src = &adverbs;
83 break;
84 case 'V':
85 src = &verbs;
86 break;
87 case 'X':
88 src = &auxillaries;
89 break;
90 } /* end of POS switch statement */
91 i = pick_str(src, sd, dest);
92 i = (int)strlen(DIST_MEMBER(src, i));
93 dest += i;
94 res += i;
95 if (*(++cptr)) /* miscelaneous fillagree, like punctuation */
96 {
97 dest += 1;
98 res += 1;
99 *dest = *cptr;
100 }
101 *dest = ' ';
102 dest++;
103 res++;
104 parse_target = NULL;
105 } /* end of while loop */
106
107 return (res);
108}
109
110/*
111 * txt_np() --
112 * generate a noun phrase by
113 * 1) selecting a noun phrase form
114 * 2) parsing it to select parts of speech
115 * 3) selecting appropriate words
116 * 4) adding punctuation as required
117 *
118 * Returns: length of generated phrase
119 * Called By: txt_sentence()
120 * Calls: pick_str(),
121 */
122static int txt_np(char *dest, int sd) {
123 char syntax[MAX_GRAMMAR_LEN + 1], *cptr, *parse_target;
124 distribution *src;
125 int i, res = 0;
126
127 pick_str(&np, sd, &syntax[0]);
128 parse_target = syntax;
129 while ((cptr = strtok(parse_target, " ")) != NULL) {
130 src = NULL;
131 switch (*cptr) {
132 case 'A':
133 src = &articles;
134 break;
135 case 'J':
136 src = &adjectives;
137 break;
138 case 'D':
139 src = &adverbs;
140 break;
141 case 'N':
142 src = &nouns;
143 break;
144 } /* end of POS switch statement */
145 i = pick_str(src, sd, dest);
146 i = (int)strlen(DIST_MEMBER(src, i));
147 dest += i;
148 res += i;
149 if (*(++cptr)) /* miscelaneous fillagree, like punctuation */
150 {
151 *dest = *cptr;
152 dest += 1;
153 res += 1;
154 }
155 *dest = ' ';
156 dest++;
157 res++;
158 parse_target = NULL;
159 } /* end of while loop */
160
161 return (res);
162}
163
164/*
165 * txt_sentence() --
166 * generate a sentence by
167 * 1) selecting a sentence form
168 * 2) parsing it to select parts of speech or phrase types
169 * 3) selecting appropriate words
170 * 4) adding punctuation as required
171 *
172 * Returns: length of generated sentence
173 * Called By: dbg_text()
174 * Calls: pick_str(), txt_np(), txt_vp()
175 */
176static int txt_sentence(char *dest, int sd) {
177 char syntax[MAX_GRAMMAR_LEN + 1], *cptr;
178 int i, res = 0, len = 0;
179
180 pick_str(&grammar, sd, syntax);
181 cptr = syntax;
182
183next_token: /* I hate goto's, but can't seem to have parent and child use strtok() */
184 while (*cptr && *cptr == ' ')
185 cptr++;
186 if (*cptr == '\0')
187 goto done;
188 switch (*cptr) {
189 case 'V':
190 len = txt_vp(dest, sd);
191 break;
192 case 'N':
193 len = txt_np(dest, sd);
194 break;
195 case 'P':
196 i = pick_str(&prepositions, sd, dest);
197 len = (int)strlen(DIST_MEMBER(&prepositions, i));
198 strcpy((dest + len), " the ");
199 len += 5;
200 len += txt_np(dest + len, sd);
201 break;
202 case 'T':
203 i = pick_str(&terminators, sd, --dest); /*terminators should abut previous word */
204 len = (int)strlen(DIST_MEMBER(&terminators, i));
205 break;
206 } /* end of POS switch statement */
207 dest += len;
208 res += len;
209 cptr++;
210 if (*cptr && *cptr != ' ') /* miscelaneous fillagree, like punctuation */
211 {
212 dest += 1;
213 res += 1;
214 *dest = *cptr;
215 }
216 goto next_token;
217done:
218 *dest = '\0';
219 return (--res);
220}
221
222static char *gen_text(char *dest, int sd, distribution *s) {
223 long i = 0;
224 DSS_HUGE j;
225
226 RANDOM(j, 1, s->list[s->count - 1].weight, sd);
227 while (s->list[i].weight < j)
228 i++;
229 char *src = s->list[i].text;
230 int ind = 0;
231 while (src[ind]) {
232 dest[ind] = src[ind];
233 ind++;
234 }
235 dest[ind] = ' ';
236 return dest + ind + 1;
237}
238
/*
 * Capacity of the weight -> text lookup tables below. gen_index() fills
 * slots 0 .. (weight of the distribution's last entry), so each table needs
 * that many slots plus one.
 * NOTE(review): these limits are hard-coded; presumably they match the last
 * weight of each loaded distribution -- confirm against the distribution file.
 */
enum {
	NOUN_MAX_WEIGHT = 340,
	ADJECTIVES_MAX_WEIGHT = 289,
	ADVERBS_MAX_WEIGHT = 262,
	AUXILLARIES_MAX_WEIGHT = 18,
	VERBS_MAX_WEIGHT = 174,
	PREPOSITIONS_MAX_WEIGHT = 456
};

/* One table per part of speech, indexed by drawn weight. */
static char *noun_index[NOUN_MAX_WEIGHT + 1];
static char *adjectives_index[ADJECTIVES_MAX_WEIGHT + 1];
static char *adverbs_index[ADVERBS_MAX_WEIGHT + 1];
static char *auxillaries_index[AUXILLARIES_MAX_WEIGHT + 1];
static char *verbs_index[VERBS_MAX_WEIGHT + 1];
static char *prepositions_index[PREPOSITIONS_MAX_WEIGHT + 1];
252
253// generate a lookup table for weight -> str
254static void gen_index(char **index, distribution *s) {
255 for (size_t w = 0; w <= s->list[s->count - 1].weight; w++) {
256 long i = 0;
257 while (s->list[i].weight < w)
258 i++;
259 index[w] = s->list[i].text;
260 }
261}
262
263static char *gen_text_index(char *dest, int sd, char **index, distribution *s) {
264 long i = 0;
265 DSS_HUGE j;
266
267 RANDOM(j, 1, s->list[s->count - 1].weight, sd);
268 char *src = index[j];
269 int ind = 0;
270 while (src[ind]) {
271 dest[ind] = src[ind];
272 ind++;
273 }
274 dest[ind] = ' ';
275 return dest + ind + 1;
276}
277
278static char *gen_vp(char *dest, int sd) {
279 DSS_HUGE j;
280 RANDOM(j, 1, vp.list[vp.count - 1].weight, sd);
281 int index = 0;
282 index += vp.list[0].weight < j;
283 index += vp.list[1].weight < j;
284 index += vp.list[2].weight < j;
285
286 if (index == 0) {
287 dest = gen_text_index(dest, sd, verbs_index, &verbs);
288 } else if (index == 1) {
289 dest = gen_text_index(dest, sd, auxillaries_index, &auxillaries);
290 dest = gen_text_index(dest, sd, verbs_index, &verbs);
291 } else if (index == 2) {
292 dest = gen_text_index(dest, sd, verbs_index, &verbs);
293 dest = gen_text_index(dest, sd, adverbs_index, &adverbs);
294 } else {
295 dest = gen_text_index(dest, sd, auxillaries_index, &auxillaries);
296 dest = gen_text_index(dest, sd, verbs_index, &verbs);
297 dest = gen_text_index(dest, sd, adverbs_index, &adverbs);
298 }
299 return dest;
300}
301
302static char *gen_np(char *dest, int sd) {
303 DSS_HUGE j;
304 RANDOM(j, 1, np.list[np.count - 1].weight, sd);
305 int index = 0;
306 index += np.list[0].weight < j;
307 index += np.list[1].weight < j;
308 index += np.list[2].weight < j;
309
310 if (index == 0) {
311 dest = gen_text_index(dest, sd, noun_index, &nouns);
312 } else if (index == 1) {
313 dest = gen_text_index(dest, sd, adjectives_index, &adjectives);
314 dest = gen_text_index(dest, sd, noun_index, &nouns);
315 } else if (index == 2) {
316 dest = gen_text_index(dest, sd, adjectives_index, &adjectives);
317 dest[-1] = ',';
318 *(dest++) = ' ';
319 dest = gen_text_index(dest, sd, adjectives_index, &adjectives);
320 dest = gen_text_index(dest, sd, noun_index, &nouns);
321 } else {
322 dest = gen_text_index(dest, sd, adverbs_index, &adverbs);
323 dest = gen_text_index(dest, sd, adjectives_index, &adjectives);
324 dest = gen_text_index(dest, sd, noun_index, &nouns);
325 }
326 return dest;
327}
328
329static char *gen_preposition(char *dest, int sd) {
330 dest = gen_text_index(dest, sd, prepositions_index, &prepositions);
331 *(dest++) = 't';
332 *(dest++) = 'h';
333 *(dest++) = 'e';
334 *(dest++) = ' ';
335 return gen_np(dest, sd);
336}
337
338static char *gen_terminator(char *dest, int sd) {
339 dest = gen_text(--dest, sd, &terminators);
340 return dest - 1;
341}
342
343static char *gen_sentence(char *dest, int sd) {
344 const char *cptr;
345 int i;
346
347 DSS_HUGE j;
348 RANDOM(j, 1, grammar.list[grammar.count - 1].weight, sd);
349 int index = 0;
350 index += grammar.list[0].weight < j;
351 index += grammar.list[1].weight < j;
352 index += grammar.list[2].weight < j;
353 index += grammar.list[3].weight < j;
354 cptr = grammar.list[index].text;
355
356 if (index == 0) {
357 dest = gen_np(dest, sd);
358 dest = gen_vp(dest, sd);
359 dest = gen_terminator(dest, sd);
360 } else if (index == 1) {
361 dest = gen_np(dest, sd);
362 dest = gen_vp(dest, sd);
363 dest = gen_preposition(dest, sd);
364 dest = gen_terminator(dest, sd);
365 } else if (index == 2) {
366 dest = gen_np(dest, sd);
367 dest = gen_vp(dest, sd);
368 dest = gen_np(dest, sd);
369 dest = gen_terminator(dest, sd);
370 } else if (index == 3) {
371 dest = gen_np(dest, sd);
372 dest = gen_preposition(dest, sd);
373 dest = gen_vp(dest, sd);
374 dest = gen_np(dest, sd);
375 dest = gen_terminator(dest, sd);
376 } else {
377 dest = gen_np(dest, sd);
378 dest = gen_preposition(dest, sd);
379 dest = gen_vp(dest, sd);
380 dest = gen_preposition(dest, sd);
381 dest = gen_terminator(dest, sd);
382 }
383 *dest = ' ';
384 return dest + 1;
385}
386
387/*
388 * dbg_text() --
389 * produce ELIZA-like text of random, bounded length, truncating the last
390 * generated sentence as required
391 */
392void dbg_text(char *tgt, int min, int max, int sd) {
393 DSS_HUGE hgLength = 0, hgOffset, wordlen = 0, s_len, needed;
394 char sentence[MAX_SENT_LEN + 1], *cp;
395 static char szTextPool[TEXT_POOL_SIZE + 1 + 100];
396 static int bInit = 0;
397
398 if (!bInit) {
399 gen_index(noun_index, &nouns);
400 gen_index(adjectives_index, &adjectives);
401 gen_index(adverbs_index, &adverbs);
402 gen_index(auxillaries_index, &auxillaries);
403 gen_index(verbs_index, &verbs);
404 gen_index(prepositions_index, &prepositions);
405
406 char *ptr = szTextPool;
407 char *endptr = szTextPool + TEXT_POOL_SIZE + 1;
408 while (ptr < endptr) {
409 ptr = gen_sentence(ptr, 5);
410 }
411 szTextPool[TEXT_POOL_SIZE] = '\0';
412
413 bInit = 1;
414 }
415
416 RANDOM(hgOffset, 0, TEXT_POOL_SIZE - max, sd);
417 RANDOM(hgLength, min, max, sd);
418 strncpy(&tgt[0], &szTextPool[hgOffset], (int)hgLength);
419 tgt[hgLength] = '\0';
420
421 return;
422}
423
#ifdef TEXT_TEST
/* Stand-alone test driver: loads every distribution and prints text forever. */
tdef tdefs[1] = {NULL};
distribution nouns, verbs, adjectives, adverbs, auxillaries, terminators, articles, prepositions, grammar, np, vp;

/* Load the distribution whose name equals the variable's name. */
#define LOAD_DIST(d) read_dist(env_config(DIST_TAG, DIST_DFLT), #d, &d)

int main() {
	char prattle[401];

	verbose = 1;

	LOAD_DIST(nouns);
	LOAD_DIST(verbs);
	LOAD_DIST(adjectives);
	LOAD_DIST(adverbs);
	LOAD_DIST(auxillaries);
	LOAD_DIST(terminators);
	LOAD_DIST(articles);
	LOAD_DIST(prepositions);
	LOAD_DIST(grammar);
	LOAD_DIST(np);
	LOAD_DIST(vp);

	/* endless stream of 300..400 character samples */
	for (;;) {
		dbg_text(prattle, 300, 400, 0);
		printf("<%s>\n", prattle);
	}

	return (0);
}

#undef LOAD_DIST
#endif /* TEXT_TEST */
453