-
Notifications
You must be signed in to change notification settings - Fork 0
/
takeon-output.c
730 lines (640 loc) · 27.1 KB
/
takeon-output.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
// This code outputs the tables generated by takeon.c.
// It is included within takeon.c.

// Verbosity switch.  In the part of the file visible here it is only
// mentioned by commented-out tracing code.
static int verbose = FALSE;
//#define PARM_NO_FLEX 1 // Enable this to avoid the use of flex
// arrays and to speed up execution.
#include <wctype.h>
#include "flex.h"
static FILE *grammar; // The input file. It is read from in
// multiple passes to handle forward references.
// All input comes through these two procedures:
// (get_wide_char() and unget_wide_char(), which also maintain the
// read-position counters declared here.)
//   file_offset           - number of characters consumed so far
//   source_line           - current line number, starting at 1
//   source_column         - current column number, starting at 1
//   last_line_last_column - column reached when the latest '\n' was read,
//                           so that un-getting that '\n' can restore it
// fatal_() prints source_line and source_column in its error prefix.
static int file_offset = 0, source_line = 1, source_column = 1, last_line_last_column = 1;
// Print a diagnostic for a fatal error met while reading the grammar and
// terminate the program.  Normally invoked through the fatal() macro.
//
//   error - selects the message:
//             WEOF        the input ended too early
//             1           nothing more is printed (already reported)
//             '0'         a digit was expected
//             above ' '   the character that was expected
//             otherwise   a generic syntax error
//   line  - line number within this C source where the problem was detected
//
// Does not return: every path ends in exit(EXIT_FAILURE).
void fatal_(int error, int line) {
  // Every report starts with the position reached in the grammar file.
  fprintf(stderr, "%s:%d:%d: Error: ", grammarfn, source_line, source_column);

  if (error == 1) {
    // fail silently as problem already reported
    exit(EXIT_FAILURE);
  }

  if (error == WEOF) {
    fprintf(stderr,
            "Premature end of file at line %d."
            " Grammar should end with a 'E' command."
            " (detected in %s, line %d)\n",
            source_line, __FILE__, line);
    if (source_line == 1) {
      // Running out of input on the very first line gets an extra hint.
      fprintf(stderr,
              "(You're not by any chance trying to compile using tcc,"
              " are you? Its wchar_t support seems to be very broken...)"
              "\n");
    }
    exit(EXIT_FAILURE);
  }

  if (error == '0') {
    fprintf(stderr,
            "Syntax error at line %d while looking for a digit."
            " (detected in %s, line %d)\n",
            source_line, __FILE__, line);
  } else if (error > ' ') {
    fprintf(stderr,
            "Syntax error at line %d while looking for a '%c'."
            " (detected in %s, line %d)\n",
            source_line, error, __FILE__, line);
  } else {
    fprintf(stderr,
            "Syntax error at line %d (detected in %s, line %d)\n",
            source_line, __FILE__, line);
  }
  exit(EXIT_FAILURE);
}
// fatal(n) calls fatal_() with the C source line of the call site, so the
// diagnostic can say where in this program the problem was detected.
#define fatal(n) fatal_(n,__LINE__)
// Need something similar for warnings.
// Read one wide character from 'f' and update the position counters.
//
// A newline advances source_line, remembers the column it was read at in
// last_line_last_column and resets source_column to 1; any other result
// (including WEOF) advances source_column.  file_offset is always bumped.
//
// Returns whatever fgetwc() returned.
wint_t get_wide_char(FILE *f) {
  const wint_t ch = fgetwc(f);

  file_offset++;
  if (ch != L'\n') {
    source_column++;
    return ch;
  }

  last_line_last_column = source_column;
  source_column = 1;
  source_line++;
  return ch;
}
// Push 'c' back onto 'f' and undo the bookkeeping get_wide_char() did for it.
//
// Un-getting a newline steps source_line back and restores the column saved
// in last_line_last_column; anything else just steps source_column back.
void unget_wide_char(wint_t c, FILE *f) {
  ungetwc(c, f);
  file_offset--;

  if (c != L'\n') {
    source_column--;
    return;
  }

  source_column = last_line_last_column;
  source_line--;
}
// Fetch the next character of the grammar file.  Running out of input is
// reported through fatal(WEOF), so a returned value is a real character.
static wint_t next_wide_char(void) {
  const wint_t ch = get_wide_char(grammar);

  if (ch != WEOF) return ch;
  fatal(WEOF);
  return ch;
}
// Skip whitespace in the grammar file and return the first character that
// is not whitespace.  Running out of input is reported through fatal(WEOF).
//
// The test uses iswspace(): the input is read as wide characters, and the
// narrow isspace() is only defined for values representable as unsigned
// char (or EOF), so it must not be handed an arbitrary wint_t.
static wint_t nonspace(void) {
  for (;;) {
    wint_t c = get_wide_char(grammar);
    if (c == WEOF) fatal(WEOF);
    if (!iswspace(c)) return c;
  }
}
// Every entry in the grammar consists of a type code and a data value, usually
// an index into some array or other.
// Type of an entry in the main grammar array gram[]:
typedef unsigned long int ENTRY;
// Return type of keyword_code(): a KEYWORD_TYPE tag OR'ed with a keyword slot.
typedef int KEYWORD_INDEX;
// Return type of regexp_code(): a REGEXP_TYPE tag OR'ed with a regexp slot.
typedef int REGEXP_INDEX;
// The next two are not used in the part of the file visible here.
typedef int LITSTR_INDEX;
typedef int GRAMMAR_INDEX;
// Bit layout of an ENTRY, as defined by the macros below:
//   bits  0..23  index / data value       (INDEX_MASK)
//   bit   24     NEGATED_PHRASE
//   bit   25     GUARD_PHRASE
//   bit   26     WHITESPACE_ALLOWED
//   bits 27..31  type code                (GRAMMAR_TYPE_SHIFT, GRAMMAR_TYPE_MASK)
//
// NOTE: dump_tables() stringifies these macros with textof() and writes the
// expansions into the generated header, so the exact spelling of each
// replacement list ends up in the output.
#define NEGATED_PHRASE (1U<<24U)
#define GUARD_PHRASE (1U<<25U)
#define WHITESPACE_ALLOWED (1U<<26U)
#define GRAMMAR_TYPE_SHIFT 27U
#define GRAMMAR_TYPE_MASK 31U
#define INDEX_MASK 0xFFFFFFU
// We have enough spare bits in a grammar table entry
// to support types with values 1 through 31.
//
// I never use 0 in enumerations like this as it
// helps catch errors from variables which have
// not had a type explicitly set in them.
// Unshifted type codes:
#define BASE_BIP_TYPE 1U
#define BASE_PHRASE_TYPE 2U
#define BASE_SEMANTIC_TYPE 3U
#define BASE_KEYWORD_TYPE 4U
#define BASE_CHAR_TYPE 5U
#define BASE_UTF32CHAR_TYPE 6U
#define BASE_STRING_TYPE 7U
#define BASE_UTF32STRING_TYPE 8U
#define BASE_REGEXP_TYPE 9U
#define BASE_OPTION_TYPE 10U
#define BASE_COUNT_OF_ALTS 11U
#define BASE_COUNT_OF_PHRASES 12U
#define BASE_ALT_NUMBER 13U
// The same codes moved into the type field, ready to be OR'ed into an ENTRY:
#define BIP_TYPE (BASE_BIP_TYPE <<GRAMMAR_TYPE_SHIFT)
#define PHRASE_TYPE (BASE_PHRASE_TYPE <<GRAMMAR_TYPE_SHIFT)
#define SEMANTIC_TYPE (BASE_SEMANTIC_TYPE <<GRAMMAR_TYPE_SHIFT)
#define KEYWORD_TYPE (BASE_KEYWORD_TYPE <<GRAMMAR_TYPE_SHIFT)
#define CHAR_TYPE (BASE_CHAR_TYPE <<GRAMMAR_TYPE_SHIFT)
#define UTF32CHAR_TYPE (BASE_UTF32CHAR_TYPE <<GRAMMAR_TYPE_SHIFT)
#define STRING_TYPE (BASE_STRING_TYPE <<GRAMMAR_TYPE_SHIFT)
#define UTF32STRING_TYPE (BASE_UTF32STRING_TYPE <<GRAMMAR_TYPE_SHIFT)
#define REGEXP_TYPE (BASE_REGEXP_TYPE <<GRAMMAR_TYPE_SHIFT)
#define OPTION_TYPE (BASE_OPTION_TYPE <<GRAMMAR_TYPE_SHIFT)
#define COUNT_OF_ALTS (BASE_COUNT_OF_ALTS <<GRAMMAR_TYPE_SHIFT)
#define COUNT_OF_PHRASES (BASE_COUNT_OF_PHRASES <<GRAMMAR_TYPE_SHIFT)
#define ALT_NUMBER (BASE_ALT_NUMBER <<GRAMMAR_TYPE_SHIFT)
// Extract the unshifted type code (one of the BASE_* values) from an ENTRY.
#define PhraseType(idx) ((((idx)>>GRAMMAR_TYPE_SHIFT)&GRAMMAR_TYPE_MASK))
// These are not hard limits. The Flex structure can expand to as large
// as necessary. These are only here 1) in case we're using fixed-size
// arrays rather than flex arrays, or 2) to limit runaway coding errors.
// These MAX sizes do not propagate to the header file.
#define MAX_GRAMMAR (1024*16)
#define MAX_PHRASES (1024*4) // extra during debugging
#define MAX_KEYWORDS 1024
#define MAX_REGEXPS 1024
#define MAX_LIT (1024*128)
#define MAX_COMMENT 4096
#define MAX_C 4096
#define MAX_BIPS 64
// Written to the generated header by dump_tables() (as LARGEST_ALT+1+2).
// It is not assigned anywhere in the part of the file visible here.
static int LARGEST_ALT = 0;
// Start of each numbering range.  dump_tables() derives PHRASE_BASE,
// SEMANTIC_BASE and AST_BASE from BIP_BASE and the NUM_* counts and emits
// all four as #defines.
static int BIP_BASE = 0;
static int PHRASE_BASE = 0;
static int SEMANTIC_BASE = 0;
static int AST_BASE = 0;
// Apart from StringPool, which is used internally, these arrays are
// the ones which are eventually written out to the header file as
// const arrays. Internally we refer to strings by an index into the
// stringpool, but in the header file, the arrays of strings are literal
// strings. (Except in the grammar where we use indices into the individual
// arrays, of regexps, keywords, etc.)
// All strings are stored here.
typedef int STRINGINDEX;
static DECLARE(StringPool, wchar_t, MAX_LIT);
// _StringPool(x) is the writable accessor, StringPool(x) the read accessor.
// DECLARE/WRITE/READ come from flex.h, which is not visible here.
#define _StringPool(x) WRITE(x,StringPool,wchar_t)
#define StringPool(x) READ(x,StringPool,wchar_t)
// Index of the next unused slot; new strings are appended from here.
static STRINGINDEX Str_nextfree = 0;
// Address of the string starting at index x.
// NOTE(review): the expansion is not parenthesised, so String(x) should only
// be used where a bare '&expr' is safe (argument or initialiser position).
#define String(x) &_StringPool(x)
// Make a copy of the string at pool index 'x' that is usable as (part of) a
// C identifier: every '-' and every ' ' is replaced by '_'.
//
// The copy, including its terminating NUL, is appended to StringPool, so each
// call consumes pool space.  Returns the address of the copy.
//
// The source is read by index rather than through a pointer taken up front,
// because this function appends to the very pool it is reading from.
// NOTE(review): flex.h is not visible here; if WRITE() can move the pool's
// storage when it grows, a pointer obtained before the writes would dangle.
// For the same reason, callers should use the returned pointer before
// anything else is appended to the pool.
wchar_t *CString(STRINGINDEX x) {
  const STRINGINDEX Result = Str_nextfree;
  STRINGINDEX from = x;
  wchar_t wc;

  do {
    wc = StringPool(from);
    from += 1;
    _StringPool(Str_nextfree++) = (wc == L'-' || wc == L' ') ? L'_' : wc;
  } while (wc != L'\0');

  return String(Result);
}
// DUE TO PROBLEMS SETTING UP GRAM VALUES PROPERLY, I'M SEPARATING BIP/PHRASE/C INTO THEIR OWN TABLES AND INDEXES FROM 0.
// For the name of B<...> code.
static DECLARE(bip_phrasename, STRINGINDEX, MAX_C); /* STRINGINDEX is an index into *** StringPool *** */
#define _bip_phrasename(x) WRITE(x,bip_phrasename,STRINGINDEX)
#define bip_phrasename(x) READ(x,bip_phrasename,STRINGINDEX)
// Map of user-supplied BIP number to a consecutive sequence used internally
// (dump_tables() indexes it by the internal number and emits the stored
// value as the B_<name> constant.)
static DECLARE(bip_map, ENTRY, MAX_BIPS);
#define _bip_map(x) WRITE(x,bip_map,ENTRY)
#define bip_map(x) READ(x,bip_map,ENTRY)
// NUM_BIPS bounds every loop over the BIP tables in dump_tables().
static int NEXT_FREE_BIPNO = 0, NUM_BIPS = 0;
// Not referenced in the part of the file visible here; 'unassigned' is
// defined elsewhere.
int current_def_bipno=unassigned,
current_internal_bipno=unassigned,
current_user_bipno=unassigned;
// For the offset into the grammar where the sequentially-numbered P<name> is stored.
static DECLARE(sequential_phrase_no_to_grammar_index, ENTRY, MAX_PHRASES);
#define _sequential_phrase_no_to_grammar_index(x) WRITE(x,sequential_phrase_no_to_grammar_index,ENTRY)
#define sequential_phrase_no_to_grammar_index(x) READ(x,sequential_phrase_no_to_grammar_index,ENTRY)
// For the name of B<> and P<...> definitions. And later also C<> definitions will be added.
// Maps the location in the grammar where a phrase is stored
// to the sequential phrase number, i.e. a mapping of G_x to P_x
// NOT YET USED.
static DECLARE(grammar_index_to_sequential_phrase_number, int, MAX_PHRASES);
#define _grammar_index_to_sequential_phrase_number(x) WRITE(x,grammar_index_to_sequential_phrase_number,int)
#define grammar_index_to_sequential_phrase_number(x) READ(x,grammar_index_to_sequential_phrase_number,int)
// As above, but maps to the phrase name.
// (dump_tables() indexes this by sequential phrase number.)
static DECLARE(phrasename, STRINGINDEX, MAX_PHRASES); /* STRINGINDEX is an index into *** StringPool *** */
#define _phrasename(x) WRITE(x,phrasename,STRINGINDEX)
#define phrasename(x) READ(x,phrasename,STRINGINDEX)
int current_def_simple_phraseno=unassigned;
// For the comment attached to each P<...> definition.
// (Currently disabled; dump_tables() emits NULL for every xcomment slot.)
//static DECLARE(xcomment, STRINGINDEX, MAX_COMMENT); /* STRINGINDEX is an index into *** StringPool *** */
//#define _xcomment(x) WRITE(x,xcomment,STRINGINDEX)
//#define xcomment(x) READ(x,xcomment,STRINGINDEX)
//STRINGINDEX current_comment;
// NUM_SIMPLE_PHRASES bounds every loop over the phrase tables in dump_tables().
static int NEXT_FREE_SIMPLE_PHRASENO = 0, NUM_SIMPLE_PHRASES = 0;
STRINGINDEX current_def_phrasename = unassigned;
STRINGINDEX current_use_phrasename = unassigned;
// For the name of C<...> code.
static DECLARE(semantic_phrasename, STRINGINDEX, MAX_C); /* STRINGINDEX is an index into *** StringPool *** */
#define _semantic_phrasename(x) WRITE(x,semantic_phrasename,STRINGINDEX)
#define semantic_phrasename(x) READ(x,semantic_phrasename,STRINGINDEX)
// For the body of C<...> code.
static DECLARE(semantic_code, STRINGINDEX, MAX_C); /* STRINGINDEX is an index into *** StringPool *** */
#define _semantic_code(x) WRITE(x,semantic_code,STRINGINDEX)
#define semantic_code(x) READ(x,semantic_code,STRINGINDEX)
// NUM_SEMANTIC_PHRASES has no explicit initialiser; as a static it starts at 0.
static int NEXT_FREE_SEMANTIC_PHRASENO = 0, NUM_SEMANTIC_PHRASES;
int current_def_semantic_phraseno = 0;
// For unnamed external blocks:
static DECLARE(initcode, wchar_t, MAX_C); /* holds the characters themselves (wchar_t), not StringPool indices */
#define _initcode(x) WRITE(x,initcode,wchar_t)
#define initcode(x) READ(x,initcode,wchar_t)
// dump_tables() prints the text starting at initcode(0) with "%ls".
static int initcode_nextfree = 0;
// For the array of keywords
// (each slot holds the StringPool index of one keyword; filled in by
// keyword_code() during pass 0)
static DECLARE(keyword, STRINGINDEX, MAX_KEYWORDS);
#define _keyword(x) WRITE(x,keyword,STRINGINDEX)
#define keyword(x) READ(x,keyword,STRINGINDEX)
static KEYWORD_INDEX NEXT_FREE_KEYWORD = 0, NUM_KEYWORDS = 0;
STRINGINDEX current_keyword=unassigned;
// For the array of regular expressions
// (each slot holds the StringPool index of one regexp; filled in by
// regexp_code() during pass 0)
static DECLARE(regexps, STRINGINDEX, MAX_REGEXPS);
#define _regexps(x) WRITE(x,regexps,STRINGINDEX)
#define regexps(x) READ(x,regexps,STRINGINDEX)
static REGEXP_INDEX NEXT_FREE_REGEXP = 0, NUM_REGEXPS = 0;
STRINGINDEX current_regexp=unassigned;
// The main grammar table:
static DECLARE(gram, ENTRY, MAX_GRAMMAR);
#define _gram(x) WRITE(x,gram,ENTRY)
#define gram(x) READ(x,gram,ENTRY)
// NUM_GRAMMAR has no explicit initialiser; as a static it starts at 0.
static int NEXT_FREE_GRAMMAR_SLOTNO = 0, NUM_GRAMMAR;
// Total number of named items across the three separate tables.
#define NUM_PHRASES (NUM_BIPS + NUM_SIMPLE_PHRASES + NUM_SEMANTIC_PHRASES)
// These could be (inline) functions if we need to avoid this construct.
// (The ({ ... }) form is a statement expression, a GNU C extension.)
#define MaxINT(a,b) ({int A = a, B = b; A>B?A:B;}) /* avoid evaluating params twice. */
#define MinINT(a,b) ({int A = a, B = b; A<B?A:B;}) /* avoid evaluating params twice. */
// Current pass over the grammar file.  keyword_code() and regexp_code()
// only add new table entries while pass == 0.
int pass = 0;
// Read characters from the grammar file into StringPool until an unescaped
// 'ends' character is found, and return the pool index where the new
// NUL-terminated string starts.  The terminator itself is not stored.
//
// Backslash handling: "\\" is stored as a single backslash; a backslash
// followed by any other character is stored as both characters, and that
// following character is never treated as the terminator.
//
// Errors (none of these return): end of input -> fatal(WEOF); the string
// already holding 1023 or more characters, or a stream error after a
// backslash -> fatal(ends).
static STRINGINDEX upto(wchar_t ends) {
  const STRINGINDEX start = Str_nextfree;

  for (;;) {
    wint_t ch = get_wide_char(grammar);
    if (ch == WEOF) fatal(WEOF);
    if (Str_nextfree - start >= 1023) fatal(ends);

    if (ch != L'\\') {
      if (ch == ends) {
        _StringPool(Str_nextfree++) = L'\0';
        return start;
      }
    } else {
      // Escape sequence: \\ \' \"
      ch = get_wide_char(grammar);
      if (ch == WEOF) fatal(WEOF);
      if (ferror(grammar)) fatal(ends);
      if (ch != L'\\') {
        // Anything other than a doubled backslash keeps its backslash.
        _StringPool(Str_nextfree++) = L'\\';
        _StringPool(Str_nextfree) = L'\0';
      }
    }

    // Store the character and keep the partial string terminated.
    _StringPool(Str_nextfree++) = ch;
    _StringPool(Str_nextfree) = L'\0';
  }
}
// Copy a block of embedded code from the grammar file into StringPool, up to
// (but not including) the '}' that closes it, and return the pool index where
// the NUL-terminated copy starts.
//
// Nested braces are tracked with a simple depth counter: every '{' and '}'
// is counted, wherever it appears in the text.
static STRINGINDEX c_code_block(void) {
  const STRINGINDEX start = Str_nextfree;
  int depth = 0;

  for (;;) {
    const int ch = next_wide_char();

    if (ch == '}') {
      if (depth == 0) break;   // the brace that closes the whole block
      depth -= 1;
    } else if (ch == '{') {
      depth += 1;
    }
    _StringPool(Str_nextfree++) = ch;
  }

  _StringPool(Str_nextfree++) = '\0';
  return start;
}
// Return the grammar code (KEYWORD_TYPE | slot) for the keyword whose text
// starts at pool index 'newkeyword'.
//
// During pass 0 the keyword is first parked in the next free slot, so the
// search below always finds it: either at an earlier slot (a duplicate, the
// parked copy is simply abandoned) or at the parked slot itself, which is
// then claimed by advancing NEXT_FREE_KEYWORD.  NUM_KEYWORDS follows the
// high-water mark.
//
// On later passes the table is only searched; failing to find the keyword
// is an internal error and terminates the program.
static KEYWORD_INDEX keyword_code(STRINGINDEX newkeyword)
{
  // buggy if newkeyword is the empty string :-( FIX! TO DO Presumably regexp_code *could* have the same issue
  // Fault is probably in String() or wherever this string was entered into the StringPool.
  int slot = 0;
  int found = 0;

  if (pass == 0) _keyword(NEXT_FREE_KEYWORD) = newkeyword;

  // Note the inclusive bound: the slot at NEXT_FREE_KEYWORD is examined too.
  while (slot <= NEXT_FREE_KEYWORD) {
    if (wcscmp(String(newkeyword), String(keyword(slot))) == 0) {
      found = 1;
      break;
    }
    slot += 1;
  }

  if (pass != 0) {
    if (found) return KEYWORD_TYPE | slot;
    fprintf(stderr, "keyword_code - internal error: cannot find \"%ls\" in keyword table.\n", String(newkeyword));
    exit(EXIT_FAILURE);
  }

  if (slot == NEXT_FREE_KEYWORD) NEXT_FREE_KEYWORD++;
  if (NEXT_FREE_KEYWORD >= NUM_KEYWORDS) NUM_KEYWORDS = NEXT_FREE_KEYWORD;
  return KEYWORD_TYPE | slot;
}
// Return the grammar code (REGEXP_TYPE | slot) for the regular expression
// whose text starts at pool index 'newregexp'.
//
// During pass 0 the regexp is first parked in the next free slot, so the
// search below always finds it: either at an earlier slot (a duplicate) or
// at the parked slot itself, which is then claimed by advancing
// NEXT_FREE_REGEXP.  NUM_REGEXPS follows the high-water mark.
//
// On later passes the table is only searched; failing to find the regexp
// is an internal error and terminates the program.
static REGEXP_INDEX regexp_code(STRINGINDEX newregexp)
{
  int slot = 0;
  int found = 0;

  if (pass == 0) _regexps(NEXT_FREE_REGEXP) = newregexp;

  // Note the inclusive bound: the slot at NEXT_FREE_REGEXP is examined too.
  while (slot <= NEXT_FREE_REGEXP) {
    if (wcscmp(String(newregexp), String(regexps(slot))) == 0) {
      found = 1;
      break;
    }
    slot += 1;
  }

  if (pass != 0) {
    if (found) return REGEXP_TYPE | slot;
    fprintf(stderr, "regexp_code - internal error: cannot find \"%ls\" in regexp table.\n", String(newregexp));
    exit(EXIT_FAILURE);
  }

  if (slot == NEXT_FREE_REGEXP) NEXT_FREE_REGEXP++;
  if (NEXT_FREE_REGEXP >= NUM_REGEXPS) NUM_REGEXPS = NEXT_FREE_REGEXP;
  return REGEXP_TYPE | slot;
}
// The output routines take a file as a parameter to implement the
// multi-pass aspect - on all but the output pass, the output file
// is redirected to a sink device (e.g. /dev/null)
static void print_type(FILE *f, int type) {
switch (type<<GRAMMAR_TYPE_SHIFT) {
case BIP_TYPE:
fprintf(f, "BIP_TYPE | "); break;
case PHRASE_TYPE:
fprintf(f, "PHRASE_TYPE | "); break;
case SEMANTIC_TYPE:
fprintf(f, "SEMANTIC_TYPE | "); break;
case KEYWORD_TYPE:
fprintf(f, "KEYWORD_TYPE | "); break;
case CHAR_TYPE:
fprintf(f, "CHAR_TYPE | "); break;
case UTF32CHAR_TYPE:
fprintf(f, "UTF32CHAR_TYPE | "); break;
case STRING_TYPE:
fprintf(f, "STRING_TYPE | "); break;
case UTF32STRING_TYPE:
fprintf(f, "UTF32STRING_TYPE | "); break;
case REGEXP_TYPE:
fprintf(f, "REGEXP_TYPE | "); break;
case OPTION_TYPE:
fprintf(f, "OPTION_TYPE | "); break;
// to make understanding the phrase structure table easier,
// we tag every gram[] entry whether we need to or not:
case COUNT_OF_PHRASES:
fprintf(f, "COUNT_OF_PHRASES | "); break;
case COUNT_OF_ALTS:
fprintf(f, "COUNT_OF_ALTS | "); break;
case ALT_NUMBER:
fprintf(f, "ALT_NUMBER | "); break;
default:
fprintf(f, "(%02x << GRAMMAR_TYPE_SHIFT) | ", type); break;
}
}
// Write the wide string 's' to 'out' in a form that can sit inside a C
// string literal:
//   backslash    -> two backslashes
//   double quote -> backslash, double quote
//   newline      -> backslash, 'n'; and, when 'indent' is not NULL and more
//                   text follows, the 'indent' string is written after it
//   anything else is written unchanged.
static void escape(FILE *out, wchar_t *s, wchar_t *indent) {
  for (; *s != L'\0'; s++) {
    const wint_t ch = *s;

    switch (ch) {
    case L'\\':
      fputs("\\\\", out); // BUGFIX?
      break;
    case L'\"':
      fputs("\\\"", out);
      break;
    case L'\n':
      fputs("\\n", out);
      // No indent after a final newline: nothing follows it.
      if (indent != NULL && s[1] != L'\0') {
        fprintf(out, "%ls", indent);
      }
      break;
    default:
      fprintf(out, "%lc", ch);
      break;
    }
  }
}
// Write the flag name 's' followed by " | " to 'f' when 'b' is non-zero;
// write nothing when 'b' is zero.
static void print_bool(FILE *f, int b, char *s) {
  if (b == 0) {
    return;
  }
  fprintf(f, "%s | ", s);
}
// Write the complete generated grammar header to 'f'.
//
// In order, the output contains: the include guard and basic definitions,
// the flag/type macros (stringified from this file's own definitions), the
// base and count #defines, one #define per BIP (B_), phrase (P_, G_) and
// semantic routine (S_), extern declarations, and then - inside
// "#ifndef SUPPRESS_DATA" - the table definitions, the collected init code
// and the bodies of the parse_<name>() routines.
//
// Side effects: PHRASE_BASE, SEMANTIC_BASE and AST_BASE are recomputed from
// BIP_BASE and the NUM_* counts, and every CString() call made here appends
// a converted copy of a name to StringPool.
//
// Values read from bip_map are unsigned long (ENTRY) and are printed with
// "%lu"; type codes are compared against the unshifted BASE_* constants so
// that no signed int is shifted into its sign bit.
static void dump_tables(FILE *f) {
  int i;

  // ---- Include guard and basic definitions ------------------------------
  fprintf(f, "#ifndef _GRAMMAR_H_\n");
  fprintf(f, "#define _GRAMMAR_H_ 1\n");
  fprintf(f, "\n");
  fprintf(f, "#include <wchar.h>\n");
  fprintf(f, "\n");
  fprintf(f, "#ifndef TRUE\n");
  fprintf(f, "#define TRUE (0==0)\n");
  fprintf(f, "#endif\n");
  fprintf(f, "\n");
  fprintf(f, "#ifndef FALSE\n");
  fprintf(f, "#define FALSE (0!=0)\n");
  fprintf(f, "#endif\n");
  fprintf(f, "\n");
  fprintf(f, "typedef int (*parsefn)(void);\n");
  fprintf(f, "\n");
  fprintf(f, "#define LARGEST_ALT %d"
             " // Max number of phrases in any Alt: 0 (Reserved), 1:%d\n\n",
          LARGEST_ALT+1+2, LARGEST_ALT+2);

  // ---- Flag and type macros ----------------------------------------------
  // A modern C preprocessor trick to convert a #define to a string:
  // If this construct isn't supported, just enter the definitions
  // manually, but beware that by having a second copy here there is
  // a possibility that the two definitions can get out of sync if
  // the master copy above is changed.
#define _textof(x) #x
#define textof(x) _textof(x)
  fprintf(f, "#define NEGATED_PHRASE %s\n", textof(NEGATED_PHRASE));
  fprintf(f, "#define GUARD_PHRASE %s\n", textof(GUARD_PHRASE));
  fprintf(f, "#define WHITESPACE_ALLOWED %s\n", textof(WHITESPACE_ALLOWED)); // <-- may change to use "O<...> = "
  fprintf(f, "#define GRAMMAR_TYPE_SHIFT %s\n", textof(GRAMMAR_TYPE_SHIFT));
  fprintf(f, "#define GRAMMAR_TYPE_MASK %s\n", textof(GRAMMAR_TYPE_MASK));
  fprintf(f, "#define BIP_TYPE %s\n", textof(BIP_TYPE));
  fprintf(f, "#define PHRASE_TYPE %s\n", textof(PHRASE_TYPE));
  fprintf(f, "#define SEMANTIC_TYPE %s\n", textof(SEMANTIC_TYPE));
  fprintf(f, "#define KEYWORD_TYPE %s\n", textof(KEYWORD_TYPE));
  fprintf(f, "#define CHAR_TYPE %s\n", textof(CHAR_TYPE));
  fprintf(f, "#define UTF32CHAR_TYPE %s\n", textof(UTF32CHAR_TYPE));
  fprintf(f, "#define STRING_TYPE %s\n", textof(STRING_TYPE));
  fprintf(f, "#define UTF32STRING_TYPE %s\n", textof(UTF32STRING_TYPE));
  fprintf(f, "#define REGEXP_TYPE %s\n", textof(REGEXP_TYPE));
  fprintf(f, "#define OPTION_TYPE %s\n", textof(OPTION_TYPE));
  fprintf(f, "#define COUNT_OF_ALTS %s\n", textof(COUNT_OF_ALTS));
  fprintf(f, "#define COUNT_OF_PHRASES %s\n", textof(COUNT_OF_PHRASES));
  fprintf(f, "#define ALT_NUMBER %s\n", textof(ALT_NUMBER));
  fprintf(f, "#define INDEX_MASK %s\n", textof(INDEX_MASK));
  fprintf(f, "// (We have room for types 1..31)\n");
  fprintf(f, "#define PhraseType(idx) %s\n", textof(PhraseType(idx)));
  fprintf(f, "\n");

  // ---- Bases and counts --------------------------------------------------
  // NEXT_FREE_BIPNO refers to the internal sequence number, for example, if there were
  // only two BIPs referenced in a grammar: B<fred>=42; and B<jim>=69; then
  // the internal BIP numbers for those two would be 0 and 1 respectively, with
  // 'NEXT_FREE_BIPNO' being set to 2. BIP(0) would be 42 and BIP(1) would be 69.
  // The B_ constant should be the number the grammar writer sees, i.e. 42 or 69.
  // This range compression parallels what I used to do for phrases as well, and
  // I'm wondering if it contributes unnecessary complexity to the code, and that
  // I should make BIP numbers sparse as I've done for regular phrase numbers.
  PHRASE_BASE = BIP_BASE+NUM_BIPS;
  SEMANTIC_BASE = PHRASE_BASE + NUM_SIMPLE_PHRASES;
  AST_BASE = SEMANTIC_BASE + NUM_SEMANTIC_PHRASES;
  fprintf(f, "\n");
  fprintf(f, "#define BIP_BASE %d\n", BIP_BASE);
  fprintf(f, "#define PHRASE_BASE %d\n", PHRASE_BASE);
  fprintf(f, "#define SEMANTIC_BASE %d\n", SEMANTIC_BASE);
  fprintf(f, "#define AST_BASE %d\n\n", AST_BASE);
  fprintf(f, "#define NUM_BIPS %d\n", NUM_BIPS);
  fprintf(f, "#define NUM_SIMPLE_PHRASES %d\n", NUM_SIMPLE_PHRASES);
  fprintf(f, "#define NUM_SEMANTIC_PHRASES %d\n", NUM_SEMANTIC_PHRASES);
  fprintf(f, "#define NUM_PHRASES"
             " (NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES)\n\n");
  fprintf(f, "#define NUM_KEYWORDS %d\n", NUM_KEYWORDS);
  fprintf(f, "#define NUM_REGEXPS %d\n", NUM_REGEXPS);
  fprintf(f, "#define NUM_GRAMMAR %d\n", NUM_GRAMMAR);
  fprintf(f, "\n");

  // ---- Per-name constants ------------------------------------------------
  // Current code revision: we're using separate namespaces for bips, phrases, and semantic code.
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "#define B_%ls %lu\n", CString(bip_phrasename(i)), (unsigned long)bip_map(i));
  }
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    fprintf(f, "#define P_%ls %d\n", CString(phrasename(i)), i +NUM_BIPS ); // <--- staying compatible with original for now.
  }
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "#define S_%ls %d\n", CString(semantic_phrasename(i)), i);
  }
  fprintf(f, "\n");

  // ---- extern declarations -----------------------------------------------
  fprintf(f, "extern const int bip_map[NUM_BIPS];\n");
  fprintf(f, "extern const int sequential_phrase_no_to_grammar_index[NUM_SIMPLE_PHRASES];\n");
  fprintf(f, "extern const wchar_t *phrasename[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES];\n\n");
  fprintf(f, "extern const wchar_t *semantic_phrasename[NUM_SEMANTIC_PHRASES];\n");
  fprintf(f, "extern const wchar_t *semantic_code[NUM_SEMANTIC_PHRASES];\n");
  fprintf(f, "extern const wchar_t *xcomment[NUM_PHRASES];\n");
  fprintf(f, "extern const wchar_t *keyword[NUM_KEYWORDS];\n");
  fprintf(f, "extern const wchar_t *regexps[NUM_REGEXPS];\n");
  fprintf(f, "\n");
  fprintf(f, "extern const int gram[NUM_GRAMMAR];\n");
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    const int grammar_index = (int)sequential_phrase_no_to_grammar_index(i);
    fprintf(f, "#define G_%ls %d\n", CString(phrasename(i)), grammar_index);
  }
  fprintf(f, "\n");
  fprintf(f, "extern parsefn parsetime[NUM_SEMANTIC_PHRASES];\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "extern int parse_%ls(void);\n", CString(semantic_phrasename(i)));
  }
  fprintf(f, "\n");

  // ---- Table definitions (suppressible) ----------------------------------
  fprintf(f, "#ifndef SUPPRESS_DATA\n");

  // Names exactly as written in the grammar: BIPs, then phrases, then
  // semantic routines.
  fprintf(f, "const wchar_t *phrasename[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, " /*%d+%d*/ L\"%ls\" /*%lu*/,\n",
            0, i,
            String(bip_phrasename(i)),
            (unsigned long)bip_map(i));
  }
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    fprintf(f, " /*%d+%d*/ L\"%ls\",\n",
            NUM_BIPS,
            i,
            String(phrasename(i)));
  }
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, " /*%d+%d*/ L\"%ls\",\n",
            NUM_BIPS+NUM_SIMPLE_PHRASES, i,
            String(semantic_phrasename(i)));
  }
  fprintf(f, "};\n");

  // The same names converted by CString() for use in C identifiers.
  fprintf(f, "const wchar_t *phrasename_c[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, " /*%d+%d*/ L\"%ls\" /*%lu*/,\n",
            0, i,
            CString(bip_phrasename(i)),
            (unsigned long)bip_map(i));
  }
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    fprintf(f, " /*%d+%d*/ L\"%ls\",\n",
            NUM_BIPS,
            i,
            CString(phrasename(i)));
  }
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, " /*%d+%d*/ L\"%ls\",\n",
            NUM_BIPS+NUM_SIMPLE_PHRASES, i,
            CString(semantic_phrasename(i)));
  }
  fprintf(f, "};\n");

  fprintf(f, "const int bip_map[NUM_BIPS] = {\n");
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, " /*%d*/ %lu,\n", i, (unsigned long)bip_map(i));
  }
  fprintf(f, "};\n");

  fprintf(f, "const int sequential_phrase_no_to_grammar_index[NUM_SIMPLE_PHRASES] = {\n");
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    const int grammar_index = (int)sequential_phrase_no_to_grammar_index(i);
    fprintf(f, " G_%ls, /*%d*/\n", CString(phrasename(i)), grammar_index);
  }
  fprintf(f, "};\n");
  fprintf(f, "\n");

  fprintf(f, "const wchar_t *semantic_phrasename[NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, " /*%d*/ L\"%ls\",\n", i, String(semantic_phrasename(i)));
  }
  fprintf(f, "};\n\n");

  // Semantic code is emitted as escaped string literals, one source line
  // per literal piece.
  fprintf(f, "const wchar_t *semantic_code[NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, " /*%d*/ L\"", i);
    escape(f, String(semantic_code(i)), L"\"\n \"");
    fprintf(f, "\",\n");
  }
  fprintf(f, "};\n\n");

  fprintf(f, "parsefn parsetime[NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, " /*%d*/ &parse_%ls,\n", i, CString(semantic_phrasename(i)));
  }
  fprintf(f, "};\n\n");

  // Comment storage (xcomment) is currently disabled in this program, so
  // every slot is written as NULL.
  fprintf(f, "// Comments are stored so that they can be re-inserted, should\n");
  fprintf(f, "// we need to regenerate a grammar.g file from this header file.\n");
  fprintf(f, "const wchar_t *xcomment[NUM_PHRASES] = {\n");
  for (i = 0; i < NUM_PHRASES; i++) {
    fprintf(f, " /*%3d*/ ", i);
    fprintf(f, " NULL");
    fprintf(f, ",\n");
  }
  fprintf(f, "};\n");

  fprintf(f, "const wchar_t *keyword[NUM_KEYWORDS] = {\n");
  for (i = 0; i < NUM_KEYWORDS; i++) {
    fprintf(f, " /*%3d*/ L\"", i);
    escape(f, String(keyword(i)), NULL);
    fprintf(f, "\",\n");
  }
  fprintf(f, "};\n");

  fprintf(f, "const wchar_t *regexps[NUM_REGEXPS] = {\n");
  for (i = 0; i < NUM_REGEXPS; i++) {
    fprintf(f, " /*%d*/ L\"%ls\",\n", i, String(regexps(i)));
  }
  fprintf(f, "};\n");

  // ---- The grammar table itself ------------------------------------------
  fprintf(f, "const int gram[NUM_GRAMMAR /* %d */] = {\n", NUM_GRAMMAR);
  for (i = 0; i < NUM_GRAMMAR; i++) {
    // Announce every phrase whose definition starts at this slot.
    for (int p = 0; p < NUM_SIMPLE_PHRASES; p++) {
      if (sequential_phrase_no_to_grammar_index(p) == (ENTRY)i) {
        fprintf(f, "\n// P<%ls> = ...;\n", String(phrasename(p)));
      }
    }

    // Split the entry into its type code, flag bits and index.
    const ENTRY entry = gram(i);
    const unsigned int type = (unsigned int)PhraseType(entry);
    const int negated = (entry & NEGATED_PHRASE) != 0;
    const int guard = (entry & GUARD_PHRASE) != 0;
    int whitespace = (entry & WHITESPACE_ALLOWED) != 0;
    const int index = (int)(entry & INDEX_MASK);

    fprintf(f, " /*%3d*/ ", i);

    // BIPs should allow whitespace before.
    // Well, at least EOF should. Not sure about any others.
    if (type == BASE_BIP_TYPE && index == 0 /* B_EOF */) whitespace = 1;

    print_type(f, (int)type);
    print_bool(f, negated, "NEGATED_PHRASE ");
    print_bool(f, guard, "GUARD_PHRASE ");
    print_bool(f, whitespace, "WHITESPACE_ALLOWED");

    // Named items are written symbolically; everything else as a hex index,
    // with a trailing comment where a readable form is available.
    if (type == BASE_PHRASE_TYPE) {
      fprintf(f, "G_%ls,\n", CString(phrasename(index)));
    } else if (type == BASE_BIP_TYPE) {
      fprintf(f, "B_%ls,\n", CString(bip_phrasename(index)));
    } else if (type == BASE_SEMANTIC_TYPE) {
      fprintf(f, "S_%ls,\n", CString(semantic_phrasename(index)));
    } else {
      fprintf(f, "0x%06x,", (unsigned int)index);
      if (type == BASE_REGEXP_TYPE) {
        fprintf(f, " /* %ls */", String(regexps(index)));
      } else if (type == BASE_KEYWORD_TYPE) {
        fprintf(f, " /* %ls */", String(keyword(index)));
      } else if (type == BASE_CHAR_TYPE) {
        fprintf(f, " /* '%c' */", index&0xFF);
      }
      fprintf(f, "\n");
    }
  }
  fprintf(f, "};\n\n");

  // ---- Collected init code and procedure bodies --------------------------
  fprintf(f, "%ls\n", &initcode(0));

  // procedure bodies come last so that they can use all the data created above.
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    // Deliberately separate calls: CString() appends to StringPool, so its
    // result is printed before another pointer into the pool is taken.
    fprintf(f, "int parse_");
    fprintf(f, "%ls", CString(semantic_phrasename(i)));
    fprintf(f, "(void)\n{%ls}\n", String(semantic_code(i)));
  }

  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "\n// B<%ls> = %lu;\n", CString(bip_phrasename(i)), (unsigned long)bip_map(i));
  }
  fprintf(f, "\n// E\n");
  fprintf(f, "#endif // SUPPRESS_DATA\n");
  fprintf(f, "#endif // _GRAMMAR_H_\n");
}