// takeon-output.c

// This code outputs the tables generated by takeon.c.  It is included within takeon.c.

// Diagnostic flag, off by default.
// NOTE(review): no live code in this part of the file reads it - confirm
// where it is set and used in takeon.c.
static int verbose = FALSE;

//#define PARM_NO_FLEX 1 // Enable this to avoid the use of flex
                         // arrays and to speed up execution.
#include <wctype.h>
#include "flex.h"

static FILE *grammar; // The input file.  It is read from in
                      // multiple passes to handle forward references.

// All input comes through these two procedures:
// (get_wide_char() and unget_wide_char(), defined further down).

// Input position bookkeeping, kept up to date by those two procedures:
//   file_offset           - incremented on every read, decremented on every unget
//   source_line           - current line, starting at 1; printed in error messages
//   source_column         - current column, starting at 1; printed in error messages
//   last_line_last_column - value source_column had when the most recent '\n'
//                           was read, so that un-getting that '\n' can restore it
static int file_offset = 0, source_line = 1, source_column = 1, last_line_last_column = 1;
// Report a fatal error found while reading the grammar, then terminate.
//
//   error - selects the message:
//             WEOF    the input ended too early
//             1       the caller has already reported the problem
//             '0'     a digit was expected
//             > ' '   'error' is the character that was expected
//             other   plain syntax error
//   line  - line number in this C source where the problem was detected;
//           normally supplied by the fatal() macro below.
//
// Every message starts with "<grammar file>:<line>:<column>: Error: ".
// This procedure never returns: it always calls exit(EXIT_FAILURE).
void fatal_(int error, int line) {
  fprintf(stderr, "%s:%d:%d: Error: ", grammarfn, source_line, source_column);

  if (error != 1) { // 1: fail silently as problem already reported
    if (error == WEOF) {
      fprintf(stderr, "Premature end of file at line %d."
                      " Grammar should end with a 'E' command."
                      " (detected in %s, line %d)\n",
              source_line, __FILE__, line);
      if (source_line == 1) {
        // Nothing at all could be read from the file.
        fprintf(stderr, "(You're not by any chance trying to compile using tcc,"
                        " are you? Its wchar_t support seems to be very broken...)"
                        "\n");
      }
    } else if (error == '0') {
      fprintf(stderr, "Syntax error at line %d while looking for a digit."
                      " (detected in %s, line %d)\n",
              source_line, __FILE__, line);
    } else if (error > ' ') {
      fprintf(stderr, "Syntax error at line %d while looking for a '%c'."
                      " (detected in %s, line %d)\n",
              source_line, error, __FILE__, line);
    } else {
      fprintf(stderr, "Syntax error at line %d (detected in %s, line %d)\n",
              source_line, __FILE__, line);
    }
  }

  exit(EXIT_FAILURE);
}
// Convenience wrapper which records the C source line of the call.
#define fatal(n) fatal_(n,__LINE__)

// Need something similar for warnings.

// Read one wide character from f and update the position counters.
//
// A '\n' starts a new line: the column reached so far is saved in
// last_line_last_column (for unget_wide_char) and the column restarts at 1.
// Any other value, including WEOF, advances the column by one.
// file_offset is incremented on every call.
//
// Returns whatever fgetwc() returned (WEOF at end of input or on error).
wint_t get_wide_char(FILE *f) {
  const wint_t ch = fgetwc(f);

  file_offset += 1;
  if (ch != '\n') {
    source_column += 1;
    return ch;
  }

  last_line_last_column = source_column;
  source_line += 1;
  source_column = 1;
  return ch;
}

// Push c back onto f and undo the bookkeeping done by get_wide_char().
//
// Un-getting a '\n' moves back to the previous line and restores the column
// saved when that newline was read; anything else moves back one column.
void unget_wide_char(wint_t c, FILE *f) {
  const int was_newline = (c == '\n');

  ungetwc(c, f);
  file_offset -= 1;

  source_column = was_newline ? last_line_last_column : source_column - 1;
  if (was_newline) source_line -= 1;
}

// Return the next character of the grammar file.
// End of input here is a fatal error (reported through fatal(WEOF)).
static wint_t next_wide_char(void) {
  const wint_t ch = get_wide_char(grammar);
  if (ch != WEOF) return ch;
  fatal(WEOF);
  return ch;
}

// Skip whitespace in the grammar file and return the first character that
// is not whitespace.  End of input while skipping is a fatal error.
//
// The characters being tested are wide characters, so the wide classifier
// iswspace() is used: isspace() is only defined for values that fit in an
// unsigned char (or EOF), and anything else read from the grammar would be
// undefined behaviour.
static wint_t nonspace(void) {
  wint_t c;
  do {
    c = get_wide_char(grammar);
    if (c == WEOF) fatal(WEOF);
  } while (iswspace(c));
  return c;
}

// Every entry in the grammar consists of a type code and a data value, usually
// an index into some array or other.
//
// Layout of one entry, as defined by the masks and shifts below:
//   bits  0..23  data value / index    (INDEX_MASK)
//   bit   24     NEGATED_PHRASE
//   bit   25     GUARD_PHRASE
//   bit   26     WHITESPACE_ALLOWED
//   bits 27..31  type code             (GRAMMAR_TYPE_SHIFT, GRAMMAR_TYPE_MASK)
//
// dump_tables() copies the replacement text of these macros into the
// generated header using textof(), so any change made here also changes
// the generated output.

// Type of an entry in the main grammar array gram[]:
typedef unsigned long int ENTRY;
typedef int KEYWORD_INDEX;
typedef int REGEXP_INDEX;
typedef int LITSTR_INDEX;
typedef int GRAMMAR_INDEX;

// Flag bits which may be OR'ed into an entry:
#define NEGATED_PHRASE     (1U<<24U)
#define GUARD_PHRASE       (1U<<25U)
#define WHITESPACE_ALLOWED (1U<<26U)

// Position and width of the type code, and the mask for the data value:
#define GRAMMAR_TYPE_SHIFT 27U
#define GRAMMAR_TYPE_MASK  31U
#define INDEX_MASK         0xFFFFFFU

// We have enough spare bits in a grammar table entry
// to support types with values 1 through 31.
//
// I never use 0 in enumerations like this as it
// helps catch errors from variables which have
// not had a type explicitly set in them.

// Unshifted type codes:
#define BASE_BIP_TYPE            1U
#define BASE_PHRASE_TYPE         2U
#define BASE_SEMANTIC_TYPE       3U
#define BASE_KEYWORD_TYPE        4U
#define BASE_CHAR_TYPE           5U
#define BASE_UTF32CHAR_TYPE      6U
#define BASE_STRING_TYPE         7U
#define BASE_UTF32STRING_TYPE    8U
#define BASE_REGEXP_TYPE         9U
#define BASE_OPTION_TYPE        10U
#define BASE_COUNT_OF_ALTS      11U
#define BASE_COUNT_OF_PHRASES   12U
#define BASE_ALT_NUMBER         13U

// The same codes moved into the type field, ready to be OR'ed with an index:
#define BIP_TYPE           (BASE_BIP_TYPE           <<GRAMMAR_TYPE_SHIFT)
#define PHRASE_TYPE        (BASE_PHRASE_TYPE        <<GRAMMAR_TYPE_SHIFT)
#define SEMANTIC_TYPE      (BASE_SEMANTIC_TYPE      <<GRAMMAR_TYPE_SHIFT)
#define KEYWORD_TYPE       (BASE_KEYWORD_TYPE       <<GRAMMAR_TYPE_SHIFT)
#define CHAR_TYPE          (BASE_CHAR_TYPE          <<GRAMMAR_TYPE_SHIFT)
#define UTF32CHAR_TYPE     (BASE_UTF32CHAR_TYPE     <<GRAMMAR_TYPE_SHIFT)
#define STRING_TYPE        (BASE_STRING_TYPE        <<GRAMMAR_TYPE_SHIFT)
#define UTF32STRING_TYPE   (BASE_UTF32STRING_TYPE   <<GRAMMAR_TYPE_SHIFT)
#define REGEXP_TYPE        (BASE_REGEXP_TYPE        <<GRAMMAR_TYPE_SHIFT)
#define OPTION_TYPE        (BASE_OPTION_TYPE        <<GRAMMAR_TYPE_SHIFT)
#define COUNT_OF_ALTS      (BASE_COUNT_OF_ALTS      <<GRAMMAR_TYPE_SHIFT)
#define COUNT_OF_PHRASES   (BASE_COUNT_OF_PHRASES   <<GRAMMAR_TYPE_SHIFT)
#define ALT_NUMBER         (BASE_ALT_NUMBER         <<GRAMMAR_TYPE_SHIFT)

// Extract the unshifted type code (one of the BASE_* values) from an entry.
#define PhraseType(idx) ((((idx)>>GRAMMAR_TYPE_SHIFT)&GRAMMAR_TYPE_MASK))

// These are not hard limits. The Flex structure can expand to as large
// as necessary.  These are only here 1) in case we're using fixed-size
// arrays rather than flex arrays, or 2) to limit runaway coding errors.
// These MAX sizes do not propagate to the header file.
#define MAX_GRAMMAR  (1024*16)
#define MAX_PHRASES  (1024*4) // extra during debugging
#define MAX_KEYWORDS 1024
#define MAX_REGEXPS  1024
#define MAX_LIT      (1024*128)
#define MAX_COMMENT  4096
#define MAX_C        4096
#define MAX_BIPS     64

// Used by dump_tables() to compute the LARGEST_ALT value written to the
// generated header.  It is not assigned anywhere in this part of the file.
static int LARGEST_ALT = 0;

// Base offsets written to the generated header.  dump_tables() derives
// PHRASE_BASE, SEMANTIC_BASE and AST_BASE from BIP_BASE and the table sizes.
static int BIP_BASE = 0;
static int PHRASE_BASE = 0;
static int SEMANTIC_BASE = 0;
static int AST_BASE = 0;


// Apart from StringPool, which is used internally, these arrays are
// the ones which are eventually written out to the header file as
// const arrays.  Internally we refer to strings by an index into the
// stringpool, but in the header file, the arrays of strings are literal
// strings. (Except in the grammar where we use indices into the individual
// arrays of regexps, keywords, etc.)

// All strings are stored here.
// A STRINGINDEX is the offset of a string's first character in StringPool.
typedef int STRINGINDEX;
static DECLARE(StringPool, wchar_t, MAX_LIT);
#define _StringPool(x) WRITE(x,StringPool,wchar_t)
#define  StringPool(x)  READ(x,StringPool,wchar_t)
// Offset of the next unused character position in StringPool.
static STRINGINDEX Str_nextfree = 0;
// Address of the string which starts at offset x.
// NOTE(review): flex.h is not visible here; if WRITE can move the storage
// when the pool grows, a pointer obtained from String() must not be kept
// across later writes to the pool - confirm.
#define String(x) &_StringPool(x)

// Filter '-' out of strings so they can be used as C variables,
// and convert spaces in identifiers to '_'
//
//   x - StringPool offset of the original string.
//
// Returns a pointer to a freshly appended copy of that string in which every
// '-' and ' ' has been replaced by '_'.  The original is left untouched.
// Every call appends another copy at Str_nextfree.
//
// The source is read by index rather than through a saved pointer: the
// comments on the MAX_* limits say that a flex array can expand, and this
// loop writes to the same pool that it reads from.
// NOTE(review): flex.h is not visible here - confirm whether WRITE can move
// the storage.  If it can, the returned pointer is likewise only good until
// the next write to StringPool.
wchar_t *CString(STRINGINDEX x) {
  STRINGINDEX Result = Str_nextfree;
  STRINGINDEX from = x;
  wchar_t wc;
  do {
    wc = StringPool(from);
    from += 1;
    // The terminating '\0' is copied as well, which ends the loop.
    _StringPool(Str_nextfree++) = (wc == L'-' || wc == L' ') ? L'_' : wc;
  } while (wc != L'\0');
  return String(Result);
}


// DUE TO PROBLEMS SETTING UP GRAM VALUES PROPERLY, I'M SEPARATING BIP/PHRASE/C INTO THEIR OWN TABLES AND INDEXES FROM 0.

// For the name of B<...> code.
// bip_phrasename(i) is the StringPool offset of the name of internal BIP number i.
static DECLARE(bip_phrasename, STRINGINDEX,  MAX_C); /* STRINGINDEX is an index into *** StringPool *** */
#define _bip_phrasename(x) WRITE(x,bip_phrasename,STRINGINDEX)
#define  bip_phrasename(x)  READ(x,bip_phrasename,STRINGINDEX)
// Map of user-supplied BIP number to a consecutive sequence used internally:
// bip_map(i) is the user-supplied number of the BIP whose internal
// (consecutive) number is i.  dump_tables() writes it out as the value of
// the B_<name> constant.
static DECLARE(bip_map, ENTRY,  MAX_BIPS);
#define _bip_map(x) WRITE(x,bip_map,ENTRY)
#define  bip_map(x)  READ(x,bip_map,ENTRY)
// Next unused internal BIP number, and the number of BIPs written out by dump_tables().
static int NEXT_FREE_BIPNO = 0, NUM_BIPS = 0;
// NOTE(review): these three are not referenced in this part of the file;
// presumably they track the B<...> definition currently being read - confirm
// against takeon.c.
int current_def_bipno=unassigned,
    current_internal_bipno=unassigned,
    current_user_bipno=unassigned;

// For the offset into the grammar where the sequentially-numbered P<name> is stored.
// sequential_phrase_no_to_grammar_index(p) is the gram[] index at which the
// definition of simple phrase number p starts; dump_tables() writes it out
// as the G_<name> constant.
static DECLARE(sequential_phrase_no_to_grammar_index, ENTRY,  MAX_PHRASES);
#define _sequential_phrase_no_to_grammar_index(x) WRITE(x,sequential_phrase_no_to_grammar_index,ENTRY)
#define  sequential_phrase_no_to_grammar_index(x)  READ(x,sequential_phrase_no_to_grammar_index,ENTRY)
// For the name of B<> and P<...> definitions. And later also C<> definitions will be added.

// Maps the location in the grammar where a phrase is stored
// to the sequential phrase number, i.e. a mapping of G_x to P_x
// NOT YET USED.
static DECLARE(grammar_index_to_sequential_phrase_number, int,  MAX_PHRASES);
#define _grammar_index_to_sequential_phrase_number(x) WRITE(x,grammar_index_to_sequential_phrase_number,int)
#define  grammar_index_to_sequential_phrase_number(x)  READ(x,grammar_index_to_sequential_phrase_number,int)

// As above, but maps to the phrase name.
// phrasename(p) is the StringPool offset of the name of simple phrase number p.
static DECLARE(phrasename, STRINGINDEX,  MAX_PHRASES); /* STRINGINDEX is an index into *** StringPool *** */
#define _phrasename(x) WRITE(x,phrasename,STRINGINDEX)
#define  phrasename(x)  READ(x,phrasename,STRINGINDEX)
// NOTE(review): not referenced in this part of the file; presumably the
// number of the P<...> definition currently being read - confirm.
int current_def_simple_phraseno=unassigned;

// For the comment attached to each P<...> definition.
//static DECLARE(xcomment, STRINGINDEX,  MAX_COMMENT); /* STRINGINDEX is an index into *** StringPool *** */
//#define _xcomment(x) WRITE(x,xcomment,STRINGINDEX)
//#define  xcomment(x)  READ(x,xcomment,STRINGINDEX)

//STRINGINDEX current_comment;

// Next unused simple phrase number, and the number of simple phrases
// written out by dump_tables().
static int NEXT_FREE_SIMPLE_PHRASENO = 0, NUM_SIMPLE_PHRASES = 0;
// NOTE(review): not referenced in this part of the file; presumably the
// StringPool offsets of the phrase name being defined and of the phrase
// name being referenced - confirm against takeon.c.
STRINGINDEX current_def_phrasename = unassigned;
STRINGINDEX current_use_phrasename = unassigned;

// For the name of C<...> code.
// semantic_phrasename(i) is the StringPool offset of the name of semantic phrase i.
static DECLARE(semantic_phrasename, STRINGINDEX,  MAX_C); /* STRINGINDEX is an index into *** StringPool *** */
#define _semantic_phrasename(x) WRITE(x,semantic_phrasename,STRINGINDEX)
#define  semantic_phrasename(x)  READ(x,semantic_phrasename,STRINGINDEX)
// For the body of C<...> code.
// semantic_code(i) is the StringPool offset of the C source of semantic
// phrase i; dump_tables() writes it out both as a string literal and as the
// body of the function parse_<name>().
static DECLARE(semantic_code, STRINGINDEX,  MAX_C); /* STRINGINDEX is an index into *** StringPool *** */
#define _semantic_code(x) WRITE(x,semantic_code,STRINGINDEX)
#define  semantic_code(x)  READ(x,semantic_code,STRINGINDEX)
// Next unused semantic phrase number, and the number of semantic phrases
// written out by dump_tables().  NUM_SEMANTIC_PHRASES has no initialiser;
// having static storage it starts at 0.
static int NEXT_FREE_SEMANTIC_PHRASENO = 0, NUM_SEMANTIC_PHRASES;
// NOTE(review): not referenced in this part of the file, and initialised to
// 0 where the other current_* variables use 'unassigned' - confirm that is
// intended.
int current_def_semantic_phraseno = 0;

// For unnamed external blocks:
// Unlike the tables above, this holds the text itself (wchar_t elements),
// not StringPool offsets.  dump_tables() prints it from element 0 with %ls,
// so it is expected to be a '\0'-terminated wide string.
static DECLARE(initcode, wchar_t,  MAX_C);
#define _initcode(x) WRITE(x,initcode,wchar_t)
#define  initcode(x)  READ(x,initcode,wchar_t)
// Index of the next unused element of initcode.
static int initcode_nextfree = 0;

// For the array of keywords
// keyword(i) is the StringPool offset of keyword number i.  Entries are
// added by keyword_code(), which stores each distinct keyword once.
static DECLARE(keyword, STRINGINDEX,  MAX_KEYWORDS);
#define _keyword(x) WRITE(x,keyword,STRINGINDEX)
#define  keyword(x)  READ(x,keyword,STRINGINDEX)
// Next unused keyword slot, and the number of keywords written out by
// dump_tables().  Both are maintained by keyword_code().
static KEYWORD_INDEX NEXT_FREE_KEYWORD = 0, NUM_KEYWORDS = 0;
// NOTE(review): not referenced in this part of the file - confirm its use.
STRINGINDEX current_keyword=unassigned;

// For the array of regular expressions
// regexps(i) is the StringPool offset of regular expression number i.
// Entries are added by regexp_code(), which stores each distinct one once.
static DECLARE(regexps, STRINGINDEX,  MAX_REGEXPS);
#define _regexps(x) WRITE(x,regexps,STRINGINDEX)
#define  regexps(x)  READ(x,regexps,STRINGINDEX)
// Next unused regexp slot, and the number of regexps written out by
// dump_tables().  Both are maintained by regexp_code().
static REGEXP_INDEX NEXT_FREE_REGEXP = 0, NUM_REGEXPS = 0;
// NOTE(review): not referenced in this part of the file - confirm its use.
STRINGINDEX current_regexp=unassigned;

// The main grammar table:
// Each element is an ENTRY: a type code, optional flag bits and a data value.
static DECLARE(gram, ENTRY, MAX_GRAMMAR);
#define _gram(x) WRITE(x,gram,ENTRY)
#define  gram(x)  READ(x,gram,ENTRY)
// Next unused gram[] slot, and the number of gram[] entries written out by
// dump_tables().  NUM_GRAMMAR has no initialiser; having static storage it
// starts at 0.
static int NEXT_FREE_GRAMMAR_SLOTNO = 0, NUM_GRAMMAR;

// Total number of phrases of all three kinds.
#define NUM_PHRASES (NUM_BIPS + NUM_SIMPLE_PHRASES + NUM_SEMANTIC_PHRASES)

// Larger / smaller of two ints.  Each argument is evaluated exactly once.
//
// These used to be macros built on a GNU statement expression with locals
// named A and B.  A caller passing a variable of its own called A or B hit
// 'int A = A', which reads an uninitialised variable.  As ordinary inline
// functions they have no such capture problem and need no compiler extension;
// arguments are converted to int exactly as before.
static inline int MaxINT(int a, int b) { return a > b ? a : b; }
static inline int MinINT(int a, int b) { return a < b ? a : b; }

// Number of the current pass over the grammar file.  keyword_code() and
// regexp_code() only add new table entries while pass == 0; in any other
// pass they just look entries up.
int pass = 0;

// Read characters from the grammar file into StringPool until the
// terminating character 'ends' is seen.  The terminator itself is consumed
// but not stored.
//
// Backslash handling:
//   \\   is stored as a single backslash
//   \x   (any other x) is stored unchanged as the two characters \ and x,
//        and x is never treated as the terminator
//
// Returns the StringPool offset of the '\0'-terminated result.
// Fatal errors: end of input, or a string that has already reached
// 1023 stored characters.
static STRINGINDEX upto(wchar_t ends) {
  const STRINGINDEX start = Str_nextfree;

  for (;;) {
    wint_t ch = get_wide_char(grammar);
    if (ch == WEOF) fatal(WEOF);
    if ((Str_nextfree - start) >= 1023) fatal(ends);

    if (ch != L'\\') {
      if (ch == ends) {
        _StringPool(Str_nextfree++) = L'\0';
        return start;
      }
    } else {
      // An escape: fetch the character that follows the backslash.
      ch = get_wide_char(grammar);
      if (ch == WEOF) fatal(WEOF);
      if (ferror(grammar)) fatal(ends);
      if (ch != L'\\') {
        // Keep the backslash for everything except an escaped backslash.
        _StringPool(Str_nextfree++) = L'\\';
        _StringPool(Str_nextfree) = L'\0';
      }
    }

    // Store the character and keep the partial string terminated.
    _StringPool(Str_nextfree++) = ch;
    _StringPool(Str_nextfree) = L'\0';
  }
}

// Copy a block of C code from the grammar file into StringPool.
//
// Reading stops at the '}' which is not matched by a '{' seen since this
// procedure was called; that closing brace is consumed but not stored.
// Braces are counted purely by character, so those inside string literals
// or comments count too.
//
// Returns the StringPool offset of the '\0'-terminated code.
// End of input is a fatal error (raised by next_wide_char).
static STRINGINDEX c_code_block(void) {
  const STRINGINDEX start = Str_nextfree;
  int depth = 0;

  for (;;) {
    int ch = next_wide_char();
    if (ch == '{') {
      depth += 1;
    } else if (ch == '}') {
      if (depth == 0) break;
      depth -= 1;
    }
    _StringPool(Str_nextfree++) = ch;
  }

  _StringPool(Str_nextfree++) = '\0';
  return start;
}

// Return the grammar entry (KEYWORD_TYPE | table index) for a keyword.
//
//   newkeyword - StringPool offset of the keyword text.
//
// In pass 0 the keyword is added to the keyword table unless an equal string
// is already there, and NUM_KEYWORDS is kept up to date.  In later passes
// the keyword must already be in the table; failing to find it is an
// internal error which terminates the program.
static KEYWORD_INDEX keyword_code(STRINGINDEX newkeyword)
{
  // buggy if newkeyword is the empty string :-(  FIX!  TO DO Presumably regexp_code *could* have the same issue
  // Fault is probably in String() or wherever this string was entered into the StringPool.
  const int first_pass = (pass == 0);
  int slot = 0;

  // Tentatively place the keyword in the next free slot, so that the search
  // below is certain to find it there at the latest.
  if (first_pass) _keyword(NEXT_FREE_KEYWORD) = newkeyword;

  while (slot <= NEXT_FREE_KEYWORD &&
         wcscmp(String(newkeyword), String(keyword(slot))) != 0) {
    slot += 1;
  }

  if (first_pass) {
    // Only claim the tentative slot if no earlier entry matched.
    if (slot == NEXT_FREE_KEYWORD) NEXT_FREE_KEYWORD++;
    if (NEXT_FREE_KEYWORD >= NUM_KEYWORDS) NUM_KEYWORDS = NEXT_FREE_KEYWORD;
    return KEYWORD_TYPE | slot;
  }

  if (slot <= NEXT_FREE_KEYWORD) return KEYWORD_TYPE | slot;

  fprintf(stderr, "keyword_code - internal error: cannot find \"%ls\" in keyword table.\n", String(newkeyword));
  exit(EXIT_FAILURE);
}

// Return the grammar entry (REGEXP_TYPE | table index) for a regular
// expression.
//
//   newregexp - StringPool offset of the regular expression text.
//
// In pass 0 the regexp is added to the regexps table unless an equal string
// is already there, and NUM_REGEXPS is kept up to date.  In later passes the
// regexp must already be in the table; failing to find it is an internal
// error which terminates the program.
static REGEXP_INDEX regexp_code(STRINGINDEX newregexp)
{
  const int first_pass = (pass == 0);
  int slot = 0;

  // Tentatively place the regexp in the next free slot, so that the search
  // below is certain to find it there at the latest.
  if (first_pass) _regexps(NEXT_FREE_REGEXP) = newregexp;

  while (slot <= NEXT_FREE_REGEXP &&
         wcscmp(String(newregexp), String(regexps(slot))) != 0) {
    slot += 1;
  }

  if (first_pass) {
    // Only claim the tentative slot if no earlier entry matched.
    if (slot == NEXT_FREE_REGEXP) NEXT_FREE_REGEXP++;
    if (NEXT_FREE_REGEXP >= NUM_REGEXPS) NUM_REGEXPS = NEXT_FREE_REGEXP;
    return REGEXP_TYPE | slot;
  }

  if (slot <= NEXT_FREE_REGEXP) return REGEXP_TYPE | slot;

  fprintf(stderr, "regexp_code - internal error: cannot find \"%ls\" in regexp table.\n", String(newregexp));
  exit(EXIT_FAILURE);
}

// The output routines take a file as a parameter to implement the
// multi-pass aspect - on all but the output pass, the output file
// is redirected to a sink device (e.g. /dev/null)
// Write the symbolic name of a grammar entry type to f, padded to a fixed
// width and followed by " | ", ready for more terms to be OR'ed on.
//
//   type - the unshifted type code, as returned by PhraseType().
//
// A code with no name is written as "(xx << GRAMMAR_TYPE_SHIFT) | " in hex.
static void print_type(FILE *f, int type) {
  const char *label;

  switch (type<<GRAMMAR_TYPE_SHIFT) {
  case BIP_TYPE:         label = "BIP_TYPE         | "; break;
  case PHRASE_TYPE:      label = "PHRASE_TYPE      | "; break;
  case SEMANTIC_TYPE:    label = "SEMANTIC_TYPE    | "; break;
  case KEYWORD_TYPE:     label = "KEYWORD_TYPE     | "; break;
  case CHAR_TYPE:        label = "CHAR_TYPE        | "; break;
  case UTF32CHAR_TYPE:   label = "UTF32CHAR_TYPE   | "; break;
  case STRING_TYPE:      label = "STRING_TYPE      | "; break;
  case UTF32STRING_TYPE: label = "UTF32STRING_TYPE | "; break;
  case REGEXP_TYPE:      label = "REGEXP_TYPE      | "; break;
  case OPTION_TYPE:      label = "OPTION_TYPE      | "; break;

  // to make understanding the phrase structure table easier,
  // we tag every gram[] entry whether we need to or not:
  case COUNT_OF_PHRASES: label = "COUNT_OF_PHRASES | "; break;
  case COUNT_OF_ALTS:    label = "COUNT_OF_ALTS    | "; break;
  case ALT_NUMBER:       label = "ALT_NUMBER       | "; break;

  default:
    fprintf(f, "(%02x << GRAMMAR_TYPE_SHIFT) | ", type);
    return;
  }

  fputs(label, f);
}

// Write the wide string s to out in a form that can sit between the double
// quotes of a C string literal.
//
//   backslash     is written as  \\
//   double quote  is written as  \"
//   newline       is written as  \n  and, when 'indent' is not NULL and more
//                 text follows, 'indent' is written after it.  The caller
//                 uses this to close the literal, start a new source line
//                 and reopen the literal.
//   anything else is written unchanged.
static void escape(FILE *out, wchar_t *s, wchar_t *indent) {
  for (const wchar_t *p = s; *p != L'\0'; p++) {
    switch (*p) {
    case L'\\':
      fputs("\\\\", out); // BUGFIX?
      break;
    case L'\"':
      fputs("\\\"", out);
      break;
    case L'\n':
      fputs("\\n", out);
      if (indent != NULL && p[1] != L'\0') {
        fprintf(out, "%ls", indent);
      }
      break;
    default:
      fprintf(out, "%lc", (wint_t)*p);
      break;
    }
  }
}

// Write "<s> | " to f when b is non-zero; write nothing when b is zero.
// Used to emit the optional flag terms of a gram[] initialiser.
static void print_bool(FILE *f, int b, char *s) {
  if (b == 0) {
    return;
  }
  fprintf(f, "%s | ", s);
}

// Write s to f for use inside a /* ... */ comment of the generated header.
// A "*/" within s would end that comment early and break the header, so a
// space is written between the '*' and the '/'.  All other text is written
// unchanged.
static void dump_comment_text(FILE *f, const wchar_t *s) {
  for (; *s != L'\0'; s++) {
    fprintf(f, "%lc", (wint_t)*s);
    if (s[0] == L'*' && s[1] == L'/') fputc(' ', f);
  }
}

// Write the complete generated header (grammar.h) to f.
//
// The output consists of, in order:
//   - the include guard, TRUE/FALSE and the parsefn typedef
//   - the entry layout macros, copied from the definitions in this file
//   - the base offsets and table sizes
//   - the B_/P_/S_/G_ constants and the extern declarations
//   - inside '#ifndef SUPPRESS_DATA': the tables themselves (phrasename,
//     phrasename_c, bip_map, sequential_phrase_no_to_grammar_index,
//     semantic_phrasename, semantic_code, parsetime, xcomment, keyword,
//     regexps, gram), the init code, and the parse_<name>() procedures.
//
// Side effects: sets PHRASE_BASE, SEMANTIC_BASE and AST_BASE, and appends to
// StringPool through every call of CString().
static void dump_tables(FILE *f) {
  int i;

  // ---- Fixed prologue ----
  fprintf(f, "#ifndef _GRAMMAR_H_\n");
  fprintf(f, "#define _GRAMMAR_H_ 1\n");
  fprintf(f, "\n");
  fprintf(f, "#include <wchar.h>\n");
  fprintf(f, "\n");
  fprintf(f, "#ifndef TRUE\n");
  fprintf(f, "#define TRUE (0==0)\n");
  fprintf(f, "#endif\n");
  fprintf(f, "\n");
  fprintf(f, "#ifndef FALSE\n");
  fprintf(f, "#define FALSE (0!=0)\n");
  fprintf(f, "#endif\n");
  fprintf(f, "\n");
  fprintf(f, "typedef int (*parsefn)(void);\n");
  fprintf(f, "\n");

  fprintf(f, "#define LARGEST_ALT %d"
             " // Max number of phrases in any Alt: 0 (Reserved), 1:%d\n\n",
             LARGEST_ALT+1+2, LARGEST_ALT+2);

// A modern C preprocessor trick to convert a #define to a string:
// If this construct isn't supported, just enter the definitions
// manually, but beware that by having a second copy here there is
// a possibility that the two definitions can get out of sync if
// the master copy above is changed.
#define _textof(x) #x
#define textof(x) _textof(x)

  // ---- Entry layout macros, copied from the definitions in this file ----
  fprintf(f, "#define NEGATED_PHRASE     %s\n", textof(NEGATED_PHRASE));
  fprintf(f, "#define GUARD_PHRASE       %s\n", textof(GUARD_PHRASE));
  fprintf(f, "#define WHITESPACE_ALLOWED %s\n", textof(WHITESPACE_ALLOWED)); // <-- may change to use "O<...> = "
  fprintf(f, "#define GRAMMAR_TYPE_SHIFT %s\n", textof(GRAMMAR_TYPE_SHIFT));
  fprintf(f, "#define GRAMMAR_TYPE_MASK  %s\n", textof(GRAMMAR_TYPE_MASK));

  fprintf(f, "#define BIP_TYPE         %s\n", textof(BIP_TYPE));
  fprintf(f, "#define PHRASE_TYPE      %s\n", textof(PHRASE_TYPE));
  fprintf(f, "#define SEMANTIC_TYPE    %s\n", textof(SEMANTIC_TYPE));
  fprintf(f, "#define KEYWORD_TYPE     %s\n", textof(KEYWORD_TYPE));
  fprintf(f, "#define CHAR_TYPE        %s\n", textof(CHAR_TYPE));
  fprintf(f, "#define UTF32CHAR_TYPE   %s\n", textof(UTF32CHAR_TYPE));
  fprintf(f, "#define STRING_TYPE      %s\n", textof(STRING_TYPE));
  fprintf(f, "#define UTF32STRING_TYPE %s\n", textof(UTF32STRING_TYPE));
  fprintf(f, "#define REGEXP_TYPE      %s\n", textof(REGEXP_TYPE));
  fprintf(f, "#define OPTION_TYPE      %s\n", textof(OPTION_TYPE));
  fprintf(f, "#define COUNT_OF_ALTS    %s\n", textof(COUNT_OF_ALTS));
  fprintf(f, "#define COUNT_OF_PHRASES %s\n", textof(COUNT_OF_PHRASES));
  fprintf(f, "#define ALT_NUMBER       %s\n", textof(ALT_NUMBER));
  fprintf(f, "#define INDEX_MASK       %s\n", textof(INDEX_MASK));
  fprintf(f, "// (We have room for types 1..31)\n");
  fprintf(f, "#define PhraseType(idx)  %s\n", textof(PhraseType(idx)));
  fprintf(f, "\n");

  // NEXT_FREE_BIPNO refers to the internal sequence number, for example, if there were
  // only two BIPs referenced in a grammar:  B<fred>=42; and B<jim>=69; then
  // the internal BIP numbers for those two would be 0 and 1 respectively, with
  // 'NEXT_FREE_BIPNO' being set to 2.  BIP(0) would be 42 and BIP(1) would be 69.
  // The B_ constant should be the number the grammar writer sees, i.e. 42 or 69.
  // This range compression parallels what I used to do for phrases as well, and
  // I'm wondering if it contributes unnecessary complexity to the code,  and that
  // I should make BIP numbers sparse as I've done for regular phrase numbers.

  // ---- Base offsets and table sizes ----
  PHRASE_BASE = BIP_BASE+NUM_BIPS;
  SEMANTIC_BASE = PHRASE_BASE + NUM_SIMPLE_PHRASES;
  AST_BASE = SEMANTIC_BASE + NUM_SEMANTIC_PHRASES;

  fprintf(f, "\n");
  fprintf(f, "#define BIP_BASE %d\n", BIP_BASE);
  fprintf(f, "#define PHRASE_BASE %d\n", PHRASE_BASE);
  fprintf(f, "#define SEMANTIC_BASE %d\n", SEMANTIC_BASE);
  fprintf(f, "#define AST_BASE %d\n\n", AST_BASE);

  fprintf(f, "#define NUM_BIPS %d\n", NUM_BIPS);
  fprintf(f, "#define NUM_SIMPLE_PHRASES %d\n", NUM_SIMPLE_PHRASES);
  fprintf(f, "#define NUM_SEMANTIC_PHRASES %d\n", NUM_SEMANTIC_PHRASES);
  fprintf(f, "#define NUM_PHRASES"
             " (NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES)\n\n");

  fprintf(f, "#define NUM_KEYWORDS %d\n", NUM_KEYWORDS);
  fprintf(f, "#define NUM_REGEXPS %d\n", NUM_REGEXPS);
  fprintf(f, "#define NUM_GRAMMAR %d\n", NUM_GRAMMAR);
  fprintf(f, "\n");

  // ---- B_/P_/S_ constants ----
  // Current code revision: we're using separate namespaces for bips, phrases, and semantic code.
  // bip_map() holds ENTRY (unsigned long) values; they are cast to long to
  // match the %ld conversions used for them throughout this procedure.
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "#define B_%ls %ld\n", CString(bip_phrasename(i)), (long)bip_map(i));
  }
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    fprintf(f, "#define P_%ls %d\n", CString(phrasename(i)), i + NUM_BIPS);  // <--- staying compatible with original for now.
  }
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "#define S_%ls %d\n", CString(semantic_phrasename(i)), i);
  }

  // ---- extern declarations and G_ constants ----
  fprintf(f, "\n");
  fprintf(f, "extern const int bip_map[NUM_BIPS];\n");
  fprintf(f, "extern const int sequential_phrase_no_to_grammar_index[NUM_SIMPLE_PHRASES];\n");
  fprintf(f, "extern const wchar_t *phrasename[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES];\n\n");
  fprintf(f, "extern const wchar_t *semantic_phrasename[NUM_SEMANTIC_PHRASES];\n");
  fprintf(f, "extern const wchar_t *semantic_code[NUM_SEMANTIC_PHRASES];\n");
  fprintf(f, "extern const wchar_t *xcomment[NUM_PHRASES];\n");
  fprintf(f, "extern const wchar_t *keyword[NUM_KEYWORDS];\n");
  fprintf(f, "extern const wchar_t *regexps[NUM_REGEXPS];\n");
  fprintf(f, "\n");
  fprintf(f, "extern const int gram[NUM_GRAMMAR];\n");
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    int grammar_index = sequential_phrase_no_to_grammar_index(i);
    fprintf(f, "#define G_%ls %d\n", CString(phrasename(i)), grammar_index);
  }
  fprintf(f, "\n");
  fprintf(f, "extern parsefn parsetime[NUM_SEMANTIC_PHRASES];\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "extern int parse_%ls(void);\n", CString(semantic_phrasename(i)));
  }
  fprintf(f, "\n");

  // ---- Data, which a client can leave out by defining SUPPRESS_DATA ----
  fprintf(f, "#ifndef SUPPRESS_DATA\n");

  // Names exactly as written in the grammar: BIPs, simple phrases, semantic phrases.
  fprintf(f, "const wchar_t *phrasename[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES] = {\n");

  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "  /*%d+%d*/   L\"%ls\" /*%ld*/,\n",
               0, i,
               String(bip_phrasename(i)),
               (long)bip_map(i));
  }
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    fprintf(f, "  /*%d+%d*/   L\"%ls\",\n",
               NUM_BIPS,
               i,
               String(phrasename(i)));
  }
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "  /*%d+%d*/   L\"%ls\",\n",
               NUM_BIPS+NUM_SIMPLE_PHRASES, i,
               String(semantic_phrasename(i)));
  }
  fprintf(f, "};\n");

  // The same names after CString() has made them usable as C identifiers.
  fprintf(f, "const wchar_t *phrasename_c[NUM_BIPS+NUM_SIMPLE_PHRASES+NUM_SEMANTIC_PHRASES] = {\n");

  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "  /*%d+%d*/   L\"%ls\" /*%ld*/,\n",
               0, i,
               CString(bip_phrasename(i)),
               (long)bip_map(i));
  }
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    fprintf(f, "  /*%d+%d*/   L\"%ls\",\n",
               NUM_BIPS,
               i,
               CString(phrasename(i)));
  }
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "  /*%d+%d*/   L\"%ls\",\n",
               NUM_BIPS+NUM_SIMPLE_PHRASES, i,
               CString(semantic_phrasename(i)));
  }
  fprintf(f, "};\n");

  fprintf(f, "const int bip_map[NUM_BIPS] = {\n");
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "  /*%d*/   %ld,\n", i, (long)bip_map(i));
  }
  fprintf(f, "};\n");

  fprintf(f, "const int sequential_phrase_no_to_grammar_index[NUM_SIMPLE_PHRASES] = {\n");
  for (i = 0; i < NUM_SIMPLE_PHRASES; i++) {
    int grammar_index = sequential_phrase_no_to_grammar_index(i);
    fprintf(f, "  G_%ls,  /*%d*/\n", CString(phrasename(i)), grammar_index);
  }
  fprintf(f, "};\n");


  fprintf(f, "\n");
  fprintf(f, "const wchar_t *semantic_phrasename[NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "  /*%d*/   L\"%ls\",\n", i, String(semantic_phrasename(i)));
  }
  fprintf(f, "};\n\n");

  // The semantic code as data: one string literal per source line.
  fprintf(f, "const wchar_t *semantic_code[NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "  /*%d*/   L\"", i);
    escape(f, String(semantic_code(i)), L"\"\n             \"");
    fprintf(f, "\",\n");
  }
  fprintf(f, "};\n\n");

  fprintf(f, "parsefn parsetime[NUM_SEMANTIC_PHRASES] = {\n");
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "  /*%d*/   &parse_%ls,\n", i, CString(semantic_phrasename(i)));
  }
  fprintf(f, "};\n\n");

  // Comment storage is not implemented yet, so every entry is written as NULL.
  fprintf(f, "// Comments are stored so that they can be re-inserted, should\n");
  fprintf(f, "// we need to regenerate a grammar.g file from this header file.\n");
  fprintf(f, "const wchar_t *xcomment[NUM_PHRASES] = {\n");
  for (i = 0; i < NUM_PHRASES; i++) {
    fprintf(f, "  /*%3d*/ ", i);
    //if (xcomment(i) < 0 /* TO DO */ /* == NULL*/) {
      fprintf(f, "  NULL");
    //} else {
    //  fprintf(f, "  L\"");
    //  escape(f, String(xcomment(i)), L"\"\n             \"");
    //  fprintf(f, "\"");
    //}
    fprintf(f, ",\n");
  }
  fprintf(f, "};\n");

  fprintf(f, "const wchar_t *keyword[NUM_KEYWORDS] = {\n");
  for (i = 0; i < NUM_KEYWORDS; i++) {
    fprintf(f, "  /*%3d*/   L\"", i);
    escape(f, String(keyword(i)), NULL);
    fprintf(f, "\",\n");
  }
  fprintf(f, "};\n");

  // NOTE(review): unlike keywords, regexps are written without escape() -
  // confirm that this is intended.
  fprintf(f, "const wchar_t *regexps[NUM_REGEXPS] = {\n");
  for (i = 0; i < NUM_REGEXPS; i++) {
    fprintf(f, "  /*%d*/   L\"%ls\",\n", i, String(regexps(i)));
  }
  fprintf(f, "};\n");

  // ---- The grammar table itself, one symbolic initialiser per entry ----
  fprintf(f, "const int gram[NUM_GRAMMAR /* %d */] = {\n", NUM_GRAMMAR);

  for (i = 0; i < NUM_GRAMMAR; i++) {

    // Head each phrase definition with a comment naming it.
    int p;
    for (p = 0; p < NUM_SIMPLE_PHRASES; p++) {
      if (sequential_phrase_no_to_grammar_index(p) == i) {
        fprintf(f, "\n// P<%ls> = ...;\n", String(phrasename(p)));
      }
    }

    // Take the entry apart into its fields.
    int type = PhraseType(gram(i));
    int negated = gram(i) & NEGATED_PHRASE;
    int guard = gram(i) & GUARD_PHRASE;
    int whitespace = gram(i) & WHITESPACE_ALLOWED;
    int index = gram(i) & INDEX_MASK;

    fprintf(f, "  /*%3d*/ ", i);

    if ((type<<GRAMMAR_TYPE_SHIFT) == BIP_TYPE) {
      print_type(f, BIP_TYPE>>GRAMMAR_TYPE_SHIFT);
      if (index == 0 /* B_EOF */) whitespace = 1;
      // BIPs should allow whitespace before.
      // Well, at least EOF should. Not sure about any others.
    } else {
      print_type(f, type);
    }
    print_bool(f, negated,    "NEGATED_PHRASE    ");
    print_bool(f, guard,      "GUARD_PHRASE      ");
    print_bool(f, whitespace, "WHITESPACE_ALLOWED");
    if ((type<<GRAMMAR_TYPE_SHIFT) == PHRASE_TYPE) {
      // And we need to distinguish between regular phrases and BIPS
      // where a phrase is used in a rule:
      fprintf(f, "G_%ls,\n", CString(phrasename(index)));
    } else if ((type<<GRAMMAR_TYPE_SHIFT) == BIP_TYPE) {
      fprintf(f, "B_%ls,\n", CString(bip_phrasename(index)));
    } else if ((type<<GRAMMAR_TYPE_SHIFT) == SEMANTIC_TYPE) {
      fprintf(f, "S_%ls,\n", CString(semantic_phrasename(index)));
    } else {
      fprintf(f, "0x%06x,", (unsigned)index);

      // Annotate the raw index with what it stands for.  Keyword and regexp
      // text comes from the grammar and may well contain "*/" (a regexp for
      // C comments, say), so it goes through dump_comment_text().
      if ((type<<GRAMMAR_TYPE_SHIFT) == REGEXP_TYPE) {
        fprintf(f, "  /* ");
        dump_comment_text(f, String(regexps(index)));
        fprintf(f, " */");
      } else if ((type<<GRAMMAR_TYPE_SHIFT) == KEYWORD_TYPE) {
        fprintf(f, "  /* ");
        dump_comment_text(f, String(keyword(index)));
        fprintf(f, " */");
      } else if ((type<<GRAMMAR_TYPE_SHIFT) == CHAR_TYPE) {
        fprintf(f, "  /* '%c' */", index&0xFF);
      }
      fprintf(f, "\n");
    }
  }
  fprintf(f, "};\n\n");

  // ---- Unnamed external code blocks ----
  fprintf(f, "%ls\n", &initcode(0));

  // procedure bodies come last so that they can use all the data created above.
  for (i = 0; i < NUM_SEMANTIC_PHRASES; i++) {
    fprintf(f, "int parse_");
    fprintf(f, "%ls", CString(semantic_phrasename(i)));
    fprintf(f, "(void)\n{%ls}\n", String(semantic_code(i)));
  }

  // ---- Trailer: the BIP definitions as comments, then close the guards ----
  for (i = 0; i < NUM_BIPS; i++) {
    fprintf(f, "\n// B<%ls> = %ld;\n", CString(bip_phrasename(i)), (long)bip_map(i));
  }
  fprintf(f, "\n// E\n");

  fprintf(f, "#endif  // SUPPRESS_DATA\n");
  fprintf(f, "#endif  // _GRAMMAR_H_\n");

}