48 #include "EST_Token.h" 
   49 #include "EST_string_aux.h" 
   50 #include "EST_cutils.h" 
   51 #include "EST_error.h" 
   // Default character sets for tokenising.  Elsewhere in this file the
   // whitespace default is assigned to a WhiteSpaceChars member (apparently
   // inside EST_TokenStream::default_values()); presumably the other three
   // are installed the same way -- those lines are not visible here.
   //
   // Default whitespace set: space, tab, newline and carriage return.
   53 const EST_String EST_Token_Default_WhiteSpaceChars = 
" \t\n\r";
 
   // Default single-character symbols: round, curly and square brackets.
   54 const EST_String EST_Token_Default_SingleCharSymbols = 
"(){}[]";
 
   // Default pre-punctuation set: double quote, single quote, backtick and
   // the three opening brackets.
   55 const EST_String EST_Token_Default_PrePunctuationSymbols = 
"\"'`({[";
 
   // Default punctuation set: double quote, single quote, backtick,
   // . , : ; ! ? and the three closing brackets.
   56 const EST_String EST_Token_Default_PunctuationSymbols = 
"\"'`.,:;!?]})";
 
   // Descriptive strings that the open routines in this file store in a
   // stream's Origin member.
   //
   // Stored in Origin by the routine that absorbs an existing FILE pointer.
   57 const EST_String Token_Origin_FD = 
"existing file descriptor";
 
   // Stored in Origin by one of the open routines; judging by the text it is
   // the istream variant (its signature is not visible here).
   58 const EST_String Token_Origin_Stream = 
"existing istream";
 
   // Stored in Origin by the routine that copies a caller-supplied string
   // into the stream's own buffer.
   59 const EST_String Token_Origin_String = 
"existing string";
 
   // File-local regular expression whose pattern is a character class of
   // space, tab, newline and carriage return.
   // NOTE(review): no use of this object is visible in this excerpt --
   // confirm it is still referenced before relying on it.
   61 static EST_Regex RXanywhitespace(
"[ \t\n\r]");
 
   63 static inline char *check_extend_str_in(
char *str, 
int pos, 
int *max)
 
   75     newstuff = 
new char[*max];
 
   76     strncpy(newstuff,str,pos);
 
   // check_extend_str(STR, POS, MAX): yields STR unchanged while POS is below
   // *MAX; once POS >= *MAX it yields check_extend_str_in(STR, POS, MAX)
   // instead, which allocates a new char array and copies the first POS
   // characters of STR into it.  Callers assign the result back to the buffer
   // pointer, e.g. tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen).
   // MAX must be a pointer to the buffer's current capacity.  POS is expanded
   // more than once on the extending path, so pass expressions without side
   // effects.
   84 #define check_extend_str(STR, POS, MAX) \ 
   85     (((POS)>= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR)) 
   87 ostream& operator<<(ostream& s, 
const EST_Token &p)
 
   89     s << 
"[TOKEN " << p.pname << 
"]";
 
   98     p_filepos = a.p_filepos;
 
   99     p_quoted = a.p_quoted;
 
  109     return "line "+itoString(linenum)+
" char "+itoString(linepos);
 
  118 EST_TokenStream::EST_TokenStream()
 
  121     tok_wspace = 
new char[tok_wspacelen];
 
  123     tok_stuff = 
new char[tok_stufflen];
 
  124     tok_prepuncslen = 32;  
 
  125     tok_prepuncs = 
new char[tok_prepuncslen];
 
  134     cerr << 
"TokenStream: warning passing TokenStream not as reference"  
  150 void EST_TokenStream::default_values()
 
  154     peeked_charp = FALSE;
 
  159     WhiteSpaceChars = EST_Token_Default_WhiteSpaceChars;
 
  169     if (type != tst_none) 
 
  171     delete [] tok_wspace;
 
  173     delete [] tok_prepuncs;
 
  179     s << 
"[TOKENSTREAM ";
 
  183     cerr << 
"UNSET"; 
break;
 
  185     cerr << 
"FILE"; 
break;
 
  187     cerr << 
"PIPE"; 
break;
 
  189     cerr << 
"ISTREAM"; 
break;
 
  191     cerr << 
"STRING"; 
break;
 
  193     cerr << 
"UNKNOWN" << endl;
 
  202     if (type != tst_none)
 
  205     fp = fopen(filename,
"rb");
 
  208     cerr << 
"Cannot open file " << filename << 
" as tokenstream"  
  221     if (type != tst_none)
 
  227     cerr << 
"Cannot absorb NULL filestream as tokenstream" << endl;
 
  230     Origin = Token_Origin_FD;
 
  233     close_at_end = close_when_finished;
 
  241     if (type != tst_none)
 
  245     Origin = Token_Origin_Stream;
 
  255     if (type != tst_none)
 
  258     buf = (
const char *)newbuffer;
 
  259     buffer_length = newbuffer.
length();
 
  260     buffer = 
new char[buffer_length+1];
 
  261     memmove(buffer,buf,buffer_length+1);
 
  263     Origin = Token_Origin_String;
 
  269 int EST_TokenStream::seek_end()
 
  272     peeked_charp = FALSE;
 
  278     cerr << 
"EST_TokenStream unset" << endl;
 
  282     fseek(fp,0,SEEK_END);
 
  283     p_filepos = ftell(fp);
 
  286     cerr << 
"EST_TokenStream seek on pipe not supported" << endl;
 
  290     cerr << 
"EST_TokenStream seek on istream not yet supported" << endl;
 
  297     cerr << 
"EST_TokenStream: unknown type" << endl;
 
  306     peeked_charp = FALSE;
 
  312     cerr << 
"EST_TokenStream unset" << endl;
 
  316     p_filepos = position;
 
  317     return fseek(fp,position,SEEK_SET);
 
  319     cerr << 
"EST_TokenStream seek on pipe not supported" << endl;
 
  323     cerr << 
"EST_TokenStream seek on istream not yet supported" << endl;
 
  339     cerr << 
"EST_TokenStream: unknown type" << endl;
 
  347 static int stdio_fread(
void *buff,
int size,
int nitems,FILE *fp)
 
  350     return fread(buff,size,nitems,fp);
 
  362         << 
" peeked into binary data" << endl;
 
  366     peeked_charp = FALSE;
 
  372     cerr << 
"EST_TokenStream unset" << endl;
 
  376     items_read = stdio_fread(buff,(
size_t)size,(
size_t)nitems,fp);
 
  377     p_filepos += items_read*size;
 
  380     cerr << 
"EST_TokenStream fread pipe not yet supported" << endl;
 
  384     cerr << 
"EST_TokenStream fread istream not yet supported" << endl;
 
  387     if ((buffer_length-pos)/size < nitems)
 
  388         items_read = (buffer_length-pos)/size;
 
  391     memcpy(buff,&buffer[pos],items_read*size);
 
  392     pos += items_read*size;
 
  395     cerr << 
"EST_TokenStream: unknown type" << endl;
 
  424     cerr << 
"EST_TokenStream: unknown type" << endl;
 
  429     peeked_charp = FALSE;
 
  443         fp = freopen(Origin,
"rb",fp);
 
  447     cerr << 
"EST_TokenStream: can't rewind pipe" << endl;
 
  451     cerr << 
"EST_TokenStream: can't rewind istream" << endl;
 
  457     cerr << 
"EST_TokenStream: unknown type" << endl;
 
  462     peeked_charp = FALSE;
 
  500     result += t.whitespace() + t.prepunctuation() +
 
  501         t.string() + t.punctuation();
 
  504         cerr << 
"EST_TokenStream: end of file when looking for \"" <<
 
  523     result += t.whitespace() + t.prepunctuation();
 
  526         result += quote_string(t.string());
 
  528         result += t.string();
 
  530     result += t.punctuation();
 
  543     char *w = wstrdup(
peek().whitespace());
 
  545     for (i=0; w[i] != 0; i++)
 
  570             EST_error(
"Expected '%s' got '%s' at %s", 
 
  571                       (
const char *)expected, 
 
  581 void EST_TokenStream::build_table()
 
  587     for (i=0; i<256; ++i)
 
  590     for (p=WhiteSpaceChars; *p; ++p)
 
  591     if (p_table[c=(
unsigned char)*p])
 
  592         EST_warning(
"Character '%c' has two classes, '%c' and '%c'", 
 
  597     for (p=SingleCharSymbols; *p; ++p)
 
  598     if (p_table[c=(
unsigned char)*p])
 
  599         EST_warning(
"Character '%c' has two classes, '%c' and '%c'", 
 
  600             *p, p_table[c], 
'!');
 
  604     for (p=PunctuationSymbols; *p; ++p)
 
  605     if (p_table[c=(
unsigned char)*p] == 
'@')
 
  608         EST_warning(
"Character '%c' has two classes, '%c' and '%c'", 
 
  609             *p, p_table[c], 
'.');
 
  613     for(p=PrePunctuationSymbols; *p; ++p)
 
  614     if (p_table[c=(
unsigned char)*p] == 
'@')
 
  616     else if (p_table[c] == 
'.')
 
  619         EST_warning(
"Character '%c' has two classes, '%c' and '%c'", 
 
  620             *p, p_table[c], 
'$');
 
  627 inline int EST_TokenStream::getpeeked_internal(
void)
 
  629   peeked_charp = FALSE;
 
  634 int EST_TokenStream::getch_internal()
 
  637     if (EST_TokenStream::peeked_charp)
 
  639       return getpeeked_internal();
 
  645     cerr << 
"EST_TokenStream unset" << endl;
 
  652         if (stdio_fread(&lc,1,1,fp) == 0)
 
  659     cerr << 
"EST_TokenStream pipe not yet supported" << endl;
 
  666     if (pos < buffer_length)
 
  669         return buffer[pos++];
 
  674     cerr << 
"EST_TokenStream: unknown type" << endl;
 
  681 int EST_TokenStream::getch(
void)
 
  683   return getch_internal();
 
  686 inline int EST_TokenStream::peekch_internal()
 
  691     peeked_char = getch_internal();
 
  697 int EST_TokenStream::peekch(
void)
 
  699   return peekch_internal();
 
  // Character-class tests against p_table, the 256-entry table filled in by
  // build_table().  Both macros need a p_table in scope where they are
  // expanded.  The index is cast to unsigned char so that characters with
  // the top bit set cannot produce a negative index.
  //
  // CLASS(C,CL): true when the table entry for character C equals class code
  // CL.  C is expanded exactly once, so it may be an expression with side
  // effects (the tokeniser passes c=getch_internal()).
  703 #define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL)) 
  // CLASS2(C,CL1,CL2): true when the table entry for C equals CL1 or CL2.
  // C is expanded twice (the second time only if the first comparison
  // fails), so pass only side-effect-free expressions such as tok_stuff[j].
  705 #define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2)) 
  721     for (i=0; (CLASS(c=getch_internal(),
' ') && 
 
  724     if (c == 
'\n') linepos++;
 
  725     tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen);
 
  728     tok_wspace[i] = 
'\0';
 
  740          ((c = getch_internal()) != EOF)
 
  745         tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
 
  747             c = getch_internal();
 
  754         for (i=0,tok_stuff[i++]=c; 
 
  757           !CLASS(c=peekch_internal(),
' ') && 
 
  761         tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
 
  763         tok_stuff[i++] = getpeeked_internal();
 
  769          ((j < i) && CLASS2(tok_stuff[j], 
'$', 
'"'));
 
  771     if ((j > 0) && (j < i))  
 
  773         tok_prepuncs = check_extend_str(tok_prepuncs,j+1,&tok_prepuncslen);
 
  774         memmove(tok_prepuncs,tok_stuff,j);
 
  775         tok_prepuncs[j] = 
'\0';
 
  787          ((j > 0) && CLASS2(word[j],
'.',
'"'));
 
  789     if (word[j+1] != 
'\0')
 
  798     if (tok_wspace[0] == 
'\0') 
 
  820     if ((
peek().whitespace().contains(
"\n")) || 
eof())
 
  846     quoted[0] = quote(0);
 
  847     for (i=1,j=0; j < s.
length(); j++,i++)
 
  849         if (s(j) == quote(0))
 
  850         quoted[i++] = escape(0);
 
  851         else if (s(j) == escape(0))
 
  852         quoted[i++] = escape(0);
 
  855     quoted[i++] = quote(0);
 
  857     quoted_form = quoted;
 
  867     return Origin+
":"+itoString(linepos);