48 #include "EST_Token.h"
49 #include "EST_string_aux.h"
50 #include "EST_cutils.h"
51 #include "EST_error.h"
// Default character sets for the tokenizer's character classes.
// Whitespace: space, tab, newline and carriage return.
53 const EST_String EST_Token_Default_WhiteSpaceChars =
" \t\n\r";
// Single-character symbols: every round, curly and square bracket.
54 const EST_String EST_Token_Default_SingleCharSymbols =
"(){}[]";
// Pre-punctuation: the three quote characters plus the opening brackets.
55 const EST_String EST_Token_Default_PrePunctuationSymbols =
"\"'`({[";
// Punctuation: the three quote characters, sentence punctuation and the
// closing brackets.
56 const EST_String EST_Token_Default_PunctuationSymbols =
"\"'`.,:;!?]})";
// Descriptive labels assigned to Origin when a token stream is attached to
// an already-open FILE*, an istream, or an in-memory string respectively.
57 const EST_String Token_Origin_FD =
"existing file descriptor";
58 const EST_String Token_Origin_Stream =
"existing istream";
59 const EST_String Token_Origin_String =
"existing string";
// File-local regular expression whose pattern is a bracket set of space,
// tab, newline and carriage return (the same characters as the default
// whitespace set).
61 static EST_Regex RXanywhitespace(
"[ \t\n\r]");
// Out-of-line part of the check_extend_str macro: allocates a replacement
// buffer of *max chars and copies the first pos chars of str into it.
// NOTE(review): strncpy() copies at most pos chars and need not NUL-terminate
// the new buffer; callers in this file write the terminator themselves
// (e.g. tok_wspace[i] = '\0').
63 static inline char *check_extend_str_in(
char *str,
int pos,
int *max)
75 newstuff =
new char[*max];
76 strncpy(newstuff,str,pos);
// Yields STR unchanged while POS < *MAX; otherwise yields the result of
// check_extend_str_in(STR, POS, MAX). Callers assign the result back to the
// buffer pointer. POS and MAX are expanded twice on the extend path, so pass
// side-effect-free expressions.
84 #define check_extend_str(STR, POS, MAX) \
85 (((POS)>= *(MAX))?check_extend_str_in((STR),(POS),(MAX)):(STR))
// Stream insertion for a token: writes "[TOKEN <pname>]" to s.
87 ostream& operator<<(ostream& s,
const EST_Token &p)
89 s <<
"[TOKEN " << p.pname <<
"]";
98 p_filepos = a.p_filepos;
99 p_quoted = a.p_quoted;
109 return "line "+itoString(linenum)+
" char "+itoString(linepos);
// Default constructor: allocates the three scratch buffers used while
// reading a token (whitespace, token text, pre-punctuation). The buffers are
// later grown on demand through check_extend_str.
118 EST_TokenStream::EST_TokenStream()
121 tok_wspace =
new char[tok_wspacelen];
123 tok_stuff =
new char[tok_stufflen];
// Initial capacity of the pre-punctuation buffer.
124 tok_prepuncslen = 32;
125 tok_prepuncs =
new char[tok_prepuncslen];
134 cerr <<
"TokenStream: warning passing TokenStream not as reference"
// Resets stream state to its defaults.
150 void EST_TokenStream::default_values()
// No character is pending from a previous peek.
154 peeked_charp = FALSE;
// Install the library-default whitespace set.
159 WhiteSpaceChars = EST_Token_Default_WhiteSpaceChars;
169 if (type != tst_none)
171 delete [] tok_wspace;
173 delete [] tok_prepuncs;
// Prints a "[TOKENSTREAM <type>" description of the stream.
// NOTE(review): the opening tag is written to `s`, but every type label below
// is written to cerr instead, and the "UNKNOWN" label also emits endl. This
// looks unintentional -- the labels presumably belong on `s`; confirm and fix.
179 s <<
"[TOKENSTREAM ";
183 cerr <<
"UNSET";
break;
185 cerr <<
"FILE";
break;
187 cerr <<
"PIPE";
break;
189 cerr <<
"ISTREAM";
break;
191 cerr <<
"STRING";
break;
193 cerr <<
"UNKNOWN" << endl;
202 if (type != tst_none)
205 fp = fopen(filename,
"rb");
208 cerr <<
"Cannot open file " << filename <<
" as tokenstream"
221 if (type != tst_none)
227 cerr <<
"Cannot absorb NULL filestream as tokenstream" << endl;
230 Origin = Token_Origin_FD;
233 close_at_end = close_when_finished;
241 if (type != tst_none)
245 Origin = Token_Origin_Stream;
255 if (type != tst_none)
258 buf = (
const char *)newbuffer;
259 buffer_length = newbuffer.
length();
260 buffer =
new char[buffer_length+1];
261 memmove(buffer,buf,buffer_length+1);
263 Origin = Token_Origin_String;
// Moves the stream position to the end of the underlying source.
269 int EST_TokenStream::seek_end()
// Any previously peeked character is discarded by the seek.
272 peeked_charp = FALSE;
278 cerr <<
"EST_TokenStream unset" << endl;
// FILE-backed stream: seek to the end and record the resulting offset.
// NOTE(review): the fseek() result is not checked here, whereas the
// positional seek in this file returns fseek()'s result to its caller.
282 fseek(fp,0,SEEK_END);
283 p_filepos = ftell(fp);
// Pipes and istreams cannot be repositioned; only a diagnostic is printed.
286 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
290 cerr <<
"EST_TokenStream seek on istream not yet supported" << endl;
297 cerr <<
"EST_TokenStream: unknown type" << endl;
306 peeked_charp = FALSE;
312 cerr <<
"EST_TokenStream unset" << endl;
316 p_filepos = position;
317 return fseek(fp,position,SEEK_SET);
319 cerr <<
"EST_TokenStream seek on pipe not supported" << endl;
323 cerr <<
"EST_TokenStream seek on istream not yet supported" << endl;
339 cerr <<
"EST_TokenStream: unknown type" << endl;
// File-local wrapper that forwards to the C library fread() and returns the
// item count as an int. Presumably exists so that member functions of
// EST_TokenStream can reach the C function despite the class having its own
// fread -- confirm against the class declaration.
347 static int stdio_fread(
void *buff,
int size,
int nitems,FILE *fp)
350 return fread(buff,size,nitems,fp);
362 <<
" peeked into binary data" << endl;
// Binary read: any pending peeked character is dropped before reading.
366 peeked_charp = FALSE;
372 cerr <<
"EST_TokenStream unset" << endl;
// FILE-backed stream: read through the C library and advance the recorded
// file offset by the number of bytes actually read.
// NOTE(review): the arguments are cast to size_t but stdio_fread() declares
// int parameters, so they are converted straight back to int.
376 items_read = stdio_fread(buff,(
size_t)size,(
size_t)nitems,fp);
377 p_filepos += items_read*size;
380 cerr <<
"EST_TokenStream fread pipe not yet supported" << endl;
384 cerr <<
"EST_TokenStream fread istream not yet supported" << endl;
// String-backed stream: clamp the request to the whole items that remain in
// the buffer, then copy them out and advance pos.
// NOTE(review): this divides by size; a size of 0 would divide by zero --
// confirm callers never pass 0.
387 if ((buffer_length-pos)/size < nitems)
388 items_read = (buffer_length-pos)/size;
391 memcpy(buff,&buffer[pos],items_read*size);
392 pos += items_read*size;
395 cerr <<
"EST_TokenStream: unknown type" << endl;
424 cerr <<
"EST_TokenStream: unknown type" << endl;
429 peeked_charp = FALSE;
443 fp = freopen(Origin,
"rb",fp);
447 cerr <<
"EST_TokenStream: can't rewind pipe" << endl;
451 cerr <<
"EST_TokenStream: can't rewind istream" << endl;
457 cerr <<
"EST_TokenStream: unknown type" << endl;
462 peeked_charp = FALSE;
500 result += t.whitespace() + t.prepunctuation() +
501 t.string() + t.punctuation();
504 cerr <<
"EST_TokenStream: end of file when looking for \"" <<
523 result += t.whitespace() + t.prepunctuation();
526 result += quote_string(t.string());
528 result += t.string();
530 result += t.punctuation();
543 char *w = wstrdup(
peek().whitespace());
545 for (i=0; w[i] != 0; i++)
570 EST_error(
"Expected '%s' got '%s' at %s",
571 (
const char *)expected,
// Builds p_table, the 256-entry lookup indexed by unsigned char that gives
// each character's class code. A warning is issued whenever a character is
// claimed by more than one class.
581 void EST_TokenStream::build_table()
// Visit every possible byte value.
587 for (i=0; i<256; ++i)
// Whitespace characters; an entry that is already non-zero is a conflict.
590 for (p=WhiteSpaceChars; *p; ++p)
591 if (p_table[c=(
unsigned char)*p])
592 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
// Single-character symbols, reported as class '!' in the conflict warning.
597 for (p=SingleCharSymbols; *p; ++p)
598 if (p_table[c=(
unsigned char)*p])
599 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
600 *p, p_table[c],
'!');
// Punctuation, reported as class '.'; entries already holding '@' are
// special-cased before the conflict check.
604 for (p=PunctuationSymbols; *p; ++p)
605 if (p_table[c=(
unsigned char)*p] ==
'@')
608 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
609 *p, p_table[c],
'.');
// Pre-punctuation, reported as class '$'; entries already holding '@' or '.'
// are special-cased first. Presumably a character that is both punctuation
// and pre-punctuation gets the combined class '"' tested by CLASS2 -- confirm.
613 for(p=PrePunctuationSymbols; *p; ++p)
614 if (p_table[c=(
unsigned char)*p] ==
'@')
616 else if (p_table[c] ==
'.')
619 EST_warning(
"Character '%c' has two classes, '%c' and '%c'",
620 *p, p_table[c],
'$');
// Consumes the pending peeked character: clears peeked_charp so that the
// next getch_internal() call reads from the underlying source again.
627 inline int EST_TokenStream::getpeeked_internal(
void)
629 peeked_charp = FALSE;
// Reads the next character from the stream, whatever its backing source.
634 int EST_TokenStream::getch_internal()
// A character left over from a previous peek is returned first.
637 if (EST_TokenStream::peeked_charp)
639 return getpeeked_internal();
645 cerr <<
"EST_TokenStream unset" << endl;
// FILE-backed stream: read one byte; a zero item count means nothing was read.
652 if (stdio_fread(&lc,1,1,fp) == 0)
659 cerr <<
"EST_TokenStream pipe not yet supported" << endl;
// String-backed stream: hand out the next byte until the buffer is exhausted.
// NOTE(review): buffer is allocated as char[]; where char is signed, a byte
// >= 0x80 is returned as a negative int and 0xFF can compare equal to EOF in
// callers that test `!= EOF` -- confirm, and consider an unsigned char cast.
666 if (pos < buffer_length)
669 return buffer[pos++];
674 cerr <<
"EST_TokenStream: unknown type" << endl;
// Non-inline entry point for reading one character; forwards to
// getch_internal().
681 int EST_TokenStream::getch(
void)
683 return getch_internal();
// Look-ahead of one character: the character obtained from getch_internal()
// is kept in peeked_char so that it can be delivered again later.
686 inline int EST_TokenStream::peekch_internal()
691 peeked_char = getch_internal();
// Non-inline entry point for peeking at the next character; forwards to
// peekch_internal().
697 int EST_TokenStream::peekch(
void)
699 return peekch_internal();
// True when character C has class code CL in p_table. C is cast to
// unsigned char so that negative char values index the table safely.
703 #define CLASS(C,CL) (p_table[(unsigned char)(C)]==(CL))
// True when character C has either class CL1 or CL2. C is expanded twice, so
// it must not have side effects.
705 #define CLASS2(C,CL1,CL2) (p_table[(unsigned char)(C)]==(CL1)||p_table[(unsigned char)(C)]==(CL2))
721 for (i=0; (CLASS(c=getch_internal(),
' ') &&
724 if (c ==
'\n') linepos++;
725 tok_wspace = check_extend_str(tok_wspace,i,&tok_wspacelen);
728 tok_wspace[i] =
'\0';
740 ((c = getch_internal()) != EOF)
745 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
747 c = getch_internal();
754 for (i=0,tok_stuff[i++]=c;
757 !CLASS(c=peekch_internal(),
' ') &&
761 tok_stuff = check_extend_str(tok_stuff,i,&tok_stufflen);
763 tok_stuff[i++] = getpeeked_internal();
769 ((j < i) && CLASS2(tok_stuff[j],
'$',
'"'));
771 if ((j > 0) && (j < i))
773 tok_prepuncs = check_extend_str(tok_prepuncs,j+1,&tok_prepuncslen);
774 memmove(tok_prepuncs,tok_stuff,j);
775 tok_prepuncs[j] =
'\0';
787 ((j > 0) && CLASS2(word[j],
'.',
'"'));
789 if (word[j+1] !=
'\0')
798 if (tok_wspace[0] ==
'\0')
820 if ((
peek().whitespace().contains(
"\n")) ||
eof())
846 quoted[0] = quote(0);
847 for (i=1,j=0; j < s.
length(); j++,i++)
849 if (s(j) == quote(0))
850 quoted[i++] = escape(0);
851 else if (s(j) == escape(0))
852 quoted[i++] = escape(0);
855 quoted[i++] = quote(0);
857 quoted_form = quoted;
867 return Origin+
":"+itoString(linepos);