26 #include "lt-memory.h"
/* Error-reporting macros, first variant: forward the message and its
 * arguments to the LT_ERROR* macros, passing NECHAR as the first argument.
 * NOTE(review): the same names are defined again below; the conditional
 * directives that pick one variant are presumably on lines not visible
 * here -- confirm. */
29 #define ERR(m) LT_ERROR(NECHAR,m)
30 #define ERR1(m,x) LT_ERROR1(NECHAR,m,x)
31 #define ERR2(m,x,y) LT_ERROR2(NECHAR,m,x,y)
32 #define ERR3(m,x,y,z) LT_ERROR3(NECHAR,m,x,y,z)
/* In this variant Realloc is routed to srealloc. */
35 #define Realloc srealloc
/* Error-reporting macros, second variant: print directly to stderr with
 * fprintf, using m as the format string. */
41 #define ERR(m) fprintf(stderr,m)
42 #define ERR1(m,x) fprintf(stderr,m,x)
43 #define ERR2(m,x,y) fprintf(stderr,m,x,y)
44 #define ERR3(m,x,y,z) fprintf(stderr,m,x,y,z)
/* Forward declaration: get_translated_line() hands external entities to
 * this function, which is defined further down. */
55 static int get_translated_line1(InputSource s);
/*
 * Build an InputSource that reads from an already-open stdio stream.
 *
 * description: passed as the third argument of NewExternalEntity; also
 *              inspected for a '/' character.
 * file:        stdio stream, wrapped with MakeFILE16FromFILE in "r" mode.
 *
 * Returns whatever NewInputSource returns for the new entity and the
 * wrapped stream.
 */
57 InputSource SourceFromStream(
const char8 *description, FILE *file)
61 e = NewExternalEntity(0, 0, description, 0, 0);
/* A description without any '/' gets the default base URL. */
62 if(!strchr8(description,
'/'))
63 EntitySetBaseURL(e, default_base_url());
65 return NewInputSource(e, MakeFILE16FromFILE(file,
"r"));
/*
 * Open entity e for reading and return an InputSource for it.
 *
 * For an external entity the FILE16 comes from url_open() on EntityURL(e).
 * The visible code also builds a FILE16 over e->text with
 * MakeFILE16FromString (presumably the non-external branch; the line
 * joining the two branches is not visible here -- confirm).
 */
68 InputSource EntityOpen(Entity e)
72 if(e->type == ET_external)
74 const char8 *url = EntityURL(e);
/* Taken when there is no URL or url_open() returns a null FILE16; the
 * handling itself is not visible here. */
76 if(!url || !(f16 = url_open(url, 0,
"r", 0)))
/* Read straight from the entity's text; -1 is passed as the size argument. */
81 f16 = MakeFILE16FromString((
char *)e->text, -1,
"r");
84 return NewInputSource(e, f16);
/*
 * Allocate an InputSource reading entity e through f16 and reset its
 * bookkeeping fields to their initial values.
 *
 * The Malloc failure branch and the final return are not visible here.
 */
88 InputSource NewInputSource(Entity e, FILE16 *f16)
92 if(!(source = Malloc(
sizeof(*source))))
/* No line buffer yet: zero capacity and zero length. */
96 source->line_alloc = 0;
97 source->line_length = 0;
103 source->file16 = f16;
/* Byte accounting starts from zero. */
105 source->bytes_consumed = 0;
106 source->bytes_before_current_line = 0;
107 source->line_end_was_cr = 0;
108 source->line_number = 0;
/* Flag that nothing has been read from this source so far. */
109 source->not_read_yet = 1;
/* The raw input buffer starts empty. */
111 source->nextin = source->insize = 0;
/*
 * Report a line number and character position for source s through
 * *linenum and *charnum.
 *
 * Cases visible in this listing:
 *   - s->entity is external: the source's own line number is used;
 *   - the parent entity f is external: the position comes from the
 *     entity's line_offset / line1_char_offset, with s->line_number added
 *     when e->matches_parent_text is set;
 *   - the parent has matches_parent_text set: the parent's and the
 *     entity's offsets are combined.
 * The return statements and the remaining charnum assignments are not
 * visible here.
 */
118 int SourceLineAndChar(InputSource s,
int *linenum,
int *charnum)
120 Entity e = s->entity, f = e->parent;
122 if(e->type == ET_external)
124 *linenum = s->line_number;
129 if(f && f->type == ET_external)
131 if(e->matches_parent_text)
133 *linenum = e->line_offset + s->line_number;
/* The first-line character offset only contributes while the source is
 * still on line 0. */
134 *charnum = (s->line_number == 0 ? e->line1_char_offset : 0) +
140 *linenum = e->line_offset;
141 *charnum = e->line1_char_offset;
146 if(f && f->matches_parent_text)
148 *linenum = f->line_offset + e->line_offset;
/* The parent's first-line offset only contributes when the entity starts
 * on the parent's line 0. */
149 *charnum = (e->line_offset == 0 ? f->line1_char_offset : 0) +
150 e->line1_char_offset;
/*
 * Report the current position of source s.
 *
 * *byte_offset receives SourceTell(s). The assignment through the entity
 * out-parameter is presumably on a line not visible here -- confirm.
 */
157 void SourcePosition(InputSource s, Entity *
entity,
int *byte_offset)
160 *byte_offset = SourceTell(s);
/*
 * Return the byte offset of the current read position of source s:
 * s->bytes_before_current_line plus the bytes that the first s->next
 * characters of the current line occupy in the entity's encoding.
 *
 * NOTE(review): the first return below appears ahead of the switch;
 * presumably it sits in a conditionally compiled branch whose directives
 * are not visible here -- confirm.
 */
163 int SourceTell(InputSource s)
166 return s->bytes_before_current_line + s->next;
168 switch(s->entity->encoding)
/* Both UCS-2 byte orders: two bytes per character. */
170 case CE_ISO_10646_UCS_2B:
172 case CE_ISO_10646_UCS_2L:
174 return s->bytes_before_current_line + 2 * s->next;
/* One byte per character. */
184 case CE_unspecified_ascii_superset:
185 return s->bytes_before_current_line + s->next;
/* When the current line was flagged complicated_utf8_line, walk the
 * characters consumed so far and accumulate n from per-character ranges
 * (the increments themselves are not visible here). */
187 if(s->complicated_utf8_line)
191 for(i = 0; i < s->next; i++)
198 else if(c >= 0xd800 && c <= 0xdfff)
/* NOTE(review): if these thresholds are UTF-8 length boundaries, the
 * 4-byte form would normally extend to 0x1fffff rather than 0x1ffff --
 * confirm against the increments on the lines not shown. */
203 else if(c <= 0x1ffff)
205 else if(c <= 0x3ffffff)
211 return s->bytes_before_current_line + n;
/* Otherwise one byte per character. */
214 return s->bytes_before_current_line + s->next;
/*
 * Reposition source s at byte `offset` from the start of its file.
 *
 * Resets the byte counters to `offset`, empties the raw input buffer and
 * returns the result of Fseek(s->file16, offset, SEEK_SET).
 */
221 int SourceSeek(InputSource s,
int offset)
226 s->bytes_consumed = s->bytes_before_current_line = offset;
227 s->nextin = s->insize = 0;
/* Presumably a sentinel showing that line numbers are no longer
 * meaningful after a seek -- confirm. */
229 s->line_number = -999999;
230 return Fseek(s->file16, offset, SEEK_SET);
/*
 * Fetch the next line of source s into s->line / s->line_length.
 *
 * External entities are delegated to get_translated_line1(). For other
 * entities the line is not copied: s->line is pointed into the buffer at
 * f16->handle, at byte offset f16->handle2, which is then advanced.
 */
233 static int get_translated_line(InputSource s)
/* NOTE(review): presumably a member of a locally declared struct _FILE16
 * mirroring the real layout; the enclosing lines are not visible here. */
239 int handle2, handle3;
243 Entity e = s->entity;
245 struct _FILE16 *f16 = (
struct _FILE16 *)s->file16;
248 if(e->type == ET_external)
249 return get_translated_line1(s);
/* True when the Char at the current offset is zero; the handling of that
 * case is not visible here. */
251 if(!*(Char *)((
char *)f16->handle + f16->handle2))
257 s->line = (Char *)((
char *)f16->handle + f16->handle2);
/* Scan forward to the first newline or terminating zero. */
258 for(p=s->line; *p && *p !=
'\n'; p++)
/* Store the new position as a byte offset from the buffer start. */
262 f16->handle2 = (
char *)p - (
char *)f16->handle;
263 s->line_length = p - s->line;
/* NOTE(review): assigned after handle2 has already been advanced, so this
 * holds the offset of the end of the line just fetched, not its start --
 * confirm that this is intended. */
265 s->bytes_before_current_line = f16->handle2;
/*
 * Decode input of an external entity into the Char buffer s->line.
 *
 * Raw bytes come from s->inbuf (refilled with Readu), are converted
 * according to s->entity->encoding and appended to the growable output
 * buffer; s->line_alloc, s->line_length and the byte counters are updated.
 * Many lines of this function are not visible in this listing; the notes
 * below describe only what the visible statements show.
 */
270 static int get_translated_line1(InputSource s)
/* Work on local copies of the buffer state. startin remembers where
 * consumption began so consumed-byte counts can be derived from nextin. */
273 unsigned char *inbuf = s->inbuf;
274 int nextin = s->nextin, insize = s->insize;
275 int startin = s->nextin;
276 Char *outbuf = s->line;
277 int outsize = s->line_alloc;
/* Carries over whether the previous line ended with a CR. */
280 int ignore_linefeed = s->line_end_was_cr;
285 CharacterEncoding enc = s->entity->encoding;
287 s->complicated_utf8_line = 0;
/* For ISO 8859-2 .. 8859-9, select the matching byte-to-Unicode table. */
289 if(enc >= CE_ISO_8859_2 && enc <= CE_ISO_8859_9)
290 to_unicode = iso_to_unicode[enc - CE_ISO_8859_2];
294 s->line_end_was_cr = 0;
/* The new line starts at the count of bytes consumed so far. */
295 s->bytes_before_current_line = s->bytes_consumed;
/* Make room for one output Char per remaining input byte.
 * NOTE(review): the Realloc result overwrites outbuf directly; any failure
 * handling is on lines not visible here -- confirm. */
300 if(outsize < nextout + (insize - nextin))
302 outsize = nextout + (insize - nextin);
303 outbuf = Realloc(outbuf, outsize *
sizeof(Char));
306 while(nextin < insize)
/* UCS-2 big-endian: the first byte is the high byte. A character needs two
 * bytes, so nextin+2 > insize means only part of one is buffered. */
313 case CE_ISO_10646_UCS_2B:
315 if(nextin+2 > insize)
317 c = (inbuf[nextin] << 8) + inbuf[nextin+1];
/* UCS-2 little-endian: the second byte is the high byte. */
320 case CE_ISO_10646_UCS_2L:
322 if(nextin+2 > insize)
324 c = (inbuf[nextin+1] << 8) + inbuf[nextin];
328 case CE_unspecified_ascii_superset:
/* Table-driven conversion; a table value of (unsigned int)-1 is reported
 * as an illegal character for this encoding. */
339 c = to_unicode[inbuf[nextin++]];
340 if(c == (
unsigned int)-1)
341 ERR3(
"Illegal %s character <0x%x> "
342 "at file offset %d\n",
343 CharacterEncodingName[enc], inbuf[nextin-1],
344 s->bytes_consumed + nextin - 1 - startin);
/* Values at or below 0xc0 or at or above 0xfe are rejected as UTF-8 start
 * bytes. */
350 if(c <= 0xc0 || c >= 0xfe)
352 ERR2(
"Illegal UTF-8 start byte <0x%x> "
353 "at file offset %d\n",
354 c, s->bytes_consumed + nextin - 1 - startin);
/* Not all `more` following bytes are in the buffer; the handling is not
 * visible here. */
382 if(nextin+more > insize)
/* Flag the line so SourceTell() counts bytes per character, then fold the
 * low six bits of each following byte into c. */
387 s->complicated_utf8_line = 1;
388 for(i=0; i<more; i++)
389 c = (c << 6) + (inbuf[nextin++] & 0x3f);
392 ERR(
"read from entity with unsupported encoding!\n");
/* Legality check on the decoded value; for the two UTF-16 encodings,
 * values in 0xd800..0xdfff are exempt from the error below.
 * NOTE(review): `c > 0x110000` lets the value 0x110000 itself through,
 * although the highest Unicode code point is 0x10ffff -- confirm. */
396 if(c > 0x110000 || (c < 0x10000 && !is_xml_legal(c)))
397 if(!(enc == CE_UTF_16L || enc == CE_UTF_16B) ||
398 c < 0xd800 || c > 0xdfff)
402 ERR2(
"Error: illegal character <0x%x> "
403 "immediately before file offset %d\n",
404 c, s->bytes_consumed + nextin - startin);
/* A linefeed arriving while ignore_linefeed is set (previous line ended in
 * CR); the line-start offset is moved on by the bytes consumed so far. */
408 if(c ==
'\n' && ignore_linefeed)
412 s->bytes_before_current_line += (nextin - startin);
/* Remember for the next call that this line ended with a CR (the
 * condition guarding this is not visible here). */
419 s->line_end_was_cr = 1;
/* Store c as two Chars: a high half based at 0xd800 and a low half based
 * at 0xdc00, each carrying ten bits of (c - 0x10000). */
427 outbuf[nextout++] = ((c - 0x10000) >> 10) + 0xd800;
428 outbuf[nextout++] = ((c - 0x10000) & 0x3ff) + 0xdc00;
431 outbuf[nextout++] = c;
433 outbuf[nextout++] = c;
/* Account for the bytes consumed and publish the buffer capacity and
 * length back to the source. */
440 s->bytes_consumed += (nextin - startin);
442 s->line_alloc = outsize;
443 s->line_length = nextout;
/* Input buffer exhausted: move the unconsumed tail to the front. */
453 remaining = insize - nextin;
454 for(i=0; i<remaining; i++)
455 inbuf[i] = inbuf[nextin + i];
460 s->bytes_consumed += (nextin - startin);
/* Refill after the tail: inbuf+insize-nextin equals inbuf+remaining, and
 * the space offered is the buffer size less the tail. How the Readu
 * result is checked is not visible here. */
462 insize = Readu(s->file16,
463 inbuf+insize-nextin,
sizeof(s->inbuf)-remaining);
464 nextin = startin = 0;
471 s->line_alloc = outsize;
472 s->line_length = nextout;
/*
 * Set s->entity->encoding by inspecting the first four bytes of input.
 *
 * Patterns tested in the visible code:
 *   00 00 00 '<'   -> CE_ISO_10646_UCS_4B
 *   '<' 00 00 00   -> CE_ISO_10646_UCS_4L
 *   FE FF          -> CE_UTF_16B
 *   00 '<' 00 '?'  -> CE_UTF_16B
 *   FF FE          -> CE_UTF_16L
 *   '<' 00 '?' 00  -> CE_UTF_16L
 * CE_unspecified_ascii_superset and CE_UTF_8 are also assigned; the
 * conditions selecting them are not visible here.
 */
480 void determine_character_encoding(InputSource s)
482 Entity e = s->entity;
484 unsigned char *b = (
unsigned char *)s->inbuf;
/* Clear the first four bytes before they are compared below. */
486 b[0] = b[1] = b[2] = b[3] = 0;
/* Read up to (4 - s->insize) bytes, placed after the s->insize bytes
 * already in the buffer. */
490 nread = Readu(s->file16, s->inbuf + s->insize, 4 - s->insize);
499 if(b[0] == 0 && b[1] == 0 && b[2] == 0 && b[3] ==
'<')
500 e->encoding = CE_ISO_10646_UCS_4B;
501 else if(b[0] ==
'<' && b[1] == 0 && b[2] == 0 && b[3] == 0)
502 e->encoding = CE_ISO_10646_UCS_4L;
505 if(b[0] == 0xfe && b[1] == 0xff)
507 e->encoding = CE_UTF_16B;
510 else if(b[0] == 0 && b[1] ==
'<' && b[2] == 0 && b[3] ==
'?')
511 e->encoding = CE_UTF_16B;
512 else if(b[0] == 0xff && b[1] == 0xfe)
514 e->encoding = CE_UTF_16L;
517 else if(b[0] ==
'<' && b[1] == 0 && b[2] ==
'?' && b[3] == 0)
518 e->encoding = CE_UTF_16L;
522 e->encoding = CE_unspecified_ascii_superset;
524 e->encoding = CE_UTF_8;
/*
 * Fetch a fresh line with get_translated_line() and return its next
 * character, s->line[s->next], advancing s->next.
 *
 * Must not be called once s->seen_eoe is set (asserted). The return paths
 * taken after an I/O error or an empty line are not visible here.
 */
529 int get_with_fill(InputSource s)
531 assert(!s->seen_eoe);
/* A non-zero result is reported as an I/O error on this entity. */
533 if(get_translated_line(s) != 0)
537 ERR1(
"I/O error on stream <%s>, ignore further errors\n",
538 EntityDescription(s->entity));
/* Make the line look fully consumed. */
541 s->line_length = s->next;
/* No characters were produced for this line. */
546 if(s->line_length == 0)
549 s->line_length = s->next;
561 return s->line[s->next++];