46 #include "EST_Pathname.h"
47 #include "EST_cutils.h"
48 #include "EST_Token.h"
49 #include "EST_FileType.h"
52 #include "EST_TVector.h"
55 Declare_TVector_Base_T(
EST_WFST_State *, NULL, NULL, EST_WFST_StateP)
58 #if defined(INSTANTIATE_TEMPLATES)
59 #include "../base_class/EST_TList.cc"
63 #include "../base_class/EST_TVector.cc"
65 Instantiate_TVector_T(EST_WFST_State *, EST_WFST_StateP)
70 int EST_WFST::traverse_tag = 0;
72 EST_WFST_State::EST_WFST_State(
int name)
// Copy constructor: takes the name and type from `state`, then walks the
// transition list of `state`.
// NOTE(review): the loop body is not part of this excerpt, so what is done
// with each visited transition cannot be confirmed from here.
79 EST_WFST_State::EST_WFST_State(
const EST_WFST_State &state)
83 p_name = state.p_name;
84 p_type = state.p_type;
// Visit every transition held by the source state, head to tail.
86 for (p=state.transitions.head(); p != 0; p=p->next())
// Destructor: the transition list stores pointers, so each element is
// deleted individually while walking the list from head to tail.
90 EST_WFST_State::~EST_WFST_State()
94 for (p=transitions.head(); p != 0; p=p->next())
95 delete transitions(p);
110 EST_WFST::~EST_WFST()
120 for (
int i=0; i < p_num_states; ++i)
135 p_in_symbols = wfst.p_in_symbols;
136 p_out_symbols = wfst.p_out_symbols;
137 p_start_state = wfst.p_start_state;
138 current_tag = wfst.current_tag;
139 p_num_states = wfst.p_num_states;
140 p_states.
resize(p_num_states);
141 for (
int i=0; i < p_num_states; ++i)
142 p_states[i] =
new EST_WFST_State(*wfst.
state(i));
151 p_states.
resize(init_num_states);
152 for (i=0; i < p_states.
length(); i++)
154 p_num_states = init_num_states;
165 for (iin=in_alphabet; iin != NIL; iin=cdr(iin))
166 if ((!streq(get_c_string(car(iin)),
"__epsilon__")) &&
167 (!streq(get_c_string(car(iin)),
"=")))
168 in.
append(get_c_string(car(iin)));
170 out.
append(
"__epsilon__");
172 for (oout=out_alphabet; oout != NIL; oout=cdr(oout))
173 if ((!streq(get_c_string(car(oout)),
"__epsilon__")) &&
174 (!streq(get_c_string(car(oout)),
"=")))
175 out.
append(get_c_string(car(oout)));
177 p_in_symbols.
init(in);
178 p_out_symbols.
init(out);
185 int in_i = p_in_symbols.
name(in);
190 cerr <<
"WFST transduce: \"" << in <<
"\" not in alphabet" << endl;
191 return WFST_ERROR_STATE;
196 out = p_out_symbols.
name(out_i);
203 EST_WFST_State *s = p_states(state);
206 for (i=s->transitions.head(); i != 0; i=i->next())
208 if (in == s->transitions(i)->in_symbol())
211 s->transitions(i)->set_weight(1+s->transitions(i)->weight());
212 out.
append(s->transitions(i));
221 EST_WFST_State *s = p_states(state);
224 for (i=s->transitions.head(); i != 0; i=i->next())
226 if (in == s->transitions(i)->in_symbol())
228 out = s->transitions(i)->out_symbol();
229 return s->transitions(i)->state();
233 return WFST_ERROR_STATE;
247 int in_i = p_in_symbols.
name(in);
248 int out_i = p_out_symbols.
name(out);
250 if ((in_i == -1) || (out_i == -1))
252 cerr <<
"WFST: one of " << in <<
"/" << out <<
" not in alphabet"
254 return WFST_ERROR_STATE;
270 EST_WFST_State *s = p_states(state);
273 for (i=s->transitions.head(); i != 0; i=i->next())
275 if ((in == s->transitions(i)->in_symbol()) &&
276 (out == s->transitions(i)->out_symbol()))
279 s->transitions(i)->set_weight(1+s->transitions(i)->weight());
280 return s->transitions(i);
295 return WFST_ERROR_STATE;
299 prob = trans->weight();
300 return trans->state();
// Writes the states of this WFST to `fd` in binary form.
// For every state the number of transitions and an integer type code are
// each written as one 4-byte field; then, for every transition of that
// state, further fields are written (only the writes of `next_state` and
// `weight` are visible in this excerpt).
// NOTE(review): the size argument to fwrite is the literal 4 rather than
// sizeof(...); this assumes int and the type of `weight` are 4 bytes wide
// on the target platform - confirm. The fwrite return values are not checked
// in the visible lines.
304 EST_write_status EST_WFST::save_binary(FILE *fd)
308 int num_transitions, type, in, out, next_state;
311 for (i=0; i<p_num_states; i++)
// Per-state header, field 1: how many transitions follow.
313 num_transitions = p_states[i]->num_transitions();
314 fwrite(&num_transitions,4,1,fd);
// Map the state's type onto the integer code that is stored in the file
// (only the nonfinal assignment is visible here).
315 if (p_states[i]->type() == wfst_final)
317 else if (p_states[i]->type() == wfst_nonfinal)
318 type = WFST_NONFINAL;
319 else if (p_states[i]->type() == wfst_licence)
// Per-state header, field 2: the type code.
323 fwrite(&type,4,1,fd);
// Gather the fields of each outgoing transition before writing them.
324 for (j=p_states[i]->transitions.head(); j != 0; j=j->next())
326 in = p_states[i]->transitions(j)->in_symbol();
327 out = p_states[i]->transitions(j)->out_symbol();
328 next_state = p_states[i]->transitions(j)->state();
329 weight = p_states[i]->transitions(j)->weight();
// Destination state and weight are the last two visible fields of a
// transition record.
341 fwrite(&next_state,4,1,fd);
342 fwrite(&weight,4,1,fd);
354 static EST_Regex needquotes(
".*[()'\";., \t\n\r].*");
359 else if ((ofd = fopen(filename,
"wb")) == NULL)
361 cerr <<
"WFST: cannot write to file \"" << filename <<
"\"" << endl;
362 return misc_write_error;
365 fprintf(ofd,
"EST_File fst\n");
366 fprintf(ofd,
"DataType %s\n",(
const char *)type);
367 fprintf(ofd,
"in %s\n",
369 p_in_symbols.print_to_string(TRUE)+
")",
371 fprintf(ofd,
"out %s\n",
373 p_out_symbols.print_to_string(TRUE)+
")",
375 fprintf(ofd,
"NumStates %d\n",p_num_states);
376 fprintf(ofd,
"ByteOrder %s\n", ((EST_NATIVE_BO == bo_big) ?
"10" :
"01"));
377 fprintf(ofd,
"EST_Header_End\n");
379 if (type ==
"binary")
383 for (i=0; i < p_num_states; i++)
385 EST_WFST_State *s=p_states[i];
386 fprintf(ofd,
"((%d ",s->name());
390 fprintf(ofd,
"final ");
393 fprintf(ofd,
"nonfinal ");
396 fprintf(ofd,
"licence ");
399 fprintf(ofd,
"error ");
401 fprintf(ofd,
"%d)\n",s->num_transitions());
402 for (j=s->transitions.head(); j != 0; j=j->next())
404 EST_String in = p_in_symbols.
name(s->transitions(j)->in_symbol());
405 EST_String out=p_out_symbols.
name(s->transitions(j)->out_symbol());
407 fprintf(ofd,
" (%s ",(
const char *)quote_string(in,
"\"",
"\\",1));
409 fprintf(ofd,
" (%s ",(
const char *)in);
411 fprintf(ofd,
" %s ",(
const char *)quote_string(out,
"\"",
"\\",1));
413 fprintf(ofd,
" %s ",(
const char *)out);
414 fprintf(ofd,
"%d %g)\n",
415 s->transitions(j)->state(),
416 s->transitions(j)->weight());
// File-local helper taking a FILE* and a swap flag and returning a float.
// When `swap` is non-zero the value `f` is passed through swapfloat().
// NOTE(review): the statement that fills `f` is not visible in this
// excerpt; presumably it is read from `fd` - confirm.
427 static float get_float(FILE *fd,
int swap)
431 if (swap) swapfloat(&f);
435 static int get_int(FILE *fd,
int swap)
// Reads state descriptions in binary form from `fd`.
// For each of `num_states` states it reads, via get_int(), the number of
// transitions and a state-type code; then for every transition it reads
// the input symbol, output symbol and destination state (ints) followed by
// the transition cost (float), and adds the transition to p_states[i].
// Problems are reported on cerr and recorded by setting `r` to
// read_format_error.
// NOTE(review): the remainder of the parameter list and the bodies of the
// state-type branches are not visible in this excerpt.
445 EST_read_status EST_WFST::load_binary(FILE *fd,
452 int num_trans, state_type;
453 int in_sym, out_sym, next_state;
458 for (i=0; i < num_states; i++)
// Per-state header: transition count, then type code.
460 num_trans = get_int(fd,swap);
461 state_type = get_int(fd,swap);
// Dispatch on the stored type code; a code matching none of the four
// known values is reported as a format error below.
463 if (state_type == WFST_FINAL)
465 else if (state_type == WFST_NONFINAL)
467 else if (state_type == WFST_LICENCE)
469 else if (state_type == WFST_ERROR)
473 cerr <<
"WFST load: unknown state type \"" <<
474 state_type <<
"\"" << endl;
475 r = read_format_error;
// NOTE(review): the condition guarding this second diagnostic is not
// visible in this excerpt.
481 cerr <<
"WFST load: internal error: unexpected state misalignment"
483 r = read_format_error;
// Transition records: fields are read in the order in_sym, out_sym,
// next_state, trans_cost.
487 for (j=0; j < num_trans; j++)
489 in_sym = get_int(fd,swap);
496 out_sym = get_int(fd,swap);
497 next_state = get_int(fd,swap);
498 trans_cost = get_float(fd,swap);
500 p_states[i]->add_transition(trans_cost,next_state,in_sym,out_sym);
520 if ((fd=fopen(filename,
"r")) == NULL)
522 cerr <<
"WFST load: unable to open \"" << filename
523 <<
"\" for reading" << endl;
529 if (((r = read_est_header(ts, hinfo, ascii, t)) != format_ok) ||
532 cerr <<
"WFST load: not a WFST file \"" << filename <<
"\"" <<endl;
533 return misc_read_error;
539 read_from_string(get_c_string(read_from_string(hinfo.
val(
"in"))));
541 read_from_string(get_c_string(read_from_string(hinfo.
val(
"out"))));
545 init(inalpha,outalpha);
547 int num_states = hinfo.
ival(
"NumStates");
552 if (!hinfo.
present(
"ByteOrder"))
554 else if (((hinfo.
val(
"ByteOrder") ==
"01") ? bo_little : bo_big)
559 r = load_binary(fd,hinfo,num_states,swap);
563 for (i=0; i < num_states; i++)
565 LISP sd = lreadf(fd);
566 if (i != get_c_int(car(car(sd))))
568 cerr <<
"WFST load: expected description of state " << i <<
569 " but found \"" << siod_sprint(sd) <<
"\"" << endl;
570 r = read_format_error;
573 if (streq(
"final",get_c_string(car(cdr(car(sd))))))
575 else if (streq(
"nonfinal",get_c_string(car(cdr(car(sd))))))
577 else if (streq(
"licence",get_c_string(car(cdr(car(sd))))))
581 cerr <<
"WFST load: unknown state type \"" <<
582 siod_sprint(car(cdr(car(sd)))) <<
"\"" << endl;
583 r = read_format_error;
589 cerr <<
"WFST load: internal error: unexpected state misalignment"
591 r = read_format_error;
594 if (load_transitions_from_lisp(s,cdr(sd)) != format_ok)
596 r = read_format_error;
// Adds the transitions described by the LISP list `trans` to state `s`.
// Each list element supplies four fields, fetched with siod_nth(): field 0
// is the input symbol name, field 1 the output symbol name, field 2 the
// destination state and field 3 the weight. Symbol names are looked up in
// p_in_symbols / p_out_symbols; if either lookup yields -1 the offending
// element is printed to cerr and read_format_error is returned.
// NOTE(review): the return statement for the success path is not visible
// in this excerpt.
607 EST_read_status EST_WFST::load_transitions_from_lisp(
int s, LISP trans)
611 for (t=trans; t != NIL; t=cdr(t))
613 float w = get_c_float(siod_nth(3,car(t)));
614 int end = get_c_int(siod_nth(2,car(t)));
// Translate the symbol names into their integer indices in the alphabets.
615 int in = p_in_symbols.
name(get_c_string(siod_nth(0,car(t))));
616 int out = p_out_symbols.
name(get_c_string(siod_nth(1,car(t))));
// -1 from the lookup means the symbol is not in the alphabet.
618 if ((in == -1) || (out == -1))
620 cerr <<
"WFST load: unknown vocabulary in state transition"
622 cerr <<
"WFST load: " << siod_sprint(car(t)) << endl;
623 return read_format_error;
625 p_states[s]->add_transition(w,end,in,out);
635 for (i=0; i < p_num_states; i++)
636 tt += p_states(i)->transitions.
length();
638 return EST_String(
"WFST ")+itoString(p_num_states)+
" states "+
639 itoString(tt)+
" transitions ";
643 void EST_WFST::more_states(
int new_max)
648 for (i=p_num_states; i < new_max; i++)
655 EST_WFST_State *s =
new EST_WFST_State(p_num_states);
657 if (p_num_states >= p_states.
length())
660 more_states((
int)((
float)(p_states.
length()+1)*1.5));
663 p_states[p_num_states] = s;
664 s->set_type(state_type);
677 for (i=0; i < p_num_states; i++)
679 EST_WFST_State *s=p_states[i];
680 for (j=s->transitions.head(); j !=0; j=j->next())
681 s->transitions(j)->set_weight(0);
692 for (i=0; i < p_num_states; i++)
694 EST_WFST_State *s=p_states[i];
695 for (t=0,j=s->transitions.head(); j !=0; j=j->next())
696 t += s->transitions(j)->weight();
698 for (j=s->transitions.head(); j !=0; j=j->next())
699 s->transitions(j)->set_weight(s->transitions(j)->weight()/t);