// Forward declaration of the file-local driver so that main (below) can call
// it before its definition appears. Takes the process argument count and
// argument vector and returns an int.
60 static int wfst_build_main(
int argc,
char **argv);
// Program entry point: hands argc/argv unchanged to wfst_build_main.
// The int returned by that call is not used by the statement visible here.
// NOTE(review): the remainder of this function (braces, exit/return) is not
// visible in this excerpt -- confirm how the process exit status is produced.
118 int main(
int argc,
char **argv)
121 wfst_build_main(argc,argv);
// Driver for the wfst_build tool: selects a construction method from the
// -type option, fills wfst from the file(s) in files, prints summary lines,
// and saves the result to outfile in the format named by -otype.
//
// argc, argv: the command line as received by main.
// NOTE(review): local declarations (al, files, outfile, ruleset, wfst, a, b,
// dwfst, mwfst), the call that consumes the usage text below, all braces and
// the return statement are not visible in this excerpt.
127 static int wfst_build_main(
int argc,
char **argv)
// Usage / option description text, built by concatenating onto an EST_String.
// NOTE(review): the -type list in this text does not mention difference,
// although the dispatch chain further down accepts it -- the help text and
// the code disagree.
136 EST_String(
"[option] [rulefile0] [rulefile1] ...\n")+
137 "Summary: Build a weighted finite state transducer from rules/wfsts\n"+
138 "-type <string> {kk} Input rule type: kk, lts, rg, tl, compose, regex\n"+
139 " union, intersect, concat, asis\n"+
140 "-determinize Determinize WFST before saving it\n"+
141 "-detmin Determinize and minimize WFST before saving it\n"+
142 "-o <ofile> Output file for saved WFST (default stdout)\n"+
143 "-otype <string> {ascii}\n"+
144 " Output type, ascii or binary\n"+
145 "-heap <int> {210000}\n"+
146 " Set size of Lisp heap, needed for large rulesets\n"+
147 "-q Quiet mode, no summary generated\n",
// Output destination is taken from the value of the -o option.
// NOTE(review): the usage text promises stdout as the default; the code that
// handles an absent -o is not visible here.
151 outfile = al.
val(
"-o");
// siod_init receives the integer value of -heap, which the usage text
// describes as the Lisp heap size.
155 siod_init(al.
ival(
"-heap"));
// Hold the two values pulled out of the ruleset in the regex branch below.
158 LISP inalpha, outalpha;
// ruleset is registered with gc_protect here; the matching gc_unprotect is
// the last visible statement of this function.
160 gc_protect(&ruleset);
// Dispatch on the -type option value.
// The kk, lts, rg and tl branches share one shape: ruleset becomes the car of
// what vload returns for the entry at the head of files, and is then passed
// with *wfst to the type-specific compile function.
162 if (al.
val(
"-type") ==
"kk")
164 ruleset = car(vload(files(files.head()),1));
165 kkcompile(ruleset,*wfst);
167 else if (al.
val(
"-type") ==
"lts")
169 ruleset = car(vload(files(files.head()),1));
170 ltscompile(ruleset,*wfst);
172 else if (al.
val(
"-type") ==
"rg")
174 ruleset = car(vload(files(files.head()),1));
175 rgcompile(ruleset,*wfst);
177 else if (al.
val(
"-type") ==
"tl")
179 ruleset = car(vload(files(files.head()),1));
180 tlcompile(ruleset,*wfst);
// asis: load file 0 straight into wfst; the process exits with -1 unless
// load returns format_ok.
182 else if (al.
val(
"-type") ==
"asis")
184 if (wfst->
load(files.
nth(0)) != format_ok) exit(-1);
// Two-operand types (compose, union, intersect, concat, difference).
// Each branch calls EST_error when the number of files is not exactly two,
// then loads file 0 into a and file 1 into b, exiting with -1 if either load
// does not return format_ok.
// NOTE(review): the statement that combines a and b into wfst is not visible
// in any of these branches in this excerpt.
186 else if (al.
val(
"-type") ==
"compose")
190 if (files.length() != 2)
191 EST_error(
"compose requires two WFSTs to combine");
193 if (a.
load(files.
nth(0)) != format_ok) exit(-1);
194 if (b.
load(files.
nth(1)) != format_ok) exit(-1);
198 else if (al.
val(
"-type") ==
"union")
202 if (files.length() != 2)
203 EST_error(
"union requires two WFSTs to combine");
205 if (a.
load(files.
nth(0)) != format_ok) exit(-1);
206 if (b.
load(files.
nth(1)) != format_ok) exit(-1);
210 else if (al.
val(
"-type") ==
"intersect")
214 if (files.length() != 2)
215 EST_error(
"intersect requires two WFSTs to combine");
216 if (a.
load(files.
nth(0)) != format_ok) exit(-1);
217 if (b.
load(files.
nth(1)) != format_ok) exit(-1);
221 else if (al.
val(
"-type") ==
"concat")
225 if (files.length() != 2)
226 EST_error(
"concat requires two WFSTs to combine");
227 if (a.
load(files.
nth(0)) != format_ok) exit(-1);
228 if (b.
load(files.
nth(1)) != format_ok) exit(-1);
// difference is accepted here even though the usage text does not list it.
232 else if (al.
val(
"-type") ==
"difference")
236 if (files.length() != 2)
237 EST_error(
"difference requires two WFSTs to combine");
238 if (a.
load(files.
nth(0)) != format_ok) exit(-1);
239 if (b.
load(files.
nth(1)) != format_ok) exit(-1);
// regex: the loaded ruleset supplies three things -- items 0 and 1 (via
// siod_nth) are stored in inalpha and outalpha, and car(cdr(cdr(ruleset))),
// the item after those two, is passed with them to build_from_regex.
243 else if (al.
val(
"-type") ==
"regex")
245 ruleset = car(vload(files(files.head()),1));
246 inalpha = siod_nth(0,ruleset);
247 outalpha = siod_nth(1,ruleset);
248 wfst->build_from_regex(inalpha,outalpha,car(cdr(cdr(ruleset))));
// Diagnostic on cerr naming the unrecognised -type value.
// NOTE(review): presumably the final else of the chain above, followed by an
// exit -- neither is visible here, confirm.
252 cerr <<
"wfst_build: unknown rule type \"" << al.
val(
"-type")
// -determinize: prints a summary of wfst and of dwfst.
// NOTE(review): the determinization step that fills dwfst, and any -q check
// guarding these prints, are not visible in this excerpt.
257 if (al.
present(
"-determinize"))
263 cout <<
"wfst_build summary: " << endl;
264 cout <<
" non-deterministic wfst: " <<
265 wfst->summary() << endl;
266 cout <<
" deterministic wfst: " <<
267 dwfst->summary() << endl;
// -detmin: prints summaries of wfst, dwfst and mwfst in that order.
// NOTE(review): the determinize/minimize steps that fill dwfst and mwfst, and
// any -q check, are not visible in this excerpt.
272 else if (al.
present(
"-detmin"))
276 cout <<
"wfst_build summary: " << endl;
277 cout <<
" non-deterministic wfst: " <<
278 wfst->summary() << endl;
284 cout <<
" deterministic wfst: " <<
285 dwfst->summary() << endl;
289 cout <<
" minimized wfst: " <<
290 mwfst->summary() << endl;
// Single-line summary of wfst.
// NOTE(review): presumably the path taken when neither -determinize nor
// -detmin is given; the enclosing condition is not visible -- confirm.
297 cout <<
"wfst_build: " << wfst->summary() << endl;
// Save wfst to outfile using the format named by the -otype option.
300 wfst->
save(outfile,al.
val(
"-otype"));
// Undo the gc_protect registration made on ruleset earlier in this function.
302 gc_unprotect(&ruleset);