48 #include "EST_cmd_line.h"
// Forward declaration of the file-local driver routine; main() calls it
// before its definition appears further down in this file.
55 static int scfg_train_main(
int argc,
char **argv);
// Program entry point: hands the unmodified command-line arguments to
// scfg_train_main().
94 int main(
int argc,
char **argv)
// The call below is an expression statement, so the int returned by
// scfg_train_main() is not used here.
// NOTE(review): how the process exit status is produced is not visible in
// this excerpt - confirm against the full file.
97 scfg_train_main(argc,argv);
// Driver for the scfg_train tool. From the visible statements it:
//   - supplies the usage/help text for the command-line options,
//   - reads the output file name from -o,
//   - calls siod_init() with the integer value of -heap,
//   - loads a grammar (-grammar) and a corpus (-corpus) into `grammar`,
//   - when -method equals "inout", runs grammar.train_inout(),
//   - saves the grammar to the output file and reports a write failure.
// Parameters: argc/argv as received by main().
// NOTE(review): the declarations of `al`, `grammar`, `spread`, `checkpoint`
// and `outfile`, the conditions guarding each error message, and the return
// statements are not visible in this excerpt - confirm against the full file.
103 static int scfg_train_main(
int argc,
char **argv)
// Usage text, built by concatenating string literals with '+'. Values shown
// in braces (e.g. {inout}, {50}) look like option defaults - TODO confirm
// against the command-line parser's conventions.
113 "Summary: Train a stochastic context free grammar from a (bracketed) corpus\n"+
114 "-grammar <ifile> Grammar file, one rule per line.\n"+
115 "-corpus <ifile> Corpus file, one bracketed sentence per line.\n"+
116 "-method <string> {inout}\n"+
117 " Method for training: inout.\n"+
118 "-passes <int> {50}\n"+
119 " Number of training passes.\n"+
120 "-startpass <int> {0}\n"+
121 " Starting at pass N.\n"+
122 "-spread <int> Spread training data over N passes.\n"+
123 "-checkpoint <int> Save grammar every N passes\n"+
124 "-heap <int> {210000}\n"+
125 " Set size of Lisp heap, needed for large corpora\n"+
126 "-o <ofile> Output file for trained grammar.\n",
// Output file name comes from the -o option.
130 outfile = al.
val(
"-o");
// Initialise SIOD with the integer value of -heap (described in the help
// text above as the Lisp heap size).
134 siod_init(al.
ival(
"-heap"));
// Load the grammar from the file named by -grammar.
140 grammar.
load(al.
val(
"-grammar"));
// Diagnostic for a missing -grammar option; presumably the alternative branch
// to the load above - guarding condition not visible here.
144 cerr <<
"scfg_train: no grammar specified" << endl;
// Load the training corpus from the file named by -corpus.
150 grammar.load_corpus(al.
val(
"-corpus"));
// Diagnostic for a missing -corpus option; guarding condition not visible here.
154 cerr <<
"scfg_train: no corpus specified" << endl;
// Integer value of -spread, passed on to train_inout() below.
159 spread = al.
ival(
"-spread");
// Only the "inout" training method is recognised by this comparison.
163 if (al.
val(
"-method") ==
"inout")
// Integer value of -checkpoint, passed on to train_inout() below.
167 checkpoint = al.
ival(
"-checkpoint");
// Run training with, in order: -passes, -startpass, checkpoint, spread and
// the output file name.
169 grammar.train_inout(al.
ival(
"-passes"),
170 al.
ival(
"-startpass"),
171 checkpoint,spread,outfile);
// Diagnostic that echoes the unrecognised -method value inside double quotes.
175 cerr <<
"scfg_train: unknown training method \"" <<
176 al.
val(
"-method") <<
"\"" << endl;
// Write the grammar to outfile; any result other than write_ok is reported
// on cerr together with the file name.
180 if (grammar.save(outfile) != write_ok)
182 cerr <<
"scfg_train: failed to write grammar to \"" <<
183 outfile <<
"\"" << endl;