55 static int scfg_make_main(
int argc,
char **argv);
59 static LISP assign_probs(LISP rules,
const EST_String &domain,
61 static LISP make_all_rules(
const EST_StrList &NonTerminals,
63 static void generate_probs(
double *probs,
int num);
100 int main(
int argc,
char **argv)
103 scfg_make_main(argc,argv);
109 static int scfg_make_main(
int argc,
char **argv)
121 "Summary: Build a stochastic context free grammar\n"+
122 "-nonterms <string> Number of nonterminals or file containing them\n"+
123 "-terms <string> Number of terminals or file containing them\n"+
124 "-domain <string> {nlogp}\n"+
125 " Values to be nlogp (negative log probabilities)\n"+
126 " or prob (probabilities)\n"+
127 "-values <string> {equal}\n"+
128 " General initial scores on rules as equal or\n"
130 "-heap <int> {500000}\n"+
131 " Set size of Lisp heap, only needed for large grammars\n"+
132 "-o <ofile> File to save grammar (default stdout)\n",
136 outfile = al.
val(
"-o");
142 if (al.
val(
"-domain") ==
"nlogp")
144 else if (al.
val(
"-domain") ==
"prob")
148 cerr <<
"scfg_make: domain must be nlogp or prob" << endl;
155 if (al.
val(
"-values") ==
"equal")
157 else if (al.
val(
"-values") ==
"random")
161 cerr <<
"scfg_make: values must be equal or random" << endl;
169 make_symbols(NonTerminals,al.
ival(
"-nonterms"),
"NT");
171 load_symbols(NonTerminals,al.
val(
"-nonterms"));
175 cerr <<
"scfg_make: no nonterminals specified" << endl;
182 make_symbols(Terminals,al.
ival(
"-terms"),
"T");
184 load_symbols(Terminals,al.
val(
"-terms"));
188 cerr <<
"scfg_make: no terminals specified" << endl;
192 siod_init(al.
ival(
"-heap"));
194 rules = make_all_rules(NonTerminals,Terminals);
195 rules = assign_probs(rules,domain,values);
201 if ((fd=fopen(outfile,
"w")) == NULL)
203 cerr <<
"scfg_make: failed to open file \"" << outfile <<
204 "\" for writing" << endl;
209 for (r=rules; r != NIL; r=cdr(r))
210 pprint_to_fd(fd,car(r));
219 static LISP make_all_rules(
const EST_StrList &NonTerminals,
227 for (p=NonTerminals.head(); p != 0; p=p->next())
229 int num_rules_nt = (NonTerminals.length()*NonTerminals.length())+
231 double *probs =
new double[num_rules_nt];
232 generate_probs(probs,num_rules_nt);
234 for (q=NonTerminals.head(); q != 0; q=q->next())
235 for (r=NonTerminals.head(); r != 0; r=r->next(),i++)
236 rules = cons(cons(flocons(probs[i]),
237 cons(rintern(NonTerminals(p)),
238 cons(rintern(NonTerminals(q)),
239 cons(rintern(NonTerminals(r)),NIL)))),
241 for (q=Terminals.head(); q != 0; q=q->next(),i++)
242 rules = cons(cons(flocons(probs[i]),
243 cons(rintern(NonTerminals(p)),
244 cons(rintern(Terminals(q)),NIL))),
249 return reverse(rules);
252 static void generate_probs(
double *probs,
int num)
257 if (values ==
"equal")
259 double defp = 1.0/(float)num;
260 for (i=0; i < num; i++)
263 else if (values ==
"random")
267 for (i=0; i < num; i++)
269 probs[i] = (double)abs(rand())/(
double)0x7fff;
272 for (i=0; i < num; i++)
279 cerr <<
"scfg_make: unknown value for probability distribution"
285 static LISP assign_probs(LISP rules,
const EST_String &domain,
292 if (domain ==
"nlogp")
293 for (r=rules; r != NIL; r = cdr(r))
295 if (get_c_float(car(car(r))) == 0)
296 CAR(car(r)) = flocons(40);
298 CAR(car(r)) = flocons(-log(get_c_float(car(car(r)))));
310 for (magnitude=0,t=n; t > 0; t=t/10)
313 char *name = walloc(
char,prefix.
length()+magnitude+1);
314 char *skel = walloc(
char,prefix.
length()+5);
315 sprintf(skel,
"%s%%%02dd",(
const char *)prefix,magnitude);
317 for (i=0; i < n; i++)
319 sprintf(name,skel,i);
333 load_StrList(filename,syms);