44 #include "EST_SCFG_Chart.h"
46 EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge()
50 EST_SCFG_Chart_Edge::EST_SCFG_Chart_Edge(
double prob,
60 EST_SCFG_Chart_Edge::~EST_SCFG_Chart_Edge()
64 LISP EST_SCFG_Chart::print_edge(
int start,
int end,
int p,
73 else if (start+1 == end)
77 cons(flocons(e->
prob()),
89 d1 = edges[start][e->
pos()][e->
d1()];
90 d2 = edges[e->
pos()][end][e->
d2()];
93 cons(print_edge(start,e->
pos(),e->
d1(),d1),
94 cons(print_edge(e->
pos(),end,e->
d2(),d2),
97 cons(flocons(e->
prob()),
105 EST_SCFG_Chart::EST_SCFG_Chart()
111 grammar_local = TRUE;
115 EST_SCFG_Chart::~EST_SCFG_Chart()
128 grammar_local = FALSE;
129 grammar = &imported_grammar;
152 for (n_vertices=1,p=s; p != e; p=p->next())
156 for (n=0,p=s; p != e; p=p->next(),n++)
161 cerr <<
"SCFG_Chart: unknown terminal symbol \"" <<
162 p->f(name).
string() <<
"\"" << endl;
169 void EST_SCFG_Chart::delete_edge_table()
173 if (wfst == 0)
return;
175 for (i=0; i < n_vertices; i++)
178 for (j=0; j < n_vertices; j++)
181 if (edges[i][j][k] != emptyedge)
182 delete edges[i][j][k];
183 delete [] edges[i][j];
196 void EST_SCFG_Chart::setup_edge_table()
205 for (i=0; i < n_vertices; i++)
209 for (j=0; j < n_vertices; j++)
212 for (k=0; k < nt; k++)
218 double EST_SCFG_Chart::find_best_tree_cal(
int start,
int end,
int p)
222 int best_q = -1, best_r = -1;
223 double best_prob = 0;
227 best_prob = grammar->
prob_U(p,wfst[start]->d1());
229 edges[start][end][p] =
231 wfst[start]->d1(),0,-1);
233 edges[start][end][p] = emptyedge;
239 double s=0,t_prob,left,right;
243 for (q=0; q < nt; q++)
244 for (r=0; r < nt; r++)
246 double pBpqr = grammar->
prob_B(p,q,r);
249 for (j=start+1; j < end; j++)
251 left=find_best_tree(start,j,q);
254 right = find_best_tree(j,end,r);
255 t_prob = pBpqr * left * right;
256 if (t_prob > best_prob)
270 edges[start][end][p] =
273 edges[start][end][p] = emptyedge;
281 if (n_vertices - 1 > 0)
282 find_best_tree(0,n_vertices-1,grammar->distinguished_symbol());
291 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
296 return print_edge(0,n_vertices-1,grammar->distinguished_symbol(),top);
318 for (num_words=0,p=s; p != e; p=p->next())
321 if (num_words != (n_vertices-1))
323 cerr <<
"SCFG_Chart: extract_parse, number of items in link stream " <<
324 " different from those in parse tree" << endl;
331 top = edges[0][n_vertices-1][grammar->distinguished_symbol()];
339 extract_edge(0,n_vertices-1,grammar->distinguished_symbol(),top,
342 if ((force) && (!daughter1(ss)))
343 extract_forced_parse(0,n_vertices-1,ss,w);
348 void EST_SCFG_Chart::extract_forced_parse(
int start,
int end,
356 s->append_daughter(w);
357 s->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
362 extract_forced_parse(start,start+1,s->append_daughter(),w);
363 EST_Item *st = s->append_daughter();
364 st->set_name(grammar->
nonterminal(grammar->distinguished_symbol()));
367 extract_forced_parse(start+1,end,st,nw);
371 void EST_SCFG_Chart::extract_edge(
int start,
int end,
int p,
382 else if (start+1 == end)
385 s->append_daughter((*word));
387 s->
set(
"prob",(
float)e->
prob());
388 *word = (*word)->next();
396 d1 = edges[start][e->
pos()][e->
d1()];
397 d2 = edges[e->
pos()][end][e->
d2()];
400 s->append_daughter();
401 s->append_daughter();
403 extract_edge(start,e->
pos(),e->
d1(),d1,daughter1(s),word);
404 extract_edge(e->
pos(),end,e->
d2(),d2,daughter2(s),word);
407 s->
set(
"prob",(
float)e->
prob());
413 void EST_SCFG_chart_load_relation(
EST_Relation &s,LISP sent)
419 for (w=sent; w != NIL; w=cdr(w))
425 word->set_name(get_c_string(car(car(w))));
426 if (consp(car(cdr(car(w)))))
427 for (f=car(cdr(car(w))); f != NIL; f=cdr(f))
429 if (FLONUMP(car(cdr(car(f)))))
430 word->
set(get_c_string(car(car(f))),
431 get_c_float(car(cdr(car(f)))));
433 word->
set(get_c_string(car(car(f))),
434 get_c_string(car(cdr(car(f)))));
437 word->
set(
"name",get_c_string(car(cdr(car(w)))));
440 word->
set(
"name",get_c_string(car(w)));
460 LISP scfg_parse(LISP
string, LISP grammar)
469 EST_SCFG_chart_load_relation(words,
string);
477 LISP scfg_parse(LISP
string,
EST_SCFG &grammar)
486 EST_SCFG_chart_load_relation(words,
string);
494 LISP scfg_bracketing_only(LISP parse)
496 if (consp(siod_nth(4,parse)))
500 for (d=cdr(cdr(cdr(cdr(parse)))),ds=NIL; d != NIL; d=cdr(d))
501 ds = cons(scfg_bracketing_only(car(d)),ds);
505 return siod_nth(4,parse);
519 int fully_contained=0;
521 for (c=0; c < corpus.
length(); c++)
523 LISP flat = siod_flatten(corpus.
a_no_check(c).string());
525 parse = scfg_parse(flat,*
this);
535 count_bracket_crossing(corpus.
a_no_check(c),parsed,vs);
542 cout <<
"cross bracketing " << cb.
mean()*100 <<
" (" << failed <<
543 " failed " << (float)(100.0*fully_contained)/corpus.
length() <<
544 "% fully consistent from " << corpus.
length()
545 <<
" sentences)" << endl;
555 if (ref.length() != test.length())
557 EST_error(
"bracket_crossing: sentences of different lengths");
560 for (i=0; i < ref.length(); i++)
561 for (j=i+1; j <= ref.length(); j++)
562 if (test.
valid(i,j) == 1)
564 if (ref.
valid(i,j) == 0)