Edinburgh Speech Tools  2.4-release
 All Classes Functions Variables Typedefs Enumerations Enumerator Friends Pages
wfst_ops.cc
1 /*************************************************************************/
2 /* */
3 /* Centre for Speech Technology Research */
4 /* University of Edinburgh, UK */
5 /* Copyright (c) 1997 */
6 /* All Rights Reserved. */
7 /* */
8 /* Permission is hereby granted, free of charge, to use and distribute */
9 /* this software and its documentation without restriction, including */
10 /* without limitation the rights to use, copy, modify, merge, publish, */
11 /* distribute, sublicense, and/or sell copies of this work, and to */
12 /* permit persons to whom this work is furnished to do so, subject to */
13 /* the following conditions: */
14 /* 1. The code must retain the above copyright notice, this list of */
15 /* conditions and the following disclaimer. */
16 /* 2. Any modifications must be clearly marked as such. */
17 /* 3. Original authors' names are not deleted. */
18 /* 4. The authors' names are not used to endorse or promote products */
19 /* derived from this software without specific prior written */
20 /* permission. */
21 /* */
22 /* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23 /* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24 /* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25 /* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26 /* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27 /* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28 /* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29 /* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30 /* THIS SOFTWARE. */
31 /* */
32 /*************************************************************************/
33 /* Author : Alan W Black */
34 /* Date : November 1997 */
35 /*-----------------------------------------------------------------------*/
36 /* */
37 /* Basic WFST operations: minimization, determinization, intersection, */
38 /* union, composition */
39 /* */
40 /*=======================================================================*/
41 #include <iostream>
42 #include <cstdlib>
43 #include "EST_WFST.h"
44 #include "wfst_aux.h"
45 #include "EST_String.h"
46 #include "EST_TList.h"
47 #include "EST_TKVL.h"
48 #include "EST_THash.h"
49 
50 Declare_TList_T(EST_WFST_MultiState *,EST_WFST_MultiStateP)
51 
52  // Declare_KVL(int, EST_IList)
53 
54  typedef EST_TKVI<int, EST_IList> KVI_int_EST_IList_t;
55  typedef EST_TKVL<int, EST_IList> KVL_int_EST_IList_t;
56 
57  static EST_IList int_EST_IList_kv_def_EST_IList_s;
58  static int int_EST_IList_kv_def_int_s;
59 
60  template <> EST_IList *EST_TKVL< int, EST_IList >::default_val=&int_EST_IList_kv_def_EST_IList_s;
61  template <> int *EST_TKVL< int, EST_IList >::default_key=&int_EST_IList_kv_def_int_s;
62 
63  Declare_TList_N(KVI_int_EST_IList_t, 0)
64 
65 
66 #if defined(INSTANTIATE_TEMPLATES)
67 #include "../base_class/EST_TList.cc"
68 
69  Instantiate_TList_T_MIN(EST_WFST_MultiState *,EST_WFST_MultiStateP)
70 
71 #include "../base_class/EST_TKVL.cc"
72 
73  // Instantiate_KVL(int, EST_IList)
74 
75  template class EST_TKVL<int, EST_IList>;
76  template class EST_TKVI<int, EST_IList>;
77 // ostream &operator<<(ostream &s, EST_TKVI< int , EST_IList > const &i){ return s << i.k << "\t" << i.v << "\n"; }
78 // ostream& operator << (ostream& s, EST_TKVL< int , EST_IList > const &l) {EST_Litem *p; for (p = l.list.head(); p ; p = p->next()) s << l.list(p).k << "\t" << l.list(p).v << endl; return s;}
79  Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer_k, int, KVL_int_EST_IList_kitt)
80  Instantiate_TStructIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
81  Instantiate_TIterator_T(KVL_int_EST_IList_t, KVL_int_EST_IList_t::IPointer, KVI_int_EST_IList_t, KVL_int_EST_IList_itt)
82 
83  // Instantiate_TList(KVI_int_EST_IList_t)
84 
85  template class EST_TList< TLIST_KVI_int_EST_IList_t_VAL >;
86  template class EST_TItem< TLIST_KVI_int_EST_IList_t_VAL >;
87  template const char *error_name(EST_TList< KVI_int_EST_IList_t > val);
88 
89  Instantiate_TIterator_T( EST_TList<KVI_int_EST_IList_t>, EST_TList<KVI_int_EST_IList_t>::IPointer, KVI_int_EST_IList_t, TList_KVI_int_EST_IList_t_itt);
90 
91 
92 #endif
93 
95 
96 static enum wfst_state_type intersect_state_type(wfst_list &wl,
98 static int check_distinguished(const EST_WFST &nmwfst,
99  int p, int q,
100  wfst_marks &marks,
101  wfst_assumes &assumptions);
102 
103 void EST_WFST_MultiState::add(int i)
104 {
105  // If of set type only add it if its not already there
106  EST_Litem *p;
107 
108  if (p_type == wfst_ms_set)
109  for (p=head(); p != 0; p=p->next())
110  {
111  if ((*this)(p) == i)
112  return;
113  else if (i < (*this)(p)) // keep the list ordered
114  {
115  insert_before(p,i);
116  return;
117  }
118  }
119 
120  append(i);
121 }
122 
123 int multistate_index(EST_WFST_MultiStateIndex &index,
124  EST_WFST_MultiState *ms,int proposed)
125 {
126  // Returns proposed if ms is not already in index, otherwise
127  // returns the value that was proposed when it was first new.
128 
129  // I'll have to make this more efficient in future.
130  EST_String istring("");
131  EST_Litem *p;
132  int ns,found;
133 
134  for (p=ms->head(); p != 0; p = p->next())
135  istring += itoString((*ms)(p)) + " ";
136 
137  ns = index.val(istring,found);
138  if (found)
139  return ns;
140  else
141  {
142  index.add_item(istring,proposed);
143  return proposed;
144  }
145 }
146 
147 static int pair_check(EST_THash<int,int> &pairs_done, int i, int o, int odim)
148 {
149  int p;
150  int found;
151 
152  p = (i*odim)+o; // unique number representing i/o pair
153 
154  pairs_done.val(p,found);
155  if (!found)
156  { // first time seeing this pair
157  pairs_done.add_item(p,1);
158  return 0;
159  }
160  return 1;
161 
162 }
163 
164 void EST_WFST::determinize(const EST_WFST &ndwfst)
165 {
166  // Determinise a non-deterministic WFST
167  EST_WFST_MultiState *start_state,*nms,*current;
168  int ns;
169  Agenda multistate_agenda;
170  EST_WFST_MultiStateIndex index(100);
171  int i,o, new_name;
172  int c=0;
173  EST_Litem *sp, *tp;
174 
175  clear();
176  p_in_symbols.copy(ndwfst.p_in_symbols);
177  p_out_symbols.copy(ndwfst.p_out_symbols);
178 
179  // Create a starting state and add it to this WFST
180  start_state = new EST_WFST_MultiState(wfst_ms_set);
181  start_state->add(ndwfst.start_state());
182  ndwfst.add_epsilon_reachable(start_state);
183 
184  p_start_state = add_state(ndwfst.ms_type(start_state));
185  start_state->set_name(p_start_state);
186 
187  multistate_agenda.append(start_state); // initialize agenda
188 
189  while (multistate_agenda.length() > 0)
190  {
191  EST_THash<int,int> pairs_done(100);
192  current = multistate_agenda.first();
193  multistate_agenda.remove(multistate_agenda.head());
194  if ((c % 100) == 99)
195  cout << "Determinizing " << summary() << " Agenda "
196  << multistate_agenda.length() << endl;
197  c++;
198 
199  for (sp=current->head(); sp != 0; sp=sp->next())
200  {
201  const EST_WFST_State *s = ndwfst.state((*current)(sp));
202  for (tp=s->transitions.head(); tp != 0; tp = tp->next())
203  {
204  i = s->transitions(tp)->in_symbol();
205  o = s->transitions(tp)->out_symbol();
206  // Need to check if i/o has already been proposed
207  if (pair_check(pairs_done,i,o,p_out_symbols.length()) == 1)
208  continue; // already prosed those
209 // for (i=0; i < p_in_symbols.length(); i++)
210 // { // start at 2 to skip any and epsilon characters -- hmm bad
211 // for (o=0; o < p_out_symbols.length(); o++)
212 // {
213  if ((i==o) && (i==0))
214  continue; // don't deal here with epsilon transitions
215  nms = apply_multistate(ndwfst,current,i,o);
216  if ((nms->length() == 0) ||
217  (ndwfst.ms_type(nms) == wfst_error))
218  {
219  delete nms;
220  continue; // no state to go to
221  }
222  new_name = multistate_index(index,nms,p_num_states);
223  if (new_name == p_num_states) // genuinely new
224  { // create a real state and add it to the agenda
225  ns = add_state(ndwfst.ms_type(nms));
226  nms->set_name(ns);
227  multistate_agenda.append(nms);
228  }
229  else
230  {
231  nms->set_name(new_name);
232  delete nms;
233  }
234 
235  // Add new transition to current state
236  p_states[current->name()]
237  ->add_transition(nms->weight(),
238  nms->name(),
239  i,o);
240 
241  }
242  }
243  delete current;
244 
245  // Probably want some progress summary
246  }
247 
248 }
249 
252  int in, int out) const
253 {
254  // Apply in and out to ms and find all new states it becomes
255  EST_Litem *p;
256  EST_WFST_MultiState *new_ms = new EST_WFST_MultiState(wfst_ms_set);
257 
258  new_ms->clear();
259 
260  for (p=ms->head(); p != 0; p=p->next())
261  // Add all new possible states from ms(p) given in/out
262  wfst.transition_all((*ms)(p),in,out,new_ms);
263 
264  // Add epsilon reachable states from any states in multistates
265  wfst.add_epsilon_reachable(new_ms);
266 
267  return new_ms;
268 
269 }
270 
271 enum wfst_state_type EST_WFST::ms_type(EST_WFST_MultiState *ms) const
272 {
273  // Returns wfst_error if ms contains an error state, wfst_final
274  // if there is at least one final and wfst_non_final
275  EST_Litem *p;
276  enum wfst_state_type r = wfst_nonfinal;
277 
278  for (p=ms->head(); p != 0; p = p->next())
279  if (p_states((*ms)(p))->type() == wfst_error)
280  return wfst_error;
281  else if (p_states((*ms)(p))->type() == wfst_licence)
282  // wfst_licence states are generated in KK compilation
283  r = wfst_licence;
284  else if ((p_states((*ms)(p))->type() == wfst_final) &&
285  (r != wfst_licence))
286  r = wfst_final;
287 
288  if (r == wfst_licence)
289  return wfst_nonfinal;
290  else
291  return r;
292 }
293 
295  int in,
296  int out,
297  EST_WFST_MultiState *ms) const
298 {
299  // Find all possible new states from state given in/out
300  const EST_WFST_State *s = p_states(state);
301  EST_Litem *i;
302 
303  for (i=s->transitions.head(); i != 0; i=i->next())
304  {
305  if ((in == s->transitions(i)->in_symbol()) &&
306  (out == s->transitions(i)->out_symbol()))
307  ms->add(s->transitions(i)->state());
308  }
309 }
310 
311 static int is_a_member(const EST_IList &ii, int i)
312 {
313  for (EST_Litem *p=ii.head(); p != 0; p=p->next())
314  if (ii(p) == i)
315  return TRUE;
316  return FALSE;
317 }
318 
320 {
321  // As ms->add() adds in order we need to copy to a new list and append
322  // to it any new epsilon accessible states
323  EST_Litem *p;
324  EST_IList ii;
325  int ie = p_in_symbols.name(get_c_string(epsilon_label()));
326  int oe = p_out_symbols.name(get_c_string(epsilon_label()));
327 
328  for (p=ms->head(); p != 0; p=p->next())
329  ii.append((*ms)(p));
330 
331  for (p=ii.head(); p != 0; p=p->next())
332  {
333  const EST_WFST_State *s = p_states(ii(p));
334  EST_Litem *i;
335 
336  for (i=s->transitions.head(); i != 0; i=i->next())
337  {
338  if ((ie == s->transitions(i)->in_symbol()) &&
339  (oe == s->transitions(i)->out_symbol()))
340  {
341  // Add to end of ii if not already there
342  int nstate = s->transitions(i)->state();
343  if (!is_a_member(ii,nstate))
344  {
345  ii.append(nstate);
346  ms->add(nstate); // gets added in order
347  }
348  }
349  }
350  }
351 }
352 
353 /************************************************************************/
354 /* Intersection of a list of transducers, i.e. what is accepted by all */
355 /************************************************************************/
357 {
358  // This is very similar to determinisation, similar complexity too
359  EST_WFST_MultiState *start_state = new EST_WFST_MultiState(wfst_ms_list);
360  EST_WFST_MultiState *nms,*current;
361  int ns;
362  Agenda multistate_agenda;
363  EST_WFST_MultiStateIndex index(100);
364  int i,o, new_name, n;
365  EST_Litem *p,*q;
366  int c=0;
367 
368  // Initialize this WFST from the given ones
369  clear();
370  p_in_symbols.copy(wl.first().p_in_symbols);
371  p_out_symbols.copy(wl.first().p_out_symbols);
372 
373  // Determinize each input WFST and make a new start state consisting of
374  // the start states of each of the input WFSTs
375  for (p=wl.tail(); p != 0; p=p->prev())
376  {
377  if (!wl(p).deterministic())
378  {
379  cout << "...intersection deterministing" << endl;
380  EST_WFST tt = wl(p);
381  wl(p).determinize(tt);
382  }
383  start_state->add(wl(p).start_state());
384  }
385 
386  p_start_state = add_state(intersect_state_type(wl,start_state));
387  // Label multistate start start with single-state name
388  start_state->set_name(p_start_state);
389 
390  multistate_agenda.append(start_state); // initialize agenda
391 
392  while (multistate_agenda.length() > 0)
393  {
394  current = multistate_agenda.first();
395  multistate_agenda.remove(multistate_agenda.head());
396  if ((c % 100) == 99)
397  cout << "Intersection " << summary() << " Agenda "
398  << multistate_agenda.length() << endl;
399  c++;
400 
401  // For all possible in/out pairs
402  for (i=0; i < p_in_symbols.length(); i++)
403  { // start at 2 to skip any and epsilon characters -- hmm bad
404  for (o=0; o < p_out_symbols.length(); o++)
405  {
406  if ((i==o) && (i==0)) // shouldn't be epsilon/epsilon here
407  continue;
408  nms = new EST_WFST_MultiState(wfst_ms_list);
409  // Increment multistate to new multistate for each individual
410  // state using each WFST
411  for (n=0,p=wl.head(),q=current->head();
412  (p != 0) && (q != 0);
413  p=p->next(),q=q->next(),n++)
414  nms->add(wl(p).transition((*current)(q),i,o));
415  if (intersect_state_type(wl,nms) == wfst_error)
416  {
417  delete nms;
418  continue; // no state to go to
419  }
420  new_name = multistate_index(index,nms,p_num_states);
421  if (new_name == p_num_states) // genuinely new and unseen
422  { // create a real state and add it to the agenda
423  ns = add_state(intersect_state_type(wl,nms));
424  nms->set_name(ns);
425  multistate_agenda.append(nms);
426  }
427  else // already seen this state, and is already named
428  {
429  nms->set_name(new_name);
430  delete nms;
431  }
432 
433  // Add new transition to current state
434  p_states[current->name()]
435  ->add_transition(nms->weight(),nms->name(),i,o);
436  }
437  }
438  delete current;
439  // Probably want some progress summary
440  }
441 
442 }
443 
444 static enum wfst_state_type intersect_state_type(wfst_list &wl,
446 {
447  // Find the state type of the combined states
448  // If any are error, return error, if one is nonfinal return nonfinal
449  // otherwise its final
450  EST_Litem *p,*q;
451  enum wfst_state_type r = wfst_final;
452 
453  for (p=wl.head(),q=ms->head();
454  (p != 0) && (q != 0);
455  p=p->next(),q=q->next())
456  {
457  if ((*ms)(q) == WFST_ERROR_STATE)
458  return wfst_error;
459 
460  enum wfst_state_type dd = wl(p).state((*ms)(q))->type();
461 
462  if (dd == wfst_error)
463  return wfst_error;
464  else if (dd == wfst_nonfinal)
465  r = wfst_nonfinal;
466  }
467  return r;
468 }
469 
470 void EST_WFST::intersection(const EST_WFST &a, const EST_WFST &b)
471 {
472  // Intersect two WFSTs
473  wfst_list wl;
474 
475  wl.append(a);
476  wl.append(b);
477 
478  intersection(wl);
479 }
480 
481 /*******************************************************************/
482 /* Minimization is of complexity of O(n^2) */
483 /*******************************************************************/
484 void EST_WFST::minimize(const EST_WFST &nmwfst)
485 {
486  // Minimize a WFST
487  int p,q;
488  wfst_marks marks(nmwfst.num_states());
489  wfst_assumes assumptions;
490 
491  // For each combination of states
492  for (p=0; p < nmwfst.num_states()-1; p++)
493  for (q=p+1; q < nmwfst.num_states(); q++)
494  check_distinguished(nmwfst,p,q,marks,assumptions);
495 
496  // The marked array now has all different and lists to equivalent states
497  // Build an array mapping old name to new name.
498  int num_new_states;
499  int i;
500  EST_IVector state_map;
501 
502  marks.find_state_map(state_map,num_new_states);
503 
504  // Build the new minimized WFST mapping existing transitions
505  clear();
506  p_in_symbols.copy(nmwfst.p_in_symbols);
507  p_out_symbols.copy(nmwfst.p_out_symbols);
508 
509  init(num_new_states);
510  p_start_state = state_map(nmwfst.p_start_state);
511 
512  for (i=0; i < nmwfst.num_states(); i++)
513  {
514  if (p_states[state_map(i)] == 0)
515  p_states[state_map(i)] =
516  copy_and_map_states(state_map,nmwfst.state(i),nmwfst);
517  }
518 
519 }
520 
521 static int check_distinguished(const EST_WFST &nmwfst,
522  int p, int q,
523  wfst_marks &marks,
524  wfst_assumes &assumptions)
525 {
526  // Check to see if these two are equivalent
527  EST_Litem *t;
528  EST_IList from_p,from_q;
529 
530  if (marks.distinguished(p,q)) // been here, done that
531  return TRUE;
532  else if (marks.undistinguished(p,q)) // been here too
533  return FALSE;
534  // Not been here yet so do some work to try to find out if
535  // these states can be distinguished
536  else if ((nmwfst.state(p)->type() != nmwfst.state(q)->type()) ||
537  (nmwfst.state(p)->num_transitions() !=
538  nmwfst.state(q)->num_transitions()))
539  { // Different final/non-final type or different number
540  // of transitions so obviously different states
541  marks.distinguish(p,q);
542  return TRUE;
543  }
544  else
545  { // Have to check their transitions individually
546  for (t=nmwfst.state(p)->transitions.head(); t != 0; t=t->next())
547  {
548  int in = nmwfst.state(p)->transitions(t)->in_symbol();
549  int out = nmwfst.state(p)->transitions(t)->out_symbol();
550  int y = nmwfst.state(p)->transitions(t)->state();
551  int z = nmwfst.transition(q,in,out);
552  if ((z == WFST_ERROR_STATE) ||
553  (marks.distinguished(y,z)))
554  { // no equiv transition or obviously different states
555  marks.distinguish(p,q);
556  return TRUE;
557  }
558  else if (equivalent_to(y,z,assumptions))
559  continue;
560  else // Potential equivalence, record y,z for later
561  {
562  from_p.append(y);
563  from_q.append(z);
564  }
565  }
566  // All transitions had potential match so only now
567  // actually check their follow sets
568  EST_Litem *yp, *zp;
569  int tl = FALSE;
570  if (assumptions.length() == 0)
571  tl = TRUE;
572  // assume they are undistinguished
573  add_assumption(p,q,assumptions);
574  for (yp=from_p.head(),zp=from_q.head();
575  yp != 0; yp=yp->next(),zp=zp->next())
576  if (check_distinguished(nmwfst,from_p(yp),from_q(zp),
577  marks,
578  assumptions))
579  {
580  marks.distinguish(p,q); // set the distinguished
581  assumptions.clear();
582  return TRUE;
583  }
584  // ok I give up, they are the same
585  if (tl)
586  { // This is equivalent given the assumptions (and no
587  // higher level assumptions) so we can mark these states
588  // as undistinguished (and all the assumptions)
589  mark_undistinguished(marks,assumptions);
590  assumptions.clear();
591  }
592  return FALSE;
593  }
594 }
595 
596 void EST_WFST::extend_alphabets(const EST_WFST &b)
597 {
598  // Extend current in/out alphabets to accommodate anything in b's
599  // that are not in a's
600  // This guarantees that the number in this will still be valid
601  EST_StrList ivocab, ovocab;
602  int i;
603 
604  for (i=0; i<p_in_symbols.length(); i++)
605  ivocab.append(in_symbol(i));
606  for (i=0; i<b.p_in_symbols.length(); i++)
607  if (!strlist_member(ivocab,b.in_symbol(i)))
608  ivocab.append(b.in_symbol(i));
609  for (i=0; i<p_out_symbols.length(); i++)
610  ovocab.append(out_symbol(i));
611  for (i=0; i<b.p_out_symbols.length(); i++)
612  if (!strlist_member(ovocab,b.out_symbol(i)))
613  ovocab.append(b.out_symbol(i));
614 
615  p_in_symbols.init(ivocab);
616  p_out_symbols.init(ovocab);
617 }
618 
619 EST_WFST_State *EST_WFST::copy_and_map_states(const EST_IVector &state_map,
620  const EST_WFST_State *s,
621  const EST_WFST &b) const
622 {
623  // Copy s into new state mapping new states to those in state map
624  EST_WFST_State *ns = new EST_WFST_State(state_map(s->name()));
625  EST_Litem *i;
626 
627  ns->set_type(s->type());
628 
629  for (i=s->transitions.head(); i != 0; i=i->next())
630  {
631  int mapped_state= state_map(s->transitions(i)->state());
632  if (mapped_state != WFST_ERROR_STATE)
633  ns->add_transition(s->transitions(i)->weight(),
634  mapped_state,
635  in_symbol(b.in_symbol(s->transitions(i)->in_symbol())),
636  out_symbol(b.out_symbol(s->transitions(i)->out_symbol())));
637  }
638 
639  return ns;
640 }
641 
642 /***********************************************************************/
643 /* Build a WFST which doesn't accept any of the strings that a accepts */
644 /* This keeps the same in/out alphabet */
645 /***********************************************************************/
647 {
648  int i;
649 
650  copy(a);
651 
652  for (i=0; i < num_states(); i++)
653  {
654  if (p_states[i]->type() == wfst_final)
655  p_states[i]->set_type(wfst_nonfinal);
656  else if (p_states[i]->type() == wfst_nonfinal)
657  p_states[i]->set_type(wfst_final);
658  // errors remain errors
659  }
660 }
661 
662 static int noloopstostart(const EST_WFST &a)
663 {
664  // TRUE if there are no transitions leading to the start state
665  // when this is true there is a union operation which preserves
666  // deterministicness
667  int i;
668 
669  for (i=0; i < a.num_states(); i++)
670  {
671  const EST_WFST_State *s = a.state(i);
672  EST_Litem *p;
673  for (p=s->transitions.head(); p != 0; p=p->next())
674  {
675  if (s->transitions(p)->state() == a.start_state())
676  return FALSE;
677  }
678  }
679  return TRUE;
680 }
681 
682 int EST_WFST::deterministiconstartstates(const EST_WFST &a, const EST_WFST &b) const
683 {
684  // TRUE if there are no common transition labels from a and b's
685  // start state
686  EST_IMatrix tab;
687  int in,out;
688 
689  tab.resize(a.p_in_symbols.length(),a.p_out_symbols.length());
690  tab.fill(0);
691 
692  for (EST_Litem *t=a.state(a.start_state())->transitions.head();
693  t != 0; t=t->next())
694  {
695  tab(a.state(a.start_state())->transitions(t)->in_symbol(),
696  a.state(a.start_state())->transitions(t)->out_symbol()) = 1;
697  }
698 
699  for (EST_Litem *tt=b.state(b.start_state())->transitions.head();
700  tt != 0; tt=tt->next())
701  {
702  in = a.in_symbol(b.in_symbol(b.state(b.start_state())->transitions(tt)->in_symbol()));
703  out = a.out_symbol(b.out_symbol(b.state(b.start_state())->transitions(tt)->out_symbol()));
704  if (in == -1)
705  continue; // obviously not a clash
706  else if (out == -1)
707  continue; // obviously not a clash
708  else if (tab(in,out) == 1)
709  return FALSE;
710  }
711  return TRUE;
712 }
713 
714 /***********************************************************************/
715 /* Build a WFST which accepts both strings of a and of b */
716 /***********************************************************************/
717 void EST_WFST::uunion(const EST_WFST &a,const EST_WFST &b)
718 {
719  EST_IVector smap;
720  int i;
721 
722  copy(a);
723  extend_alphabets(b);
724 
725  if (a.deterministic() && b.deterministic() &&
726  noloopstostart(a) && noloopstostart(b) &&
727  deterministiconstartstates(a,b))
728  {
729  // This does the union without the epsilon and will preserve
730  // deterministic wfsts in this special case
731  smap.resize(b.num_states());
732  smap[0] = start_state();
733  for (i=1; i < b.num_states(); ++i)
734  smap[i] = i+a.num_states()-1;
735 
736  more_states(a.num_states()+b.num_states()-1);
737  p_num_states += b.num_states()-1;
738  for (i=1; i < b.num_states(); i++)
739  p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
740 
741  const EST_WFST_State *s = b.state(b.start_state());
742  EST_Litem *p;
743  for (p=s->transitions.head(); p != 0; p=p->next())
744  {
745  int mapped_state= smap(s->transitions(p)->state());
746  if (mapped_state != WFST_ERROR_STATE)
747  p_states[start_state()]->
748  add_transition(s->transitions(p)->weight(),
749  mapped_state,
750  in_symbol(b.in_symbol(s->transitions(p)->in_symbol())),
751  out_symbol(b.out_symbol(s->transitions(p)->out_symbol())));
752  }
753  }
754  else
755  { // do it the hard way
756  smap.resize(b.num_states());
757  for (i=0; i < b.num_states(); ++i)
758  smap[i] = i+a.num_states();
759 
760  more_states(a.num_states()+b.num_states());
761  p_num_states += b.num_states();
762  for (i=0; i < b.num_states(); i++)
763  p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
764 
765  // Actually do the union bit by adding an epsilon transition from
766  // the start of a to start state of b
767  p_states[start_state()]->add_transition(0.0,smap[b.start_state()],
768  in_epsilon(), out_epsilon());
769  }
770 
771 }
772 
773 /***********************************************************************/
774 /* Build a WFST which a followed by b */
775 /***********************************************************************/
776 void EST_WFST::concat(const EST_WFST &a,const EST_WFST &b)
777 {
778  EST_IVector smap;
779  int i;
780 
781  copy(a);
782  extend_alphabets(b);
783 
784  smap.resize(b.num_states());
785  for (i=0; i < b.num_states(); ++i)
786  smap[i] = i+a.num_states();
787 
788  more_states(a.num_states()+b.num_states());
789 
790  // everything final in a becomes non final and an epsilon transition
791  // goes from them to the start of b
792  for (i=0; i < num_states(); i++)
793  {
794  if (p_states[i]->type() == wfst_final)
795  {
796  p_states[i]->set_type(wfst_nonfinal);
797  p_states[i]->add_transition(0.0,smap[b.start_state()],
798  in_epsilon(), out_epsilon());
799  }
800  }
801 
802  p_num_states += b.num_states();
803  for (i=0; i < b.num_states(); i++)
804  p_states[smap(i)] = copy_and_map_states(smap,b.state(i),b);
805 
806 }
807 
808 /***********************************************************************/
809 /* Build a WFST from composing a and b (feeding output of a to */
810 /* input of b) */
811 /***********************************************************************/
812 void EST_WFST::compose(const EST_WFST &a,const EST_WFST &b)
813 {
814  EST_WFST_MultiState *start_state = new EST_WFST_MultiState(wfst_ms_list);
815  EST_WFST_MultiState *nms,*current;
816  Agenda multistate_agenda;
817  EST_WFST_MultiStateIndex index(100);
818  wfst_list wl;
819  EST_WFST t;
820  int i,new_name;
821  EST_Litem *p,*q;
822 
823  clear();
824  p_in_symbols.copy(a.p_in_symbols);
825  p_out_symbols.copy(b.p_out_symbols);
826 
827  // Unfortunately need to needlessly copy a and b here
828  wl.append(a);
829  start_state->add(a.start_state());
830  wl.append(b);
831  start_state->add(b.start_state());
832 
833  p_start_state = add_state(intersect_state_type(wl,start_state));
834  // Label multistate start start with single-state name
835  start_state->set_name(p_start_state);
836 
837  multistate_agenda.append(start_state); // initialize agenda
838 
839  while (multistate_agenda.length() > 0)
840  {
841  current = multistate_agenda.first();
842  multistate_agenda.remove(multistate_agenda.head());
843 
844  // For all possible in/out pairs
845  for (i=0; i < p_in_symbols.length(); i++)
846  { // start at 2 to skip any and epsilon characters -- hmm bad
847  // find transitions
848  wfst_translist transa;
849 
850  wl.first().transduce(current->first(),i,transa);
851  for (p=transa.head(); p != 0; p=p->next())
852  {
853  wfst_translist transb;
854  // feed a's out to b'is in
855  wl.last().transduce(
856  current->last(),
857  b.in_symbol(a.out_symbol(transa(p)->out_symbol())),
858  transb);
859  for (q = transb.head(); q != 0; q=q->next())
860  {
861  nms = new EST_WFST_MultiState(wfst_ms_list);
862  nms->add(transa(p)->state());
863  nms->add(transb(q)->state());
864 
865  if (intersect_state_type(wl,nms) == wfst_error)
866  {
867  delete nms;
868  continue; // no state to go to
869  }
870  new_name = multistate_index(index,nms,p_num_states);
871  if (new_name == p_num_states) // genuinely new and unseen
872  { // create a real state and add it to the agenda
873  int ns = add_state(intersect_state_type(wl,nms));
874  nms->set_name(ns);
875  multistate_agenda.append(nms);
876  }
877  else // already seen this state, and is already named
878  nms->set_name(new_name);
879 
880  // Add new transition to current state
881  p_states[current->name()]
882  ->add_transition(nms->weight(),nms->name(),
883  i,transb(q)->out_symbol());
884 
885  }
886 
887  }
888  }
889  delete current;
890  // Probably want some progress summary
891  }
892 
893 }
894 
895 /***********************************************************************/
896 /* Build a WFST which accepts strings in a but not in b */
897 /***********************************************************************/
898 void EST_WFST::difference(const EST_WFST &a,const EST_WFST &b)
899 {
900  EST_WFST compb;
901 
902  // This is sort of a complement, but not quite
903  // But what would the name of this operation be?
904  compb.copy(b);
905  for (int i=0; i < compb.num_states(); i++)
906  {
907  if (compb.p_states[i]->type() == wfst_final)
908  compb.p_states[i]->set_type(wfst_error);
909  }
910 
911  uunion(a,compb);
912 }
913 
914 /***********************************************************************/
915 /* Remove states from which a final state can't be reached */
916 /***********************************************************************/
918 {
919  // Find all states which are reachable from the start state, and
920  // can reach a final state. Mark all others as error states
921  wfst_list wl;
922 
923  wl.append(a);
924  EST_WFST &ab = wl.first();
925 
926  ab.current_tag = ++traverse_tag;
927  for (int i=0; i < ab.p_num_states; i++)
928  ab.can_reach_final(i);
929  // This will copy only the non-error states
930  intersection(wl);
931 
932 }
933 
934 int EST_WFST::can_reach_final(int state)
935 {
936  // Return TRUE iff this state is final or can reach a final state
937 
938  if (p_states[state]->type() == wfst_final)
939  return TRUE;
940  else if (p_states[state]->type() == wfst_error)
941  return FALSE;
942  else if (p_states[state]->tag() == current_tag)
943  // Been here and it is reachable
944  return TRUE;
945  else
946  {
947  EST_Litem *i;
948  enum wfst_state_type current_val = p_states[state]->type();
949  enum wfst_state_type r = wfst_error;
950  // temporarily set this to error to stop infinite recursion
951  p_states[state]->set_type(wfst_error);
952 
953  for (i=p_states[state]->transitions.head(); i != 0; i=i->next())
954  // if any transition goes to something that reaches a final state
955  // set the value back to its original
956  if (can_reach_final(p_states[state]->transitions(i)->state()))
957  r = current_val;
958 
959  // Will be set back to original value iff a final state
960  // is reachable from here
961  p_states[state]->set_type(r);
962  if (r == wfst_error)
963  return FALSE;
964  else
965  {
966  p_states[state]->set_tag(current_tag);
967  return TRUE;
968  }
969  }
970 }
971 
972 /***********************************************************************/
973 /* True is wfst is deterministic */
974 /***********************************************************************/
976 {
977  // True if all states contains no multiple arcs with the same symbol
978  EST_IMatrix tab;
979 
980  tab.resize(p_in_symbols.length(),p_out_symbols.length());
981 
982  for (int i=0; i < p_num_states; i++)
983  {
984  tab.fill(0);
985  for (EST_Litem *t=state(i)->transitions.head(); t != 0; t=t->next())
986  {
987  if (tab(state(i)->transitions(t)->in_symbol(),
988  state(i)->transitions(t)->out_symbol()) == 1)
989  return FALSE;
990  else
991  tab(state(i)->transitions(t)->in_symbol(),
992  state(i)->transitions(t)->out_symbol()) = 1;
993  }
994  }
995  return TRUE;
996 }