Edinburgh Speech Tools
2.4-release
All
Classes
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Pages
wfst_train_main.cc
1
/*************************************************************************/
2
/* */
3
/* Language Technologies Institute */
4
/* Carnegie Mellon University */
5
/* Copyright (c) 1999 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* CARNEGIE MELLON UNIVERSITY AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL CARNEGIE MELLON UNIVERSITY NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/*************************************************************************/
33
/* Author : Alan W Black */
34
/* Date : October 1999 */
35
/*-----------------------------------------------------------------------*/
36
/* A training method for splitting states in a WFST from data */
37
/* */
38
/*=======================================================================*/
39
#include <cstdlib>
40
#include <cstdio>
41
#include <iostream>
42
#include <fstream>
43
#include <cstring>
44
#include "EST.h"
45
#include "EST_simplestats.h"
46
#include "EST_WFST.h"
47
48
LISP load_string_data(
EST_WFST
&wfst,
EST_String
&filename);
49
void
wfst_train(
EST_WFST
&wfst, LISP data);
50
51
static
int
wfst_train_main(
int
argc,
char
**argv);
52
53
/** @name <command>wfst_train</command> <emphasis>Train a weighted finite-state transducer</emphasis>
54
@id wfst-train-manual
55
* @toc
56
*/
57
58
//@{
59
60
61
/**@name Synopsis
62
*/
63
//@{
64
65
//@synopsis
66
67
/**
68
This takes an existing WFST and data and splits states in an entropy
69
reducing way to produce a new WFST that better models the given data.
70
71
*/
72
73
//@}
74
75
/**@name OPTIONS
76
*/
77
//@{
78
79
//@options
80
81
//@}
82
83
84
int
main(
int
argc,
char
**argv)
85
{
86
87
wfst_train_main(argc,argv);
88
89
exit(0);
90
return
0;
91
}
92
93
static
int
wfst_train_main(
int
argc,
char
**argv)
94
{
95
// Train a WFST from data building new states
96
EST_Option
al;
97
EST_StrList
files;
98
EST_String
wfstfile;
99
FILE *ofd;
100
101
parse_command_line
102
(argc, argv,
103
EST_String
(
"[WFSTFILE] [input file0] ... [-o output file]\n"
)+
104
"Summary: Train a WFST on data\n"
+
105
"-wfst <ifile> The WFST to start from\n"
+
106
"-data <ifile> Sentences in the language recognised by WFST\n"
+
107
"-o <ofile> Output file for trained WFST\n"
+
108
"-heap <int> {210000}\n"
+
109
" Set size of Lisp heap, needed for large rulesets\n"
,
110
files, al);
111
112
if
(al.
present
(
"-o"
))
113
{
114
if
((ofd=fopen(al.
val
(
"-o"
),
"w"
)) == NULL)
115
EST_error(
"can't open output file for writing \"%s\""
,
116
(
const
char
*)al.
val
(
"-o"
));
117
}
118
else
119
ofd = stdout;
120
121
if
(al.
present
(
"-wfst"
))
122
wfstfile = al.
val
(
"-wfst"
);
123
else
124
EST_error(
"no WFST specified"
);
125
126
siod_init(al.
ival
(
"-heap"
));
127
siod_est_init();
128
129
EST_WFST
wfst;
130
LISP data;
131
132
if
(wfst.
load
(wfstfile) != format_ok)
133
EST_error(
"failed to read WFST from \"%s\""
,
134
(
const
char
*)wfstfile);
135
136
data = load_string_data(wfst,al.
val
(
"-data"
));
137
138
wfst_train(wfst,data);
139
140
if
(wfst.
save
(al.
val
(
"-o"
)) != write_ok)
141
EST_error(
"failed to write trained WFST to \"%s\""
,
142
(
const
char
*)al.
val
(
"-o"
));
143
144
return
0;
145
146
}
147
main
wfst_train_main.cc
Generated on Wed Dec 24 2014 09:16:35 for Edinburgh Speech Tools by
1.8.3.1