Edinburgh Speech Tools
2.4-release
All
Classes
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Pages
pda_main.cc
1
/*************************************************************************/
2
/* */
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1996 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/*************************************************************************/
33
/* Author : Paul Taylor */
34
/* Date : May 1994 */
35
/*-----------------------------------------------------------------------*/
36
/* Pitch Detection Algorithm Main routine */
37
/* */
38
/*=======================================================================*/
39
#include <fstream>
40
#include "EST.h"
41
#include "sigpr/EST_sigpr_utt.h"
42
#include "EST_cmd_line_options.h"
43
44
// Copies command line settings from `al` into the feature set `a_list`
// (see the definition further down in this file).
void set_parameters(EST_Features &a_list, EST_Option &al);
45
46
// Not defined in this listing; set_parameters() calls it with a feature
// name (`option`) and a command line flag (`arg`).
// NOTE(review): presumably stores the value of flag `arg` from `al` in `op`
// under the name `option` when the flag was given -- confirm against the
// definition.
void option_override(EST_Features &op, EST_Option al,
                     const EST_String &option, const EST_String &arg);
48
49
// Writes the track `fz` as pitchmark text to `filename` ("-" selects cout).
// Returns 0 on success and -1 if the output cannot be opened.
static int save_pm(EST_String filename, EST_Track fz);
50
51
/** @name <command>pda</command> <emphasis>Pitch Detection Algorithm</emphasis>
52
@id pda-manual
53
* @toc
54
*/
55
56
//@{
57
58
/**@name Synopsis
59
*/
60
//@{
61
62
//@synopsis
63
64
/**
65
pda is a pitch detection algorithm that produces a fundamental frequency
66
contour from a speech waveform file. At present only the
67
super resolution pitch determination algorithm is implemented.
68
See (Medan, Yair, and Chazan, 1991) and (Bagshaw et al., 1993) for a detailed
69
description of the algorithm.
70
</para><para>
71
72
The default values given below were found to optimise the performance
73
of the pitch determination algorithm for speech data sampled at 20kHz
74
using a 16\-bit waveform and low pass filter with a 600Hz cut-off
75
frequency and more than \-85dB rejection above 700Hz. The best
76
performance is obtained if the [\-p] flag is passed. </para><para>
77
*/
78
79
//@}
80
81
/**@name Options
82
*/
83
//@{
84
85
//@options
86
87
//@}
88
89
90
int
main (
int
argc,
char
*argv[])
91
{
92
EST_Track
fz;
93
EST_Wave
sig;
94
EST_Option
al;
95
EST_Features
op;
96
EST_String
out_file(
"-"
);
97
EST_StrList
files;
98
99
parse_command_line
100
(argc, argv,
101
EST_String
(
"[input file] -o [output file] [options]\n"
)+
102
"Summary: pitch track waveform files\n"
103
"use \"-\" to make input and output files stdin/out\n"
104
"-h Options help\n\n"
+
105
options_wave_input()+
106
options_pda_general()+
107
options_pda_srpd()+
108
options_track_output(),
109
files, al);
110
111
default_pda_options(op);
112
set_parameters(op, al);
113
114
if
(read_wave(sig, files.
first
(), al) != format_ok)
115
exit(-1);
116
117
out_file = al.
present
(
"-o"
) ? al.
val
(
"-o"
) : (
EST_String
)
"-"
;
118
119
pda(sig, fz, op);
// do f0 tracking
120
121
if
(al.
present
(
"-pm"
))
122
save_pm(out_file, fz);
123
else
124
fz.
save
(out_file, op.
S
(
"f0_file_type"
,
"0"
));
125
126
if
(al.
present
(
"-diff"
))
127
{
128
fz = differentiate(fz);
129
fz.
save
(out_file +
".diff"
, op.
S
(
"f0_file_type"
,
"0"
));
130
}
131
return
0;
132
}
133
134
135
/**
 * Transfer command line settings from `al` into the feature set `op`
 * that main() later passes to pda().
 *
 * "srpd_resize" is always set to 1.  Each (feature, flag) pair in the table
 * below is forwarded to option_override() in the listed order.  Finally
 * "do_low_pass" is set to "true" when -L reads as "true", and to "false"
 * when -R reads as "true" (so -R wins if both are given).
 *
 * NOTE(review): "-R" is used twice here -- as the flag behind
 * "v2uv_coef_thresh_ratio" and as the boolean that disables low pass
 * filtering.  Confirm against the option definitions which use is intended.
 *
 * @param op  feature set to update
 * @param al  parsed command line options
 */
void set_parameters(EST_Features &op, EST_Option &al)
{
    // { feature name, command line flag }
    static const char *const overrides[][2] = {
        // general options
        { "pda_frame_shift",        "-shift"  },
        { "pda_frame_length",       "-length" },
        { "max_pitch",              "-fmax"   },
        { "min_pitch",              "-fmin"   },
        // low pass filtering options.
        { "lpf_cutoff",             "-u"      },
        { "lpf_order",              "-forder" },
        // srpd options
        { "decimation",             "-d"      },
        { "noise_floor",            "-n"      },
        { "min_v2uv_coef_thresh",   "-m"      },
        { "v2uv_coef_thresh_ratio", "-R"      },
        { "v2uv_coef_thresh",       "-H"      },
        { "anti_doubling_thresh",   "-t"      },
        { "peak_tracking",          "-P"      },
        // file types
        { "f0_file_type",           "-otype"  },
        { "wave_file_type",         "-itype"  }
    };
    const int n_overrides = sizeof(overrides) / sizeof(overrides[0]);

    op.set("srpd_resize", 1);

    for (int k = 0; k < n_overrides; ++k)
        option_override(op, al, overrides[k][0], overrides[k][1]);

    if (al.val("-L", 0) == "true")
        op.set("do_low_pass", "true");
    if (al.val("-R", 0) == "true")
        op.set("do_low_pass", "false");
}
185
186
/* a_list.override_val("sample_rate", al.val("-f", 0));
187
a_list.override_val("min_pitch", al.val("-fmin", 0));
188
a_list.override_val("max_pitch", al.val("-fmax", 0));
189
a_list.override_val("pda_frame_shift", al.val("-s", 0));
190
a_list.override_val("pda_frame_length",al.val("-l", 0));
191
192
// low pass filtering options.
193
a_list.override_val("lpf_cutoff",al.val("-u", 0));
194
a_list.override_val("lpf_order",al.val("-forder", 0));
195
196
//sprd options
197
a_list.override_val("decimation", al.val("-d", 0));
198
a_list.override_val("noise_floor", al.val("-n", 0));
199
a_list.override_val("min_v2uv_coef_thresh", al.val("-m", 0));
200
a_list.override_val("v2uv_coef_thresh_ratio", al.val("-r", 0));
201
a_list.override_val("v2uv_coef_thresh", al.val("-H", 0));
202
a_list.override_val("anti_doubling_thresh", al.val("-t", 0));
203
a_list.override_val("peak_tracking", al.val("-P", 0));
204
if (al.val("-L", 0) == "true")
205
a_list.override_val("do_low_pass", "true");
206
if (al.val("-R", 0) == "true")
207
a_list.override_val("do_low_pass", "false");
208
a_list.override_val("f0_file_type", al.val("-otype", 0));
209
a_list.override_val("wave_file_type", al.val("-itype", 0));
210
*/
211
212
213
/**
 * Write an F0 track as a text list of pitchmark positions.
 *
 * A fixed header ("XAO1", line style settings, a form feed) is written
 * first.  Then, for every frame where fz.val(i) is true and the F0 value
 * fz.a(i) is positive, one line containing (position + period) * 1000.0 is
 * written, where period = 1 / F0 and position accumulates the periods.
 * After a run of skipped frames, position restarts from the frame time
 * fz.t(i).
 * NOTE(review): presumably fz.t() is in seconds so the output is in
 * milliseconds -- confirm against EST_Track.
 *
 * @param filename  output file name; "-" writes to cout
 * @param fz        track holding the F0 values
 * @return 0 on success, -1 if the output cannot be opened or a write fails
 */
static int save_pm(EST_String filename, EST_Track fz)
{
    ostream *outf;
    float position, period;

    if (filename == "-")
        outf = &cout;
    else
        outf = new ofstream(filename);

    if (!(*outf))
    {
        cerr << "save_pm: can't write to file \"" << filename << "\"" << endl;
        // Release the stream that failed to open; it used to be leaked here.
        if (outf != &cout)
            delete outf;
        return -1;
    }

    *outf << "XAO1\n\n"; // xmg header identifier.
    *outf << "LineType bars \n";
    *outf << "LineStyle solid \n";
    *outf << "LineWidth 0 \n";
    *outf << "Freq 16\n";
    *outf << "Format Binary \n";
    *outf << char(12) << "\n"; // control L character

    position = 0.0;
    int gap = 0;
    for (int i = 0; i < fz.num_frames(); ++i)
    {
        // A non-positive F0 would make the period infinite (or negative)
        // and corrupt every following position, so treat it as a gap.
        if (fz.val(i) && fz.a(i) > 0.0)
        {
            if (gap)
            {
                // First usable frame after a gap: resynchronise with the
                // frame's own time stamp.
                position = fz.t(i);
                gap = 0;
            }
            period = 1.0 / fz.a(i);
            *outf << (position + period) * 1000.0 << endl;
            position += period;
        }
        else
            gap = 1;
    }

    // Report write errors (e.g. disk full) instead of always returning 0.
    outf->flush();
    const int status = outf->fail() ? -1 : 0;

    if (outf != &cout)
        delete outf;

    return status;
}
261
262
/**@name Examples
263
264
Pitch detection on typical male voice, using low pass filtering:
265
<screen>
266
$ pda kdt_010.wav -o kdt_010.f0 -fmin 80 -fmax 200 -L
267
</screen>
268
*/
269
//@{
270
271
//@}
272
//@}
main
pda_main.cc
Generated on Wed Dec 24 2014 09:16:35 for Edinburgh Speech Tools by
1.8.3.1