Edinburgh Speech Tools
2.4-release
All
Classes
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Pages
track_example.cc
1
/************************************************************************/
2
/* */
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1996,1997 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/*************************************************************************/
33
/* */
34
/* Author: Richard Caley (rjc@cstr.ed.ac.uk) */
35
/* Date: Fri May 9 1997 */
36
/* ------------------------------------------------------------------- */
37
/* Example of declaration and use of tracks. */
38
/* */
39
/*************************************************************************/
40
41
42
#include <iostream>
43
#include <cstdlib>
44
#include "EST_Track.h"
45
#include "EST_Wave.h"
46
#include "EST_sigpr.h"
47
#include "EST_error.h"
48
49
50
/** @name EST_Track class example code
51
* @toc
52
* Some examples of track manipulations.
53
*
54
*/
55
//@{
56
57
int
main(
void
)
58
59
{
60
int
i, j;
61
62
/* This program is designed as an example not as something to run
63
so for testing purposes it simply exits */
64
exit(0);
65
/**@name Initialising and Resizing a Track
66
67
The constructor functions can be used to create a track with
68
zero frames and channels or a track with a specified number of
69
frames and channels
70
*/
71
72
//@{
73
//@{ code
74
EST_Track
tr;
// default track declaration
75
EST_Track
tra(500, 10);
// allocate track with 500 frames and 10 channels
76
//@} code
77
78
/** tracks can be resized at any time:
79
*/
80
//@{ code
81
tr.
resize
(10, 500);
// resize track to have 10 frames and 500 channels
82
tr.
resize
(500, 10);
// resize track to have 500 frames and 10 channels
83
//@} code
84
85
/** by default, resizing preserves values in the track. This
86
may involve copying some information, so if the existing values
87
are not needed, a flag can be set which usually results in
88
quicker resizing
89
*/
90
//@{ code
91
tr.
resize
(250, 5, 0);
// throw away any existing values
92
//@} code
93
/** If only the number of channels or the number of frames needs
94
to be changed, this can be done with the following functions:
95
*/
96
97
//@{ code
98
tr.
set_num_channels
(10);
// makes 10 channels, keeps same no of frames
99
100
tr.
set_num_frames
(400);
// makes 400 frames, keeps same no of channels
101
//@} code
102
/** The preserve flag works in the same way with these functions
103
*/
104
//@}
105
106
/** @name Simple Access
107
108
Values in the track can be accessed and set by frame
109
number and channel number.
110
111
The following resizes a track to have 500 frames and 10 channels
112
and fills every position with -5.
113
*/
114
//@{
115
//@{ code
116
tr.
resize
(500, 10);
117
118
for
(i = 0; i < tr.
num_frames
(); ++i)
119
for
(j = 0; j < tr.
num_channels
(); ++j)
120
tr.
a
(i, j) = -5.0;
121
122
//@} code
123
124
/** A well formed track will have a time value, specified in seconds,
125
for every frame. The time array can be filled directly:
126
*/
127
//@{ code
128
for
(i = 0; i < tr.
num_frames
(); ++i)
129
tr.
t
(i) = (float) i * 0.01;
130
//@} code
131
/** which fills the time array with values 0.0, 0.01,
132
0.02... 4.99. However, a shortcut function is provided for fixed
133
frame spacing:
134
*/
135
//@{ code
136
tr.
fill_time
(0.1);
137
138
//@} code
139
/** which fills the time array in a single call (note that the spacing given here is 0.1 s, not the 0.01 s used in the loop above). Frames do not have
140
to be evenly spaced, in pitch synchronous processing the time
141
array holds the time position of each pitch period. In such
142
cases each position in the time array must obviously be set
143
individually.</para><para>
144
145
Some representations have undefined values during certain
146
sections of the track, for example the F0 value during
147
unvoiced speech.</para><para>
148
149
The break/value array can be used to specify if a frame has an
150
undefined value. If a frame in this array is 1,
151
that means the amplitude is defined at that point. If 0, the
152
amplitude is undefined. By default, every frame has a value.
153
</para><para>
154
155
Breaks (undefined values) can be set by <method>set_break()
156
</method>. The following sets every frame from 50 to 99 as a
157
break:
158
*/
159
//@{ code
160
for
(i = 50; i < 100; ++i)
161
tr.
set_break
(i);
162
//@} code
163
/** frames can be turned back to values as follows:
164
*/
165
//@{ code
166
for
(i = 50; i < 100; ++i)
167
tr.
set_value
(i);
168
//@} code
169
/** It is up to individual functions to decide how to interpret breaks.
170
</para><para>
171
A frame's status can be checked as follows:
172
*/
173
//@{ code
174
if
(tr.
val
(60))
175
cout <<
"Frame 60 is not a break\n"
;
176
177
if
(tr.
track_break
(60))
178
cout <<
"Frame 60 is a break\n"
;
179
//@} code
180
//@}
181
182
/** @name Naming Channels
183
@id tr-example-naming-channels
184
185
While channels can be accessed by their index, it is often useful
186
to give them names and refer to them by those names.
187
188
The set_channel_name() function sets the name of a single channel:
189
*/
190
//@{
191
//@{ code
192
tr.
set_channel_name
(
"F0"
, 0);
193
tr.
set_channel_name
(
"energy"
, 1);
194
//@} code
195
196
/** An alternative is to use a predefined set of channel names
197
stored in a <emphasis>map</emphasis>. A track map
198
is simply a String List of strings which describe a channel name
199
configuration. The <method>resize</method> function can take
200
this and resize the number of channels to the number of channels
201
indicated in the map, and give each channel its name from the
202
map. For example:
203
*/
204
//@{ code
205
EST_StrList
map;
206
map.
append
(
"F0"
);
207
map.
append
(
"energy"
);
208
209
tr.
resize
(500, map);
// this makes a 2 channel track and sets the names to F0 and energy
210
//@} code
211
212
/** A convention is used for channels which comprise
213
components of a multi-dimensional analysis such as
214
cepstra. In such cases the channels are named
215
<replaceable>TYPE_I</replaceable>. The last coefficient is
216
always named <replaceable>TYPE_N</replaceable> regardless of
217
the number of coefficients. This is very useful in extracting
218
a set of related channels without needing to know the order
219
of the analysis.
220
221
For example, a track map might look like:
222
223
*/
224
//@{ code
225
226
map.
clear
();
227
map.
append
(
"F0"
);
228
map.
append
(
"energy"
);
229
230
map.
append
(
"cep_0"
);
231
map.
append
(
"cep_1"
);
232
map.
append
(
"cep_2"
);
233
map.
append
(
"cep_3"
);
234
map.
append
(
"cep_4"
);
235
map.
append
(
"cep_5"
);
236
map.
append
(
"cep_6"
);
237
map.
append
(
"cep_7"
);
238
map.
append
(
"cep_N"
);
239
240
tr.
resize
(500, map);
// makes an 11 channel track and sets the names
241
//@} code
242
243
/** This obviously gets unwieldy quite quickly, so the mapping
244
mechanism provides a short hand for multi-dimensional data.
245
246
*/
247
248
//@{ code
249
map.
clear
();
250
map.
append
(
"F0"
);
251
map.
append
(
"energy"
);
252
253
map.
append
(
"$cep-0+8"
);
254
255
tr.
resize
(500, map);
// does exactly as above
256
//@} code
257
258
/** Here $ indicates the special status, "cep" the name of the
259
coefficients, "-0" that the first is number 0 and "+8" that
260
there are 8 more to follow.
261
*/
262
263
//@}
264
265
266
/** @name Access single frames or single channels.
267
268
@id tr-example-frames-and-channels
269
270
Often functions perform their operations on only a single
271
frame or channel, and the track class provides a general
272
mechanism for doing this.
273
274
Single frames or channels can be accessed as EST_FVectors:
275
Given a track with 500 frames and 10 channels, the 50th frame
276
can be accessed as:
277
*/
278
//@{
279
//@{ code
280
EST_FVector
tmp_frame;
281
282
tr.
frame
(tmp_frame, 50);
283
//@} code
284
/** now tmp_frame is a 10 element vector, which is
285
a window into tr: any changes to the contents of tmp_frame will
286
change tr. tmp_frame cannot be resized. (This operation can
287
be thought in standard C terms as tmp_frame being a pointer
288
to the 50th frame of tr).
289
</para> <para>
290
Likewise with channels:
291
*/
292
//@{ code
293
EST_FVector
tmp_channel;
294
295
tr.
channel
(tmp_channel, 5);
296
//@} code
297
/** Again, tmp_channel is a 500 element vector, which is
298
a window into tr: any changes to the contents of tmp_channel will
299
change tr. tmp_channel cannot be resized.
300
</para><para>
301
Channels can also be extracted by name:
302
*/
303
//@{ code
304
tr.
channel
(tmp_channel,
"energy"
);
305
//@} code
306
/** not all the channels need be put into the temporary frame.
307
Imagine we have a track with an F0 channel, an energy channel and
308
9 cepstrum channels. The following makes a frame from the
309
50th frame, which only includes the cepstral information in
310
channels 2 through 10 */
311
//@{ code
312
tr.
frame
(tmp_frame, 50, 2, 9);
313
//@} code
314
/** Likewise, the 5th channel with only the last 100 frames can be set up
315
as: */
316
//@{ code
317
tr.
channel
(tmp_channel, 5, 400, 100);
318
//@} code
319
//@}
320
/** @name Access multiple frames or channels.
321
@id tr-example-sub-tracks
322
In addition to extracting single frames and channels, multiple
323
frame and channel portions can be extracted in a similar
324
way. In the following example, we make a sub-track sub, which
325
points to the entire cepstrum portion of a track (channels 2
326
through 10)
327
*/
328
//@{
329
//@{ code
330
EST_Track
sub;
331
332
tr.
sub_track
(sub, 0, EST_ALL, 2, 9);
333
334
//@} code
335
336
/** <parameter>sub</parameter> behaves exactly like a normal
337
track in every way, except that it cannot be resized. Its
338
contents behave like a point into the designated portion of
339
<parameter>tr</parameter>, so changing
340
<parameter>sub</parameter> will change<parameter>
341
tr</parameter>.
342
343
</para><para> The first argument is the
344
<parameter>sub</parameter> track. The second states the start
345
frame and the total number of frames required. EST_ALL is a
346
special constant that specifies that all the frames are
347
required here. The next argument is the start channel number
348
(remember channels are numbered from 0), and the last argument
349
is the total number of channels required. </para><para>
350
351
This facility is particularly useful for using standard
352
signal processing functions efficiently. For example,
353
the <function>melcep</function> in the signal processing library
354
takes a waveform and produces a mel-scale cepstrum. It determines
355
the order of the cepstral analysis by the number of channels in
356
the track it is given, which has already been allocated to have
357
the correct number of frames and channels.
358
359
</para><para> The following will process the waveform
360
<parameter>sig</parameter>, produce a 10th order mel cepstrum
361
and place the output in <parameter>sub</parameter>. (For
362
explanation of the other options see
363
<function>melcep</function>.) */
364
//@{ code
365
EST_Wave
sig;
366
367
melcep(sig, sub, 1.0, 20, 22);
368
//@} code
369
370
/** because we have made <parameter>sub</parameter> a window
371
into <parameter>tr</parameter>, the melcep function writes its
372
output into the correct location, i.e. channels 2-10 of tr. If
373
it were not for the sub_track facility, either a separate track
374
of the right size would be passed into melcep and then it
375
would be copied into tr (wasteful), or else tr would be passed
376
in and other arguments would have to specify which channels
377
should be written to (messy). </para><para>
378
379
Sub-tracks can also be set using channel names. The
380
following example does exactly as above, but is referenced by
381
the name of the first channel required and the number of
382
channels to follow: */
383
//@{ code
384
385
tr.
sub_track
(sub, 0, EST_ALL,
"cep_0"
,
"cep_N"
);
386
//@} code
387
/** and this specifies the end by a string also:
388
*/
389
//@{ code
390
tr.
sub_track
(sub, 0, EST_ALL,
"cep_0"
,
"cep_N"
);
391
//@} code
392
/** sub_tracks can be any set of continuous frames and
393
channels. For example if a word started at frame 47 and ended
394
at frame 86, the following would set a sub track to that
395
portion: */
396
//@{ code
397
398
tr.
sub_track
(sub, 47, 39,
"cep_0"
,
"cep_N"
);
399
400
//@} code
401
402
/** We can step through the frames of a Track using a standard
403
* iterator. The frames are returned as one-frame sub-tracks.
404
*/
405
406
//@{ code
407
EST_Track::Entries
frames;
408
409
// print out the time of every 50th frame
410
cout <<
"Times:"
;
411
412
for
(frames.
begin
(tr); frames; ++frames)
413
{
414
const
EST_Track
&frame = *frames;
415
if
(frames.
n
() % 50 ==0)
416
cout <<
" "
<< frames.
n
() <<
"["
<< frame.
t
() <<
"]"
;
417
}
418
cout <<
"\n"
;
419
420
//@} code
421
422
/** The <function>channel</function>, <function>frame</function>
423
and <function>sub_track</function> functions are most commonly
424
used to write into a track using a convenient
425
sub-portion. Sometimes, however a simple copy is required
426
whose contents can be written without affecting the original.
427
428
The <member>copy_sub_track</member> function does this */
429
//@{ code
430
EST_Track
tr_copy;
431
432
// tr.copy_sub_track(tr_copy, 47, 39, "cep_0", "cep_N");
433
//@} code
434
435
/** Individual frames and channels can be copied out into
436
pre-allocated float * arrays as follows:
437
*/
438
//@{ code
439
float
*channel_buf, *frame_buf;
440
channel_buf =
new
float
[tr.
num_frames
()];
441
frame_buf =
new
float
[tr.
num_channels
()];
442
443
tr.
copy_channel_out
(5, channel_buf);
// copy channel 5 into channel_buf
444
tr.
copy_frame_out
(43, frame_buf);
// copy frame 43 into frame_buf
445
//@} code
446
447
/** Individual frames and channels can be copied into the track
448
from float * arrays as follows:
449
*/
450
//@{ code
451
tr.
copy_channel_in
(5, channel_buf);
// copy channel_buf into channel 5
452
tr.
copy_frame_in
(43, frame_buf);
// copy frame_buf into frame 43
453
//@} code
454
//@}
455
456
457
/** @name Auxiliary Channels
458
Auxiliary channels are used for storing frame information other than
459
amplitude coefficients, for example voicing decisions and points of
460
interest in the track.
461
462
Auxiliary channels always have the same number of frames as the
463
amplitude channels. They are resized by assigning names to the
464
channels that need to be created:
465
*/
466
//@{
467
//@{ code
468
469
470
EST_StrList
aux_names;
471
472
aux_names.
append
(
"voicing"
);
473
aux_names.
append
(
"join_points"
);
474
aux_names.
append
(
"cost"
);
475
476
tr.
resize_aux
(aux_names);
477
478
//@} code
479
/** The following fills in these three channels with some values:
480
*/
481
//@{ code
482
483
for
(i = 0; i < 500; ++i)
484
{
485
tr.aux(i,
"voicing"
) = i;
486
tr.aux(i,
"join_points"
) =
EST_String
(
"stuff"
);
487
tr.aux(i,
"cost"
) = 0.111;
488
}
489
//@} code
490
//@}
491
492
/** @name File I/O
493
Tracks in various formats can be saved and loaded:
494
495
Save as an HTK file:
496
*/
497
//@{
498
//@{ code
499
if
(tr.
save
(
"tmp/track.htk"
,
"htk"
) != write_ok)
500
EST_error(
"can't save htk file\n"
);
501
//@} code
502
/** Save as an EST file:
503
*/
504
//@{ code
505
if
(tr.
save
(
"tmp/track.est"
,
"est"
) != write_ok)
506
EST_error(
"can't save est file\n"
);
507
//@} code
508
/** Save as an ascii file:
509
*/
510
//@{ code
511
if
(tr.
save
(
"tmp/track.ascii"
,
"ascii"
) != write_ok)
512
EST_error(
"can't save ascii file\n"
);
513
//@} code
514
/** The file type is automatically determined from the file's
515
header during loading:
516
*/
517
//@{ code
518
519
EST_Track
tr2;
520
if
(tr2.
load
(
"tmp/track.htk"
) != read_ok)
521
EST_error(
"can't reload htk\n"
);
522
//@} code
523
524
/** If no header is found, the function assumes the
525
file is ascii data, with a fixed frame shift, arranged with rows
526
representing frames and columns channels. In this case, the
527
frame shift must be specified as an argument to this function:
528
*/
529
//@{ code
530
if
(tr.
load
(
"tmp/track.ascii"
, 0.01) != read_ok)
531
EST_error(
"can't reload ascii file\n"
);
532
//@} code
533
//@}
534
535
exit(0);
536
}
537
538
//@}
539
540
541
542
543
544
545
testsuite
track_example.cc
Generated on Wed Dec 24 2014 09:16:36 for Edinburgh Speech Tools by
1.8.3.1