Edinburgh Speech Tools
2.4-release
All
Classes
Functions
Variables
Typedefs
Enumerations
Enumerator
Friends
Pages
token_example.cc
1
/************************************************************************/
2
/* */
3
/* Centre for Speech Technology Research */
4
/* University of Edinburgh, UK */
5
/* Copyright (c) 1996,1997 */
6
/* All Rights Reserved. */
7
/* */
8
/* Permission is hereby granted, free of charge, to use and distribute */
9
/* this software and its documentation without restriction, including */
10
/* without limitation the rights to use, copy, modify, merge, publish, */
11
/* distribute, sublicense, and/or sell copies of this work, and to */
12
/* permit persons to whom this work is furnished to do so, subject to */
13
/* the following conditions: */
14
/* 1. The code must retain the above copyright notice, this list of */
15
/* conditions and the following disclaimer. */
16
/* 2. Any modifications must be clearly marked as such. */
17
/* 3. Original authors' names are not deleted. */
18
/* 4. The authors' names are not used to endorse or promote products */
19
/* derived from this software without specific prior written */
20
/* permission. */
21
/* */
22
/* THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK */
23
/* DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING */
24
/* ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT */
25
/* SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE */
26
/* FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES */
27
/* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN */
28
/* AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, */
29
/* ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF */
30
/* THIS SOFTWARE. */
31
/* */
32
/************************************************************************/
33
/* Author: Alan W Black */
34
/* Date: May 1997 */
35
/************************************************************************/
36
/* */
37
/* Example of reading a file using the tokenizer */
38
/* */
39
/************************************************************************/
40
41
#include <cstdlib>
42
#include "EST_Token.h"
43
44
// Build the string literal DATA from the build-time setting DATAC.
//
// Stringification has to go through two macro levels: the operand of the
// `#` operator is NOT macro-expanded, so a single-level helper applied to
// DATAC would always produce the literal "DATAC" instead of the value that
// DATAC was defined as.  EST_STRINGIZE expands its argument first and then
// hands the result to EST_STRINGIZE_RAW, which does the actual quoting.
// (The helper names also avoid the reserved `__` identifier prefix.)
#define EST_STRINGIZE_RAW(X) #X
#define EST_STRINGIZE(X) EST_STRINGIZE_RAW(X)

#if defined(DATAC)
#    define DATA EST_STRINGIZE(DATAC)
#endif
48
49
int
main(
int
argc,
char
**argv)
50
{
51
// Simple program to read all the tokens in the named file
52
// a print a summary of them
53
EST_TokenStream
ts;
54
int
tokens, alices, quotes;
55
EST_Token
t;
56
EST_String
fname;
57
58
if
(argc > 2)
59
{
60
cerr << argv[0] <<
": wrong number of arguments\n"
;
61
exit(-1);
62
}
63
else
if
(argc == 2)
64
fname = argv[1];
65
else
66
fname = DATA
"/alice"
;
67
68
if
(ts.
open
(fname) == -1)
69
{
70
cerr << argv[0] <<
": can't open input file \""
<< argv[1] <<
71
"\"\n"
;
72
exit(-1);
73
}
74
75
// Control of whitespace characters, single character symbols,
76
// pre and post punctuation may be set here.
77
78
// The defaults are standard whitespace, and nothing for the rest
79
// (this is like awk's basic tokenizer). For language analysis
80
// you'll probably want to modify the punctuation
81
// \173 is '{', it is inserted by number because of a doc++ problem.
82
83
ts.
set_PrePunctuationSymbols
(
"\173[(\"'"
);
84
ts.
set_PunctuationSymbols
(EST_Token_Default_PunctuationSymbols);
85
86
// Note you may set quotes so quoted tokens are read as single
87
// tokens (a la C)
88
89
for
(tokens=quotes=alices=0; !ts.
eof
(); tokens++)
90
{
91
t = ts.
get
();
92
if
(t ==
"Alice"
)
93
alices++;
94
if
(t.prepunctuation().
contains
(
"\""
))
95
quotes++;
96
}
97
98
printf(
"Input file contains:\n"
);
99
printf(
" %5d tokens\n"
,tokens);
100
printf(
" %5d tokens preceeded by double quotes\n"
,quotes);
101
printf(
" %5d occurrences of Alice\n"
,alices);
102
103
return
0;
104
}
105
106
testsuite
token_example.cc
Generated on Wed Dec 24 2014 09:16:36 for Edinburgh Speech Tools by
1.8.3.1