44 #include "EST_Wagon.h"
45 #include "EST_cutils.h"
46 #include "EST_multistats.h"
47 #include "EST_Token.h"
48 #include "EST_cmd_line.h"
// Forward declarations of the file-local (static) helpers defined below.
50 static int wagon_test_main(
int argc,
char **argv);
51 static LISP find_feature_value(
const char *feature,
52 LISP vector, LISP description);
53 static LISP wagon_vector_predict(LISP tree, LISP vector, LISP description);
// NOTE(review): the three entries below show only the tail of their parameter
// lists; the lines that name the functions are not visible in this view.
56 LISP tree, LISP description,
int all_info);
58 LISP tree, LISP description);
60 LISP tree, LISP description);
// Program entry point: passes the command-line arguments unchanged to
// wagon_test_main().
// NOTE(review): the value returned by wagon_test_main() is not used on the
// call line below -- confirm which exit status is intended.
83 int main(
int argc,
char **argv)
86 wagon_test_main(argc,argv);
// Driver for the CART tree tester.
//
// Visible steps, in order: build the usage text, initialise SIOD with the
// -heap value, load the field description (-desc) and the tree (-tree), open
// the data file (-data), load the vertex track (-track), open the output file
// (-o), locate the predictee field in the description, then dispatch to one
// of simple_predict / test_tree_float / test_tree_vector / test_tree_class.
// Each failure path visible here reports to cerr, prefixed with argv[0].
92 static int wagon_test_main(
int argc,
char **argv)
97 LISP description,tree=NIL;;
// Usage text for the command-line options.
104 "Summary: program to test CART models on data\n"+
105 "-desc <ifile> Field description file\n"+
106 "-data <ifile> Datafile, one vector per line\n"+
107 "-tree <ifile> File containing CART tree\n"+
109 " track for vertex indices\n"+
110 "-predict Predict for each vector returning full vector\n"+
111 "-predict_val Predict for each vector returning just value\n"+
112 "-predictee <string>\n"+
113 " name of field to predict (default is first field)\n"+
114 "-heap <int> {210000}\n"+
115 " Set size of Lisp heap, should not normally need\n"+
116 " to be changed from its default\n"+
117 "-o <ofile> File to save output in\n",
// SIOD is initialised with the integer value of the -heap option.
120 siod_init(al.
ival(
"-heap"));
// description is registered with gc_protect() and then set to the car of
// whatever vload() returns for the -desc file.
124 gc_protect(&description);
125 description = car(vload(al.
val(
"-desc"),1));
129 cerr << argv[0] <<
": no description file specified" << endl;
// The tree is read the same way as the description, from the -tree file.
136 tree = car(vload(al.
val(
"-tree"),1));
139 cerr << argv[0] <<
": no tree found in \"" << al.
val(
"-tree")
146 cerr << argv[0] <<
": no tree file specified" << endl;
// A non-zero result from data.open() is treated as failure to open -data.
152 if (data.
open(al.
val(
"-data")) != 0)
154 cerr << argv[0] <<
": can't open data file \"" <<
155 al.
val(
"-data") <<
"\" for input." << endl;
161 cerr << argv[0] <<
": no data file specified" << endl;
// Vertex track is loaded from the -track file.
167 wgn_VertexTrack.
load(al.
val(
"-track"));
// Output goes to the -o file, opened for writing; NULL from fopen is an error.
172 if ((wgn_output = fopen(al.
val(
"-o"),
"w")) == NULL)
174 cerr << argv[0] <<
": can't open output file \"" <<
175 al.
val(
"-o") <<
"\"" << endl;
// Find the predictee: walk the description, counting entries in i, and
// compare the requested name with each entry's first element (the field name).
185 wgn_predictee_name = al.
val(
"-predictee");
186 for (l=description,i=0; l != NIL; l=cdr(l),i++)
187 if (streq(wgn_predictee_name,get_c_string(car(car(l)))))
// NOTE(review): this message streams wgn_predictee, which is used as an index
// into the description below, rather than wgn_predictee_name, the name that
// was searched for -- confirm which one the user should see.
194 cerr << argv[0] <<
": predictee \"" << wgn_predictee <<
195 "\" not in description\n";
// predict_type is the second element of the predictee's description entry.
198 const char *predict_type =
199 get_c_string(car(cdr(siod_nth(wgn_predictee,description))));
// Dispatch. The condition guarding the first call is not visible here
// (presumably the -predict option -- confirm). -predict_val calls
// simple_predict with TRUE; otherwise "float"/"int" types go to
// test_tree_float, "vector" goes to test_tree_vector, and test_tree_class is
// the remaining call.
202 simple_predict(data,wgn_output,tree,description,FALSE);
203 else if (al.
present(
"-predict_val"))
204 simple_predict(data,wgn_output,tree,description,TRUE);
205 else if (streq(predict_type,
"float") ||
206 streq(predict_type,
"int"))
207 test_tree_float(data,wgn_output,tree,description);
209 else if (streq(predict_type,
"vector"))
210 test_tree_vector(data,wgn_output,tree,description);
213 test_tree_class(data,wgn_output,tree,description);
// Special-cases an output stream that is not stdout (the guarded statement is
// not visible here).
215 if (wgn_output != stdout)
// Body fragment of the routine that builds one data vector (presumably
// get_data_vector, going by the callers below -- its header is not visible).
// One token t is handled per description entry d; how t is obtained is not
// visible here.
229 for (d=description; d != NIL; d=cdr(d))
// For every field after the first, a newline in the token's leading
// whitespace is reported as a vector with the wrong number of features.
233 if ((d != description) && (t.whitespace().
contains(
"\n")))
235 cerr <<
"wagon_test: unexpected newline within vector " <<
236 t.
row() <<
" wrong number of features" << endl;
// Fields whose type (second element of the entry) is "float" or "int" are
// converted with atof() and consed on as a flonum.
// NOTE(review): atof() gives no indication of a failed conversion.
239 if (streq(get_c_string(car(cdr(car(d)))),
"float") ||
240 streq(get_c_string(car(cdr(car(d)))),
"int"))
241 v = cons(flocons(atof(t.string())),v);
// If the entry's second element is "_other_" and the token is not found by
// siod_member_str() in the entry's remaining elements, the symbol "_other_"
// is stored instead of the token.
242 else if ((streq(get_c_string(car(cdr(car(d)))),
"_other_")) &&
243 (siod_member_str(t.string(),cdr(car(d))) == NIL))
244 v = cons(strintern(
"_other_"),v);
// Otherwise the token text itself is interned. Values are consed onto the
// front of v, so v is built in reverse field order at this point.
246 v = cons(strintern(t.string()),v);
// Tail of the parameter list and body of the plain prediction printer
// (presumably simple_predict, the only callee above that takes an int
// all_info -- the line naming the function is not visible here).
253 LISP tree, LISP description,
int all_info)
// Reads vectors with get_data_vector() until it returns NIL.
258 for (vector=get_data_vector(data,description);
259 vector != NIL; vector=get_data_vector(data,description))
261 predict = wagon_vector_predict(tree,vector,description);
// Two renderings: only the last element of the prediction, or the whole
// prediction. The test choosing between them is not visible here (presumably
// all_info -- confirm).
263 val = siod_sprint(car(reverse(predict)));
265 val = siod_sprint(predict);
// One printed prediction per line on output.
266 fprintf(output,
"%s\n",(
const char *)val);
// Tail of the parameter list and body of the numeric tester (presumably
// test_tree_float -- the line naming the function is not visible here).
// Compares the predicted value with the actual predictee value for every
// data vector and prints a one-line summary.
271 LISP tree, LISP description)
274 float predict_val,real_val;
279 for (vector=get_data_vector(data,description);
280 vector != NIL; vector=get_data_vector(data,description))
282 predict = wagon_vector_predict(tree,vector,description);
// Predicted value: last element of the prediction. Actual value: element
// number wgn_predictee of the data vector.
283 predict_val = get_c_float(car(reverse(predict)));
284 real_val = get_c_float(siod_nth(wgn_predictee,vector));
287 error = predict_val-real_val;
// Running sums of squares and cross products of predicted and actual values.
290 xx += predict_val*predict_val;
291 yy += real_val*real_val;
292 xy += predict_val*real_val;
// Summary line; the arguments supplying the four figures are not visible here.
299 fprintf(output,
";; RMSE %1.4f Correlation is %1.4f Mean (abs) Error %1.4f (%1.4f)\n",
// Tail of the parameter list and body of one of the two four-argument
// class-style testers (test_tree_vector or test_tree_class; the line naming
// the function is not visible here, and the visible lines are the same as
// the other one's).
307 LISP tree, LISP description)
313 LISP vector,w,predict;
317 for (vector=get_data_vector(data,description);
318 vector != NIL; vector=get_data_vector(data,description))
320 predict = wagon_vector_predict(tree,vector,description);
// Predicted class: last element of the prediction. Actual class: element
// number wgn_predictee of the data vector.
321 predict_class = get_c_string(car(reverse(predict)));
322 real_class = get_c_string(siod_nth(wgn_predictee,vector));
// prob is the value paired with the actual class in an association list; the
// list argument is not visible here.
323 prob = get_c_float(car(cdr(siod_assoc_str(real_class,
// Record one (actual, predicted) occurrence.
330 pairs.
add_item(real_class,predict_class,1);
// lex receives every element after the first of the predictee's description
// entry.
332 for (w=cdr(siod_nth(wgn_predictee,description)); w != NIL; w = cdr(w))
333 lex.
append(get_c_string(car(w)));
336 print_confusion(m,pairs,lex);
// NOTE(review): this line is written to stdout, whereas the caller passes
// wgn_output as the output stream -- confirm that is intended.
337 fprintf(stdout,
";; entropy %g perplexity %g\n",
338 (-1*(H/Q)),pow(2.0,(-1*(H/Q))));
// Tail of the parameter list and body of the second four-argument class-style
// tester (test_tree_class or test_tree_vector; the line naming the function
// is not visible here).
343 LISP tree, LISP description)
351 LISP vector,w,predict;
355 for (vector=get_data_vector(data,description);
356 vector != NIL; vector=get_data_vector(data,description))
358 predict = wagon_vector_predict(tree,vector,description);
// Predicted class: last element of the prediction. Actual class: element
// number wgn_predictee of the data vector.
359 predict_class = get_c_string(car(reverse(predict)));
360 real_class = get_c_string(siod_nth(wgn_predictee,vector));
// prob is the value paired with the actual class in an association list; the
// list argument is not visible here.
361 prob = get_c_float(car(cdr(siod_assoc_str(real_class,
// Record one (actual, predicted) occurrence.
368 pairs.
add_item(real_class,predict_class,1);
// lex receives every element after the first of the predictee's description
// entry.
370 for (w=cdr(siod_nth(wgn_predictee,description)); w != NIL; w = cdr(w))
371 lex.
append(get_c_string(car(w)));
374 print_confusion(m,pairs,lex);
// NOTE(review): this line is written to stdout, whereas the caller passes
// wgn_output as the output stream -- confirm that is intended.
375 fprintf(stdout,
";; entropy %g perplexity %g\n",
376 (-1*(H/Q)),pow(2.0,(-1*(H/Q))));
// Recursively walks the tree for one data vector.
//   tree        - current node; car(tree) is handed to the question helpers
//   vector      - the data vector being classified
//   description - field description used to look up feature values
381 static LISP wagon_vector_predict(LISP tree, LISP vector, LISP description)
// A node with nothing after its first element is handled separately
// (presumably a leaf whose contents are returned -- the statement is not
// visible here).
385 if (cdr(tree) == NIL)
// Otherwise fetch the vector's value for the feature this node's question
// refers to.
388 LISP value = find_feature_value(wgn_ques_feature(car(tree)),
389 vector, description);
// If wagon_ask_question() accepts the value, descend into the second element
// of the node; the other recursive call descends into the third element.
391 if (wagon_ask_question(car(tree),value))
393 return wagon_vector_predict(car(cdr(tree)),vector,description);
396 return wagon_vector_predict(car(cdr(cdr(tree))),vector,description);
// Looks up a named feature for one data vector.
//   feature     - field name to look for
//   vector      - data values, walked in step with description
//   description - field entries; the first element of each is its name
399 static LISP find_feature_value(
const char *feature,
400 LISP vector, LISP description)
// NOTE(review): the loop stops only when vector runs out, but d is
// dereferenced on every step -- confirm description is never the shorter list.
404 for (v=vector,d=description; v != NIL; v=cdr(v),d=cdr(d))
// Match on the field name (the statement run on a match is not visible here).
405 if (streq(feature,get_c_string(car(car(d)))))
// Reported when no description entry carries the requested name.
408 cerr <<
"wagon_test: can't find feature \"" << feature <<
409 "\" in description" << endl;