00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef TESSERACT_TRAINING_COMMONTRAINING_H__
00015 #define TESSERACT_TRAINING_COMMONTRAINING_H__
00016
00017 #include "oldlist.h"
00018 #include "cluster.h"
00019 #include "intproto.h"
00020 #include "featdefs.h"
00021
00022
// Command-line flag helper macros. One of two back ends is chosen at compile
// time:
//   - USE_STD_NAMESPACE defined: flags are built on the INT_VAR / STRING_VAR
//     macro family (with "params.h" included) and are named FLAGS_<name>.
//   - otherwise: flags are built on the DEFINE_* / DECLARE_* macros, with
//     "base/commandlineflags.h" included.
// INT_PARAM_FLAG / STRING_PARAM_FLAG define a flag with a default value and a
// help comment; DECLARE_INT_PARAM_FLAG / DECLARE_STRING_PARAM_FLAG declare a
// flag (as `extern` in the params.h branch) so another file can refer to it.
00023 #ifdef USE_STD_NAMESPACE
00024 #include "params.h"
00025 # define INT_PARAM_FLAG(name, val, comment) \
00026 INT_VAR(FLAGS_##name, val, comment)
00027 # define DECLARE_INT_PARAM_FLAG(name) extern INT_VAR_H(FLAGS_##name, 0, "")
00028 # define STRING_PARAM_FLAG(name, val, comment) \
00029 STRING_VAR(FLAGS_##name, val, comment)
00030 # define DECLARE_STRING_PARAM_FLAG(name) \
00031 extern STRING_VAR_H(FLAGS_##name, "", "")
// WARNING: object-like macro. In this configuration every later `c_str` token
// in a translation unit that includes this header is replaced by `string`.
// Presumably this lets FLAGS_x.c_str() call sites compile against the
// params.h string parameter type - NOTE(review): confirm, and be aware that
// it also rewrites std::string::c_str() calls that follow this header.
00032 # define c_str string
00033 #else
00034 #include "base/commandlineflags.h"
00035 # define INT_PARAM_FLAG(name, val, comment) \
00036 DEFINE_int32(name, val, comment)
00037 # define DECLARE_INT_PARAM_FLAG(name) DECLARE_int32(name)
00038 # define STRING_PARAM_FLAG(name, val, comment) \
00039 DEFINE_string(name, val, comment)
00040 # define DECLARE_STRING_PARAM_FLAG(name) DECLARE_string(name)
00041 #endif
00042
// Forward declarations of tesseract classes that this header names in its
// declarations (ShapeTable and MasterTrainer are used by pointer/reference
// below), so their full definitions need not be included here.
00043 namespace tesseract {
00044 class Classify;
00045 class MasterTrainer;
00046 class ShapeTable;
00047 }
00048
00050
00052
// Global feature-definition table shared by the training tools; defined in
// another translation unit (this is only the extern declaration).
00053 extern FEATURE_DEFS_STRUCT feature_defs;
00054
00055
// Global clustering configuration; defined in another translation unit.
// NOTE(review): presumably populated from command-line flags - confirm in the
// defining .cpp file.
00056 extern CLUSTERCONFIG Config;
00057
00059
// A text label together with a LIST and two counters. LABELEDLIST is a
// pointer to such a node.
// NOTE(review): the per-field descriptions below are inferred from the field
// names only; confirm against the code that fills these nodes.
00061 typedef struct
00062 {
00063 char *Label;  // label string; ownership is not visible in this header
00064 int SampleCount;  // presumably the number of samples under this label
00065 int font_sample_count;  // presumably a per-font sample count - confirm
00066 LIST List;  // the list associated with Label
00067 }
00068 LABELEDLISTNODE, *LABELEDLIST;
00069
// A text label together with a CLASS_TYPE and a fixed-size array of counters,
// one entry per proto slot (the array is sized by MAX_NUM_PROTOS).
// MERGE_CLASS is a pointer to such a node.
// NOTE(review): the meaning of the NumMerged counts is inferred from the name
// only; confirm against the code that updates them.
00070 typedef struct
00071 {
00072 char* Label;  // label string; ownership is not visible in this header
00073 int NumMerged[MAX_NUM_PROTOS];  // presumably merges recorded per proto
00074 CLASS_TYPE Class;  // the class data associated with Label
00075 }MERGE_CLASS_NODE;
00076 typedef MERGE_CLASS_NODE* MERGE_CLASS;
00077
00078
00080
// Processes the program's command line. argc and argv are passed by pointer,
// so the callee is able to modify the caller's argument count and vector.
// NOTE(review): presumably consumes recognised flags and leaves the remaining
// arguments in place - confirm in the implementation.
00082 void ParseArguments(int* argc, char*** argv);
00083
// Training helpers that live in the tesseract namespace.
00084 namespace tesseract {
00085
// Returns a ShapeTable pointer for the given file prefix.
// NOTE(review): presumably reads a shape table file derived from file_prefix
// and returns a caller-owned object (possibly NULL on failure) - confirm.
00086 ShapeTable* LoadShapeTable(const STRING& file_prefix);
00087
// Counterpart of LoadShapeTable: takes a file prefix and a read-only shape
// table. NOTE(review): presumably writes shape_table to a file derived from
// file_prefix - confirm.
00088 void WriteShapeTable(const STRING& file_prefix, const ShapeTable& shape_table);
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
// Builds and returns a MasterTrainer from the command-line arguments.
//   argc, argv   - command line; argv is not modified (const char* const*).
//   replication  - boolean option; NOTE(review): exact effect not visible here.
//   shape_table  - out-parameter (pointer to pointer) for a ShapeTable.
//   file_prefix  - out-parameter for a STRING.
// NOTE(review): ownership of the returned trainer and of *shape_table, and
// whether the out-parameters may be NULL, cannot be determined from this
// header - confirm in the implementation.
00103 MasterTrainer* LoadTrainingData(int argc, const char* const * argv,
00104 bool replication,
00105 ShapeTable** shape_table,
00106 STRING* file_prefix);
00107 }
00108
// Returns a filename taken from the command line as a read-only C string.
// NOTE(review): the name suggests successive calls walk through argv and that
// a NULL return marks the end - confirm; iteration state is not visible here.
00109 const char *GetNextFilename(int argc, const char* const * argv);
00110
// Looks up a labeled list by Label within List and returns it.
// NOTE(review): presumably returns NULL when no entry matches - confirm.
// Label is taken as non-const char* here, whereas FindClass takes const char*.
00111 LABELEDLIST FindList(
00112 LIST List,
00113 char *Label);
00114
// Creates a labeled list for the given Label and returns it.
// NOTE(review): presumably allocates the node (to be released with
// FreeLabeledList) and copies Label - confirm in the implementation.
00115 LABELEDLIST NewLabeledList(
00116 const char *Label);
00117
// Reads training samples from an open FILE into *training_samples.
//   feature_defs     - feature definitions to use (note: this parameter has
//                      the same name as the global `feature_defs` declared in
//                      this header and shadows it inside the function).
//   feature_name     - name of a feature type; presumably selects which
//                      features are kept - confirm.
//   max_samples      - presumably an upper bound on samples read - confirm.
//   unicharset       - passed by non-const pointer, so it may be updated.
//   file             - input stream; who closes it is not visible here.
//   training_samples - in/out list that receives the samples.
00118 void ReadTrainingSamples(const FEATURE_DEFS_STRUCT& feature_defs,
00119 const char *feature_name, int max_samples,
00120 UNICHARSET* unicharset,
00121 FILE* file, LIST* training_samples);
00122
// Writes the training samples held in CharList.
//   FeatureDefs          - feature definitions describing the samples.
//   Directory            - presumably the output directory - confirm; taken
//                          as non-const char*.
//   CharList             - list of samples to write.
//   program_feature_type - name of a feature type; presumably selects which
//                          features are written - confirm.
00123 void WriteTrainingSamples(
00124 const FEATURE_DEFS_STRUCT &FeatureDefs,
00125 char *Directory,
00126 LIST CharList,
00127 const char *program_feature_type);
00128
// Releases the training samples held in CharList.
// NOTE(review): presumably frees every node and the data it owns, leaving
// CharList unusable afterwards - confirm. The same prototype is declared a
// second time further down in this header.
00129 void FreeTrainingSamples(
00130 LIST CharList);
00131
// Releases a single labeled list (a LABELEDLISTNODE pointer).
// NOTE(review): presumably also frees the label and the contained list -
// confirm in the implementation.
00132 void FreeLabeledList(
00133 LABELEDLIST LabeledList);
00134
// Releases a list of labeled classes.
// NOTE(review): presumably the elements are MERGE_CLASS nodes as created by
// NewLabeledClass - confirm in the implementation.
00135 void FreeLabeledClassList(
00136 LIST ClassListList);
00137
// Returns a CLUSTERER prepared from the samples of one labeled list.
//   FeatureDefs          - feature definitions describing the samples.
//   CharSample           - labeled list supplying the samples.
//   program_feature_type - name of a feature type; presumably selects which
//                          feature is clustered - confirm.
// NOTE(review): ownership of the returned CLUSTERER is not visible here.
00138 CLUSTERER *SetUpForClustering(
00139 const FEATURE_DEFS_STRUCT &FeatureDefs,
00140 LABELEDLIST CharSample,
00141 const char *program_feature_type);
00142
// Returns a LIST derived from ProtoList, filtered by the two keep flags.
//   KeepSigProtos   - whether significant protos are kept.
//   KeepInsigProtos - whether insignificant protos are kept.
//   N               - NOTE(review): meaning not visible here (possibly a
//                     dimension count) - confirm.
// NOTE(review): whether the result is a new list or ProtoList modified in
// place cannot be determined from this header.
00143 LIST RemoveInsignificantProtos(
00144 LIST ProtoList,
00145 BOOL8 KeepSigProtos,
00146 BOOL8 KeepInsigProtos,
00147 int N);
00148
// Performs cleanup on the entries of ProtoList.
// NOTE(review): presumably frees per-proto data that is no longer needed -
// exactly which fields are released must be confirmed in the implementation.
00149 void CleanUpUnusedData(
00150 LIST ProtoList);
00151
// Merges insignificant protos within ProtoList.
//   ProtoList - list of protos to operate on.
//   label     - label text; presumably used for reporting - confirm.
//   Clusterer - clusterer associated with the protos.
//   Config    - clustering configuration (note: this parameter has the same
//               name as the global `Config` declared in this header and
//               shadows it inside the function).
// NOTE(review): the merge criterion is not visible from this header.
00152 void MergeInsignificantProtos(
00153 LIST ProtoList,
00154 const char *label,
00155 CLUSTERER *Clusterer,
00156 CLUSTERCONFIG *Config);
00157
// Looks up a merge class by Label within List and returns it.
// NOTE(review): presumably returns NULL when no entry matches - confirm.
00158 MERGE_CLASS FindClass(
00159 LIST List,
00160 const char *Label);
00161
// Creates a merge class for the given Label and returns it.
// NOTE(review): presumably allocates a MERGE_CLASS_NODE and copies Label -
// confirm in the implementation.
00162 MERGE_CLASS NewLabeledClass(
00163 const char *Label);
00164
// Repeats the FreeTrainingSamples prototype that already appears earlier in
// this header with the same signature; the redeclaration is redundant but
// legal.
00165 void FreeTrainingSamples(
00166 LIST CharList);
00167
// Returns CLASS_STRUCT data built from a unicharset and a list of labeled
// classes.
// NOTE(review): presumably returns an array indexed by unichar id that the
// caller must release - confirm in the implementation.
00168 CLASS_STRUCT* SetUpForFloat2Int(const UNICHARSET& unicharset,
00169 LIST LabeledClassList);
00170
// Normalizes the floats pointed to by Values in place (the pointer is
// non-const and nothing is returned).
// NOTE(review): the number of elements read and the normalization used are
// not visible from this header - confirm in the implementation.
00171 void Normalize(
00172 float *Values);
00173
// Releases a list of normalization protos.
// NOTE(review): presumably the list built by AddToNormProtosList - confirm.
00174 void FreeNormProtoList(
00175 LIST CharList);
00176
// Adds an entry for CharName, built from ProtoList, to *NormProtoList.
//   NormProtoList - in/out list being extended (passed by pointer).
//   ProtoList     - protos for the character.
//   CharName      - character label; taken as non-const char*.
// NOTE(review): whether ProtoList entries are copied or shared is not visible
// from this header.
00177 void AddToNormProtosList(
00178 LIST* NormProtoList,
00179 LIST ProtoList,
00180 char *CharName);
00181
// Returns a count of the protos in ProtoList.
//   CountSigProtos   - whether significant protos are included in the count.
//   CountInsigProtos - whether insignificant protos are included in the count.
00182 int NumberOfProtos(
00183 LIST ProtoList,
00184 BOOL8 CountSigProtos,
00185 BOOL8 CountInsigProtos);
00186
00187
// Takes no arguments and returns nothing, so any effect is on state outside
// the call.
// NOTE(review): presumably allocates storage for normalization protos -
// confirm in the implementation (and whether it is still defined anywhere).
00188 void allocNormProtos();
00189 #endif // TESSERACT_TRAINING_COMMONTRAINING_H__