00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #ifndef TESS_LANG_MOD_EDGE_H
00024 #define TESS_LANG_MOD_EDGE_H
00025
#include <stdint.h>

#include "dawg.h"
#include "char_set.h"

#include "lang_mod_edge.h"
#include "cube_reco_context.h"
#include "cube_utils.h"
00032
00033
// Flag bits and bit-fields packed into a 64-bit edge mask (inT64).
// Two literal spellings are provided: the i64 suffix is used when _HMSW32_H
// is defined, the ll suffix otherwise. Each mask is wrapped in parentheses so
// it can be dropped into any expression safely.
#ifdef _HMSW32_H
#define LEAD_PUNC_EDGE_REF_MASK ((inT64) 0x0000000100000000i64)
#define TRAIL_PUNC_EDGE_REF_MASK ((inT64) 0x0000000200000000i64)
#define TRAIL_PUNC_REPEAT_MASK ((inT64) 0xffff000000000000i64)
#else
#define LEAD_PUNC_EDGE_REF_MASK ((inT64) 0x0000000100000000ll)
#define TRAIL_PUNC_EDGE_REF_MASK ((inT64) 0x0000000200000000ll)
#define TRAIL_PUNC_REPEAT_MASK ((inT64) 0xffff000000000000ll)
#endif

// Shift/mask pairs for three 4-bit fields (bits 0-3, 4-7 and 8-11).
// NOTE(review): the names suggest these encode the number state machine's
// state, literal and repeat count; no user is visible in this header, so
// confirm against the number language-model code.
#define NUMBER_STATE_SHIFT 0
#define NUMBER_STATE_MASK 0x0000000fl
#define NUMBER_LITERAL_SHIFT 4
#define NUMBER_LITERAL_MASK 0x000000f0l
#define NUMBER_REPEAT_SHIFT 8
#define NUMBER_REPEAT_MASK 0x00000f00l
// Negative sentinel; parenthesized so it is safe next to other operators.
// NOTE(review): presumably marks a terminal number state; confirm at use site.
#define NUM_TRM (-99)
// Bit position of the trailing-punctuation repeat count (top 16 bits).
#define TRAIL_PUNC_REPEAT_SHIFT 48

// True if the leading-punctuation flag bit is set in edge_mask.
#define IsLeadingPuncEdge(edge_mask) \
  (((edge_mask) & LEAD_PUNC_EDGE_REF_MASK) != 0)
// True if the trailing-punctuation flag bit is set in edge_mask.
#define IsTrailingPuncEdge(edge_mask) \
  (((edge_mask) & TRAIL_PUNC_EDGE_REF_MASK) != 0)
// Extracts the repeat count stored in the top 16 bits of edge_mask.
#define TrailingPuncCount(edge_mask) \
  (((edge_mask) & TRAIL_PUNC_REPEAT_MASK) >> TRAIL_PUNC_REPEAT_SHIFT)
// Builds a trailing-punctuation edge mask carrying the repeat count Cnt.
// Cnt is widened to inT64 before shifting: shifting a plain int by 48 bits
// is undefined behaviour. Cnt is expected to be non-negative and to fit in
// the 16-bit count field.
#define TrailingPuncEdgeMask(Cnt) \
  (TRAIL_PUNC_EDGE_REF_MASK | \
   (static_cast<inT64>(Cnt) << TRAIL_PUNC_REPEAT_SHIFT))

// Sentinel values stored in place of a real Dawg pointer (they are cast to
// Dawg* and compared against the edge's dawg pointer).
#define DAWG_OOD 0
#define DAWG_NUMBER 1
00066
00067 namespace tesseract {
00068 class TessLangModEdge : public LangModEdge {
00069 public:
00070
00071 TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
00072 EDGE_REF edge, int class_id);
00073 TessLangModEdge(CubeRecoContext *cntxt, const Dawg *edge_array,
00074 EDGE_REF start_edge_idx, EDGE_REF end_edge_idx,
00075 int class_id);
00076 TessLangModEdge(CubeRecoContext *cntxt, int class_id);
00077 ~TessLangModEdge() {}
00078
00079
00080 inline bool IsRoot() const {
00081 return root_;
00082 }
00083 inline void SetRoot(bool flag) { root_ = flag; }
00084
00085 inline bool IsOOD() const {
00086 return (dawg_ == (Dawg *)DAWG_OOD);
00087 }
00088
00089 inline bool IsNumber() const {
00090 return (dawg_ == (Dawg *)DAWG_NUMBER);
00091 }
00092
00093 inline bool IsEOW() const {
00094 return (IsTerminal() || (dawg_->end_of_word(end_edge_) != 0));
00095 }
00096
00097 inline const Dawg *GetDawg() const { return dawg_; }
00098 inline EDGE_REF StartEdge() const { return start_edge_; }
00099 inline EDGE_REF EndEdge() const { return end_edge_; }
00100 inline EDGE_REF EdgeMask() const { return edge_mask_; }
00101 inline const char_32 * EdgeString() const { return str_; }
00102 inline int ClassID () const { return class_id_; }
00103 inline int PathCost() const { return path_cost_; }
00104 inline void SetEdgeMask(EDGE_REF edge_mask) { edge_mask_ = edge_mask; }
00105 inline void SetDawg(Dawg *dawg) { dawg_ = dawg; }
00106 inline void SetStartEdge(EDGE_REF edge_idx) { start_edge_ = edge_idx; }
00107 inline void SetEndEdge(EDGE_REF edge_idx) { end_edge_ = edge_idx; }
00108
00109
00110
00111
00112 inline bool IsTerminal() const {
00113 return (IsOOD() || IsNumber() || IsTrailingPuncEdge(start_edge_) ||
00114 dawg_->next_node(end_edge_) == 0);
00115 }
00116
00117
00118
00119
00120 inline int SignalCnt() const {
00121 return 2;
00122 }
00123
00124
00125 inline double SignalWgt(int signal) const {
00126 CubeTuningParams *params =
00127 reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
00128 if (params != NULL) {
00129 switch (signal) {
00130 case 0:
00131 return params->OODWgt();
00132 break;
00133
00134 case 1:
00135 return params->NumWgt();
00136 break;
00137 }
00138 }
00139
00140 return 0.0;
00141 }
00142
00143
00144 void SetSignalWgt(int signal, double wgt) {
00145 CubeTuningParams *params =
00146 reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
00147 if (params != NULL) {
00148 switch (signal) {
00149 case 0:
00150 params->SetOODWgt(wgt);
00151 break;
00152
00153 case 1:
00154 params->SetNumWgt(wgt);
00155 break;
00156 }
00157 }
00158 }
00159
00160
00161 int Signal(int signal) {
00162 switch (signal) {
00163 case 0:
00164 return IsOOD() ? MIN_PROB_COST : 0;
00165 break;
00166
00167 case 1:
00168 return IsNumber() ? MIN_PROB_COST : 0;
00169 break;
00170
00171 default:
00172 return 0;
00173 }
00174 }
00175
00176
00177
00178 inline unsigned int Hash() const {
00179 return static_cast<unsigned int>(((start_edge_ | end_edge_) ^
00180 ((reinterpret_cast<unsigned long int>(dawg_)))) ^
00181 ((unsigned int)edge_mask_) ^
00182 class_id_);
00183 }
00184
00185
00186 char *Description() const;
00187
00188
00189 inline bool IsIdentical(LangModEdge *lang_mod_edge) const {
00190 return (class_id_ ==
00191 reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->class_id_ &&
00192 str_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->str_ &&
00193 dawg_ == reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->dawg_ &&
00194 start_edge_ ==
00195 reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->start_edge_ &&
00196 end_edge_ ==
00197 reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->end_edge_ &&
00198 edge_mask_ ==
00199 reinterpret_cast<TessLangModEdge *>(lang_mod_edge)->edge_mask_);
00200 }
00201
00202
00203 static int CreateChildren(CubeRecoContext *cntxt,
00204 const Dawg *edges,
00205 NODE_REF edge_reg,
00206 LangModEdge **lm_edges);
00207
00208 private:
00209 bool root_;
00210 CubeRecoContext *cntxt_;
00211 const Dawg *dawg_;
00212 EDGE_REF start_edge_;
00213 EDGE_REF end_edge_;
00214 EDGE_REF edge_mask_;
00215 int path_cost_;
00216 int class_id_;
00217 const char_32 * str_;
00218
00219 inline int Cost() const {
00220 if (cntxt_ != NULL) {
00221 CubeTuningParams *params =
00222 reinterpret_cast<CubeTuningParams *>(cntxt_->Params());
00223 if (dawg_ == (Dawg *)DAWG_OOD) {
00224 return static_cast<int>(params->OODWgt() * MIN_PROB_COST);
00225 } else if (dawg_ == (Dawg *)DAWG_NUMBER) {
00226 return static_cast<int>(params->NumWgt() * MIN_PROB_COST);
00227 }
00228 }
00229 return 0;
00230 }
00231 };
00232 }
00233
00234 #endif // TESS_LANG_MOD_EDGE_H