00001 00002 // File: ltrresultiterator.h 00003 // Description: Iterator for tesseract results in strict left-to-right 00004 // order that avoids using tesseract internal data structures. 00005 // Author: Ray Smith 00006 // Created: Fri Feb 26 11:01:06 PST 2010 00007 // 00008 // (C) Copyright 2010, Google Inc. 00009 // Licensed under the Apache License, Version 2.0 (the "License"); 00010 // you may not use this file except in compliance with the License. 00011 // You may obtain a copy of the License at 00012 // http://www.apache.org/licenses/LICENSE-2.0 00013 // Unless required by applicable law or agreed to in writing, software 00014 // distributed under the License is distributed on an "AS IS" BASIS, 00015 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 00016 // See the License for the specific language governing permissions and 00017 // limitations under the License. 00018 // 00020 00021 #ifndef TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ 00022 #define TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__ 00023 00024 #include "pageiterator.h" 00025 #include "unicharset.h" 00026 00027 class BLOB_CHOICE_IT; 00028 class WERD_RES; 00029 00030 namespace tesseract { 00031 00032 class Tesseract; 00033 00034 // Class to iterate over tesseract results, providing access to all levels 00035 // of the page hierarchy, without including any tesseract headers or having 00036 // to handle any tesseract structures. 00037 // WARNING! This class points to data held within the TessBaseAPI class, and 00038 // therefore can only be used while the TessBaseAPI class still exists and 00039 // has not been subjected to a call of Init, SetImage, Recognize, Clear, End 00040 // DetectOS, or anything else that changes the internal PAGE_RES. 00041 // See apitypes.h for the definition of PageIteratorLevel. 00042 // See also base class PageIterator, which contains the bulk of the interface. 00043 // LTRResultIterator adds text-specific methods for access to OCR output. 00044 00045 class LTRResultIterator : public PageIterator { 00046 friend class ChoiceIterator; 00047 public: 00048 // page_res and tesseract come directly from the BaseAPI. 00049 // The rectangle parameters are copied indirectly from the Thresholder, 00050 // via the BaseAPI. They represent the coordinates of some rectangle in an 00051 // original image (in top-left-origin coordinates) and therefore the top-left 00052 // needs to be added to any output boxes in order to specify coordinates 00053 // in the original image. See TessBaseAPI::SetRectangle. 00054 // The scale and scaled_yres are in case the Thresholder scaled the image 00055 // rectangle prior to thresholding. Any coordinates in tesseract's image 00056 // must be divided by scale before adding (rect_left, rect_top). 00057 // The scaled_yres indicates the effective resolution of the binary image 00058 // that tesseract has been given by the Thresholder. 00059 // After the constructor, Begin has already been called. 00060 LTRResultIterator(PAGE_RES* page_res, Tesseract* tesseract, 00061 int scale, int scaled_yres, 00062 int rect_left, int rect_top, 00063 int rect_width, int rect_height); 00064 virtual ~LTRResultIterator(); 00065 00066 // LTRResultIterators may be copied! This makes it possible to iterate over 00067 // all the objects at a lower level, while maintaining an iterator to 00068 // objects at a higher level. These constructors DO NOT CALL Begin, so 00069 // iterations will continue from the location of src. 00070 // TODO: For now the copy constructor and operator= only need the base class 00071 // versions, but if new data members are added, don't forget to add them! 00072 00073 // ============= Moving around within the page ============. 00074 00075 // See PageIterator. 00076 00077 // ============= Accessing data ==============. 00078 00079 // Returns the null terminated UTF-8 encoded text string for the current 00080 // object at the given level. Use delete [] to free after use. 00081 char* GetUTF8Text(PageIteratorLevel level) const; 00082 00083 // Set the string inserted at the end of each text line. "\n" by default. 00084 void SetLineSeparator(const char *new_line); 00085 00086 // Set the string inserted at the end of each paragraph. "\n" by default. 00087 void SetParagraphSeparator(const char *new_para); 00088 00089 // Returns the mean confidence of the current object at the given level. 00090 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00091 float Confidence(PageIteratorLevel level) const; 00092 00093 // ============= Functions that refer to words only ============. 00094 00095 // Returns the font attributes of the current word. If iterating at a higher 00096 // level object than words, eg textlines, then this will return the 00097 // attributes of the first word in that textline. 00098 // The actual return value is a string representing a font name. It points 00099 // to an internal table and SHOULD NOT BE DELETED. Lifespan is the same as 00100 // the iterator itself, ie rendered invalid by various members of 00101 // TessBaseAPI, including Init, SetImage, End or deleting the TessBaseAPI. 00102 // Pointsize is returned in printers points (1/72 inch.) 00103 const char* WordFontAttributes(bool* is_bold, 00104 bool* is_italic, 00105 bool* is_underlined, 00106 bool* is_monospace, 00107 bool* is_serif, 00108 bool* is_smallcaps, 00109 int* pointsize, 00110 int* font_id) const; 00111 00112 // Return the name of the language used to recognize this word. 00113 // On error, NULL. Do not delete this pointer. 00114 const char* WordRecognitionLanguage() const; 00115 00116 // Return the overall directionality of this word. 00117 StrongScriptDirection WordDirection() const; 00118 00119 // Returns true if the current word was found in a dictionary. 00120 bool WordIsFromDictionary() const; 00121 00122 // Returns true if the current word is numeric. 00123 bool WordIsNumeric() const; 00124 00125 // Returns true if the word contains blamer information. 00126 bool HasBlamerInfo() const; 00127 00128 // Returns the pointer to ParamsTrainingBundle stored in the BlamerBundle 00129 // of the current word. 00130 void *GetParamsTrainingBundle() const; 00131 00132 // Returns a pointer to the string with blamer information for this word. 00133 // Assumes that the word's blamer_bundle is not NULL. 00134 const char *GetBlamerDebug() const; 00135 00136 // Returns a pointer to the string with misadaption information for this word. 00137 // Assumes that the word's blamer_bundle is not NULL. 00138 const char *GetBlamerMisadaptionDebug() const; 00139 00140 // Returns a null terminated UTF-8 encoded truth string for the current word. 00141 // Use delete [] to free after use. 00142 char* WordTruthUTF8Text() const; 00143 00144 // Returns a pointer to serialized choice lattice. 00145 // Fills lattice_size with the number of bytes in lattice data. 00146 const char *WordLattice(int *lattice_size) const; 00147 00148 // ============= Functions that refer to symbols only ============. 00149 00150 // Returns true if the current symbol is a superscript. 00151 // If iterating at a higher level object than symbols, eg words, then 00152 // this will return the attributes of the first symbol in that word. 00153 bool SymbolIsSuperscript() const; 00154 // Returns true if the current symbol is a subscript. 00155 // If iterating at a higher level object than symbols, eg words, then 00156 // this will return the attributes of the first symbol in that word. 00157 bool SymbolIsSubscript() const; 00158 // Returns true if the current symbol is a dropcap. 00159 // If iterating at a higher level object than symbols, eg words, then 00160 // this will return the attributes of the first symbol in that word. 00161 bool SymbolIsDropcap() const; 00162 00163 protected: 00164 const char *line_separator_; 00165 const char *paragraph_separator_; 00166 }; 00167 00168 // Class to iterate over the classifier choices for a single RIL_SYMBOL. 00169 class ChoiceIterator { 00170 public: 00171 // Construction is from a LTRResultIterator that points to the symbol of 00172 // interest. The ChoiceIterator allows a one-shot iteration over the 00173 // choices for this symbol and after that is is useless. 00174 explicit ChoiceIterator(const LTRResultIterator& result_it); 00175 ~ChoiceIterator(); 00176 00177 // Moves to the next choice for the symbol and returns false if there 00178 // are none left. 00179 bool Next(); 00180 00181 // ============= Accessing data ==============. 00182 00183 // Returns the null terminated UTF-8 encoded text string for the current 00184 // choice. 00185 // NOTE: Unlike LTRResultIterator::GetUTF8Text, the return points to an 00186 // internal structure and should NOT be delete[]ed to free after use. 00187 const char* GetUTF8Text() const; 00188 00189 // Returns the confidence of the current choice. 00190 // The number should be interpreted as a percent probability. (0.0f-100.0f) 00191 float Confidence() const; 00192 00193 private: 00194 // Pointer to the WERD_RES object owned by the API. 00195 WERD_RES* word_res_; 00196 // Iterator over the blob choices. 00197 BLOB_CHOICE_IT* choice_it_; 00198 }; 00199 00200 } // namespace tesseract. 00201 00202 #endif // TESSERACT_CCMAIN_LTR_RESULT_ITERATOR_H__