00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00019
00020 #ifndef TESSERACT_TEXTORD_TABFIND_H__
00021 #define TESSERACT_TEXTORD_TABFIND_H__
00022
00023 #include "alignedblob.h"
00024 #include "tesscallback.h"
00025 #include "tabvector.h"
00026 #include "linefind.h"
00027
// Declarations of the textord_tabfind_* tunable parameters, made through the
// BOOL_VAR_H / double_VAR_H macros. The `extern` keyword means these are
// declarations only; the matching definitions must live in another
// translation unit. The string argument is the human-readable description of
// each parameter.
// NOTE(review): the middle argument (false / true / 0.5 / 0.75) looks like the
// default value -- confirm against the macro definitions.
00028 extern BOOL_VAR_H(textord_tabfind_force_vertical_text, false,
00029 "Force using vertical text page mode");
00030 extern BOOL_VAR_H(textord_tabfind_vertical_horizontal_mix, true,
00031 "find horizontal lines such as headers in vertical page mode");
00032 extern double_VAR_H(textord_tabfind_vertical_text_ratio, 0.5,
00033 "Fraction of textlines deemed vertical to use vertical page mode");
00034 extern double_VAR_H(textord_tabfind_aligned_gap_fraction, 0.75,
00035 "Fraction of height used as a minimum gap for aligned blobs.");
00036
// Forward declarations (global namespace) of types defined elsewhere that are
// named in the declarations further down this header.
// NOTE(review): Pix is not referenced anywhere in the visible part of this
// header -- confirm whether the declaration is still needed.
00037 class BLOBNBOX;
00038 class BLOBNBOX_LIST;
00039 class TO_BLOCK;
00040 class ScrollView;
00041 struct Pix;
00042
00043 namespace tesseract {
00044
// Callback type that takes one int argument and yields a bool result
// (TessResultCallback1<bool, int>). It is the type of the pointer returned by
// the class's WidthCB() accessor.
00045 typedef TessResultCallback1<bool, int> WidthCallback;
00046
// Forward declarations of tesseract-namespace types defined elsewhere.
// ColPartitionGrid is used by pointer in the class declaration in this file.
00047 struct AlignedBlobParams;
00048 class ColPartitionGrid;
00049
// Integer constant with value 20. It is not referenced elsewhere in this
// header. NOTE(review): the name suggests a scaling/quantisation factor for
// column widths -- confirm its meaning at the point of use.
00051 const int kColumnWidthFactor = 20;
00052
// TabFind is publicly derived from AlignedBlob. Judging by its data members
// (lists of TabVector, vectors of tab-candidate blobs, a list of column
// widths) it is responsible for locating tab vectors on a page.
// Only declarations are visible in this header: apart from the four small
// inline accessors, every member function is defined elsewhere.
// NOTE(review): the per-method notes below are derived from names and
// signatures only and should be confirmed against the implementation file.
00062 class TabFind : public AlignedBlob {
00063 public:
// Constructor. Takes a grid size, two ICOORD corners (bleft, tright), a list
// of TabVector (vlines), an integer direction pair (vertical_x, vertical_y)
// and a resolution.
// NOTE(review): bleft/tright are presumably the bottom-left and top-right
// corners of the grid -- confirm.
00064 TabFind(int gridsize, const ICOORD& bleft, const ICOORD& tright,
00065 TabVector_LIST* vlines, int vertical_x, int vertical_y,
00066 int resolution);
// Virtual destructor, so deleting through a base-class pointer is
// well-defined.
00067 virtual ~TabFind();
00068
// Takes a list of blobs and a target BBGrid of BLOBNBOX.
// NOTE(review): h_spread / v_spread presumably control whether each blob is
// spread horizontally / vertically across grid cells -- confirm.
00077 void InsertBlobsToGrid(bool h_spread, bool v_spread,
00078 BLOBNBOX_LIST* blobs,
00079 BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
00080
// Single-blob counterpart of InsertBlobsToGrid (same flags and grid type).
// Returns bool; the meaning of the result is not visible from this header.
00088 bool InsertBlob(bool h_spread, bool v_spread, BLOBNBOX* blob,
00089 BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT>* grid);
00090
// Operates on a whole TO_BLOCK.
// NOTE(review): presumably applies SetBlobRuleEdges to the block's blob
// lists -- confirm.
00091 void SetBlockRuleEdges(TO_BLOCK* block);
00092
00093
// Operates on a list of blobs.
// NOTE(review): presumably records rule-line edge positions on each blob --
// confirm.
00094 void SetBlobRuleEdges(BLOBNBOX_LIST* blobs);
00095
00096
00097
00098
00099
00100
00101
00102
// Returns an int computed for the vertical range [bottom_y, top_y] relative
// to the TabVector v. required_shift is a non-const int pointer.
// NOTE(review): required_shift is presumably an output parameter -- confirm.
00103 int GutterWidth(int bottom_y, int top_y, const TabVector& v,
00104 bool ignore_unmergeables, int max_gutter_width,
00105 int* required_shift);
// Returns nothing directly; gutter_width and neighbour_gap are non-const int
// pointers.
// NOTE(review): both are presumably output parameters, and `left` presumably
// selects which side of tab_x is examined -- confirm.
00109 void GutterWidthAndNeighbourGap(int tab_x, int mean_height,
00110 int max_gutter, bool left,
00111 BLOBNBOX* bbox, int* gutter_width,
00112 int* neighbour_gap);
00113
// Return an int edge coordinate for the given box (right / left variants).
// The `crossing` and `extended` flags are passed with the same meaning to
// both functions; their semantics are not visible from this header.
00120 int RightEdgeForBox(const TBOX& box, bool crossing, bool extended);
00124 int LeftEdgeForBox(const TBOX& box, bool crossing, bool extended);
00125
// Return a TabVector pointer for the given box (right / left variants), with
// the same flag parameters as the *EdgeForBox functions above.
// NOTE(review): whether the result may be null, and who owns it, cannot be
// determined here -- confirm before dereferencing.
00142 TabVector* RightTabForBox(const TBOX& box, bool crossing, bool extended);
00146 TabVector* LeftTabForBox(const TBOX& box, bool crossing, bool extended);
00147
// Predicate on a width. Its signature (int -> bool) matches the WidthCallback
// typedef.
00152 bool CommonWidth(int width);
// Static predicates comparing two integer sizes; being static, they use no
// instance state. The thresholds are defined in the implementation.
00157 static bool DifferentSizes(int size1, int size2);
00162 static bool VeryDifferentSizes(int size1, int size2);
00163
// Accessor: returns the stored width_cb_ pointer as-is.
// Ownership of the callback is not visible from this header.
00167 WidthCallback* WidthCB() {
00168 return width_cb_;
00169 }
00170
// Accessor: returns a const reference to image_origin_.
00174 const ICOORD& image_origin() const {
00175 return image_origin_;
00176 }
00177
00178 protected:
// Accessors for derived classes: return the addresses of the vectors_ and
// dead_vectors_ members, so callers can modify the lists in place.
00182 TabVector_LIST* vectors() {
00183 return &vectors_;
00184 }
00185 TabVector_LIST* dead_vectors() {
00186 return &dead_vectors_;
00187 }
00188
// Protected entry point; returns bool. It is overloaded by a private
// FindTabVectors further down that has a different parameter list.
// NOTE(review): deskew / reskew are presumably output rotation vectors --
// confirm.
00194 bool FindTabVectors(TabVector_LIST* hlines,
00195 BLOBNBOX_LIST* image_blobs, TO_BLOCK* block,
00196 int min_gutter_width,
00197 ColPartitionGrid* part_grid,
00198 FCOORD* deskew, FCOORD* reskew);
00199
00200
00201
// Takes a subset of the parameters of the protected FindTabVectors above.
// NOTE(review): presumably the path used when tab finding is skipped --
// confirm.
00202 void DontFindTabVectors(BLOBNBOX_LIST* image_blobs,
00203 TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
00204
00205
00206
00207
// Operates on a TO_BLOCK; the exact clean-up performed is defined in the
// implementation.
00208 void TidyBlobs(TO_BLOCK* block);
00209
00210
// Takes a point (x, y) and two non-const int pointers.
// NOTE(review): min_key / max_key are presumably output sort-key bounds --
// confirm.
00211 void SetupTabSearch(int x, int y, int* min_key, int* max_key);
00212
// Takes and returns a ScrollView pointer.
// NOTE(review): the relationship between the argument and the returned window
// is not visible here -- confirm.
00216 ScrollView* DisplayTabVectors(ScrollView* tab_win);
00217
00218
00219
00220
00221
00222
// Returns a ScrollView pointer.
// NOTE(review): presumably a debug window that may be null -- confirm.
00223 ScrollView* FindInitialTabVectors(BLOBNBOX_LIST* image_blobs,
00224 int min_gutter_width, TO_BLOCK* block);
00225
00226
// Static helper taking a rotation vector and a blob list; it uses no instance
// state.
00227 static void RotateBlobList(const FCOORD& rotation, BLOBNBOX_LIST* blobs);
00228
00229
00230
00231
00232
// Takes a rotation and its counterpart (rotate / rerotate), a list of
// horizontal lines, and a non-const pointer to the minimum gutter width.
// NOTE(review): *min_gutter_width is presumably updated in place -- confirm.
00233 void ResetForVerticalText(const FCOORD& rotate, const FCOORD& rerotate,
00234 TabVector_LIST* horizontal_lines,
00235 int* min_gutter_width);
00236
00237
00238
// No-argument reset; which members it clears is defined in the
// implementation.
00239 void Reset();
00240
00241
00242
// No-argument transformation of the object's state.
// NOTE(review): presumably mirrors stored coordinates about the y-axis --
// confirm.
00243 void ReflectInYAxis();
00244
00245 private:
00246
00247
// Returns a ScrollView pointer.
// NOTE(review): presumably a debug window that may be null -- confirm.
00248 ScrollView* FindTabBoxes(int min_gutter_width);
00249
00250
00251
// Per-blob predicate; bbox is non-const, so the blob may be modified.
00252 bool TestBoxForTabs(BLOBNBOX* bbox, int min_gutter_width);
00253
00254
00255
// Left / right pair of per-blob predicates with identical signatures.
00256 bool ConfirmRaggedLeft(BLOBNBOX* bbox, int min_gutter);
00257
00258
00259 bool ConfirmRaggedRight(BLOBNBOX* bbox, int min_gutter);
00260
00261
// Predicate over two boxes, both taken by const reference.
00262 bool NothingYOverlapsInBox(const TBOX& search_box, const TBOX& target_box);
00263
00264
00265
00266 void FindAllTabVectors(int min_gutter_width);
00267
// Private overload of FindTabVectors (distinct from the protected one above).
// Returns int; `vectors` here is a parameter that shadows the vectors()
// accessor name. vertical_x / vertical_y are non-const int pointers.
// NOTE(review): presumably returns a count and accumulates a vertical
// direction into vertical_x / vertical_y -- confirm.
00268 int FindTabVectors(int search_size_multiple,
00269 TabAlignment alignment,
00270 int min_gutter_width,
00271 TabVector_LIST* vectors,
00272 int* vertical_x, int* vertical_y);
00273
00274
00275
00276
00277
00278
00279
// Single-blob counterpart of the private FindTabVectors; returns a TabVector
// pointer.
// NOTE(review): null-ness and ownership of the result are not visible here
// -- confirm.
00280 TabVector* FindTabVector(int search_size_multiple, int min_gutter_width,
00281 TabAlignment alignment,
00282 BLOBNBOX* bbox,
00283 int* vertical_x, int* vertical_y);
00284
00285
00286
// NOTE(review): "Parellelize" looks like a misspelling of "Parallelize". The
// name is part of the interface, so renaming would require updating the
// definition and every caller.
00287 void SetVerticalSkewAndParellelize(int vertical_x, int vertical_y);
00288
00289
00290 void SortVectors();
00291
00292
00293 void EvaluateTabs();
00294
00295
00296
00297
// Takes a ScrollView pointer and a ColPartitionGrid pointer.
// NOTE(review): presumably fills column_widths_; tab_win is presumably an
// optional debug window -- confirm.
00298 void ComputeColumnWidths(ScrollView* tab_win,
00299 ColPartitionGrid* part_grid);
00300
00301
// Takes a partition grid and a non-const STATS pointer.
// NOTE(review): col_widths is presumably accumulated into -- confirm.
00302 void ApplyPartitionsToColumnWidths(ColPartitionGrid* part_grid,
00303 STATS* col_widths);
00304
00305
00306
00307
00308 void MakeColumnWidths(int col_widths_size, STATS* col_widths);
00309
00310
00311 void MarkVerticalText();
00312
00313
00314
00315
// Returns an int derived from the given list of tab vectors.
00316 int FindMedianGutterWidth(TabVector_LIST* tab_vectors);
00317
00318
00319
00320
00321
00322
// Returns a BLOBNBOX pointer related to bbox. look_left selects a direction;
// the remaining parameters are limits and filters whose exact use is defined
// in the implementation.
// NOTE(review): the result is presumably null when no neighbour qualifies --
// confirm.
00323 BLOBNBOX* AdjacentBlob(const BLOBNBOX* bbox,
00324 bool look_left, bool ignore_images,
00325 double min_overlap_fraction,
00326 int gap_limit, int top_y, int bottom_y);
00327
00328
00329
00330
00331
// Takes a left/right pair of blobs and a left/right pair of tab vectors.
00332 void AddPartnerVector(BLOBNBOX* left_blob, BLOBNBOX* right_blob,
00333 TabVector* left, TabVector* right);
00334
00339 void CleanupTabs();
00340
// Returns bool; takes the same hlines / image_blobs / block / deskew / reskew
// parameters as the protected FindTabVectors.
00346 bool Deskew(TabVector_LIST* hlines, BLOBNBOX_LIST* image_blobs,
00347 TO_BLOCK* block, FCOORD* deskew, FCOORD* reskew);
00348
00349
// NOTE(review): deskew / reskew are presumably outputs -- confirm.
00350 void ComputeDeskewVectors(FCOORD* deskew, FCOORD* reskew);
00351
00356 void ApplyTabConstraints();
00357
// Data members. The first two are protected, so they are visible to derived
// classes; the rest are private.
00358 protected:
00359 ICOORD vertical_skew_;
00360 int resolution_;
00361 private:
// Returned by image_origin().
00362 ICOORD image_origin_;
// Returned (by address) from vectors().
00363 TabVector_LIST vectors_;
// Iterator over a TabVector list. NOTE(review): presumably bound to vectors_
// -- confirm.
00364 TabVector_IT v_it_;
// Returned (by address) from dead_vectors().
00365 TabVector_LIST dead_vectors_;
00366 ICOORDELT_LIST column_widths_;
// Returned by WidthCB(). This is a raw pointer, so ownership and lifetime are
// not expressed by the type. NOTE(review): confirm who deletes it.
00368 WidthCallback* width_cb_;
00369
// Vectors of BLOBNBOX pointers for left and right tab candidates. The
// elements are raw pointers; ownership is not visible from this header.
00370 GenericVector<BLOBNBOX*> left_tab_boxes_;
00371 GenericVector<BLOBNBOX*> right_tab_boxes_;
00372 };
00373
00374 }
00375
00376 #endif // TESSERACT_TEXTORD_TABFIND_H__