00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef MAKEROW_H
00021 #define MAKEROW_H
00022
00023 #include "params.h"
00024 #include "ocrblock.h"
00025 #include "blobs.h"
00026 #include "blobbox.h"
00027 #include "statistc.h"
00028 #include "notdll.h"
00029
// Result type of most_overlapping_row() (declared further down in this
// header). The enumerator names suggest the three possible outcomes for a
// blob: assign it to a row, reject it, or start a new row -- confirm against
// the definition of most_overlapping_row().
enum OVERLAP_STATE {
  ASSIGN = 0,
  REJECT = 1,
  NEW_ROW = 2
};
00036
// Classification of a row, as produced by get_row_category() in this header.
enum ROW_CATEGORY {
  ROW_ASCENDERS_FOUND = 0,   // xheight > 0 and ascrise > 0.
  ROW_DESCENDERS_FOUND = 1,  // xheight > 0, ascrise <= 0, descdrop != 0.
  ROW_UNKNOWN = 2,           // xheight > 0, ascrise <= 0, descdrop == 0.
  ROW_INVALID = 3            // xheight <= 0.
};
00043
00044 extern BOOL_VAR_H (textord_show_initial_rows, FALSE,
00045 "Display row accumulation");
00046 extern BOOL_VAR_H (textord_show_parallel_rows, FALSE,
00047 "Display page correlated rows");
00048 extern BOOL_VAR_H (textord_show_expanded_rows, FALSE,
00049 "Display rows after expanding");
00050 extern BOOL_VAR_H (textord_show_final_rows, FALSE,
00051 "Display rows after final fitting");
00052 extern BOOL_VAR_H (textord_show_final_blobs, FALSE,
00053 "Display blob bounds after pre-ass");
00054 extern BOOL_VAR_H (textord_test_landscape, FALSE, "Tests refer to land/port");
00055 extern BOOL_VAR_H (textord_parallel_baselines, TRUE,
00056 "Force parallel baselines");
00057 extern BOOL_VAR_H (textord_straight_baselines, FALSE,
00058 "Force straight baselines");
00059 extern BOOL_VAR_H (textord_quadratic_baselines, FALSE,
00060 "Use quadratic splines");
00061 extern BOOL_VAR_H (textord_old_baselines, TRUE, "Use old baseline algorithm");
00062 extern BOOL_VAR_H (textord_old_xheight, TRUE, "Use old xheight algorithm");
00063 extern BOOL_VAR_H (textord_fix_xheight_bug, TRUE, "Use spline baseline");
00064 extern BOOL_VAR_H (textord_fix_makerow_bug, TRUE,
00065 "Prevent multiple baselines");
00066 extern BOOL_VAR_H (textord_cblob_blockocc, TRUE,
00067 "Use new projection for underlines");
00068 extern BOOL_VAR_H (textord_debug_xheights, FALSE, "Test xheight algorithms");
00069 extern INT_VAR_H (textord_test_x, 0, "coord of test pt");
00070 extern INT_VAR_H (textord_test_y, 0, "coord of test pt");
00071 extern INT_VAR_H (textord_min_blobs_in_row, 4,
00072 "Min blobs before gradient counted");
00073 extern INT_VAR_H (textord_spline_minblobs, 8,
00074 "Min blobs in each spline segment");
00075 extern INT_VAR_H (textord_spline_medianwin, 6,
00076 "Size of window for spline segmentation");
00077 extern INT_VAR_H (textord_min_xheight, 10, "Min credible pixel xheight");
00078 extern double_VAR_H (textord_spline_shift_fraction, 0.02,
00079 "Fraction of line spacing for quad");
00080 extern double_VAR_H (textord_spline_outlier_fraction, 0.1,
00081 "Fraction of line spacing for outlier");
00082 extern double_VAR_H (textord_skew_ile, 0.5, "Ile of gradients for page skew");
00083 extern double_VAR_H (textord_skew_lag, 0.75,
00084 "Lag for skew on row accumulation");
00085 extern double_VAR_H (textord_linespace_iqrlimit, 0.2,
00086 "Max iqr/median for linespace");
00087 extern double_VAR_H (textord_width_limit, 8,
00088 "Max width of blobs to make rows");
00089 extern double_VAR_H (textord_chop_width, 1.5, "Max width before chopping");
00090 extern double_VAR_H (textord_minxh, 0.25,
00091 "fraction of linesize for min xheight");
00092 extern double_VAR_H (textord_min_linesize, 1.25,
00093 "* blob height for initial linesize");
00094 extern double_VAR_H (textord_excess_blobsize, 1.3,
00095 "New row made if blob makes row this big");
00096 extern double_VAR_H (textord_occupancy_threshold, 0.4,
00097 "Fraction of neighbourhood");
00098 extern double_VAR_H (textord_underline_width, 2.0,
00099 "Multiple of line_size for underline");
00100 extern double_VAR_H(textord_min_blob_height_fraction, 0.75,
00101 "Min blob height/top to include blob top into xheight stats");
00102 extern double_VAR_H (textord_xheight_mode_fraction, 0.4,
00103 "Min pile height to make xheight");
00104 extern double_VAR_H (textord_ascheight_mode_fraction, 0.15,
00105 "Min pile height to make ascheight");
00106 extern double_VAR_H (textord_ascx_ratio_min, 1.2, "Min cap/xheight");
00107 extern double_VAR_H (textord_ascx_ratio_max, 1.7, "Max cap/xheight");
00108 extern double_VAR_H (textord_descx_ratio_min, 0.15, "Min desc/xheight");
00109 extern double_VAR_H (textord_descx_ratio_max, 0.6, "Max desc/xheight");
00110 extern double_VAR_H (textord_xheight_error_margin, 0.1, "Accepted variation");
00111 extern INT_VAR_H (textord_lms_line_trials, 12, "Number of linew fits to do");
00112 extern BOOL_VAR_H (textord_new_initial_xheight, TRUE,
00113 "Use test xheight mechanism");
00114
00115 inline void get_min_max_xheight(int block_linesize,
00116 int *min_height, int *max_height) {
00117 *min_height = static_cast<inT32>(floor(block_linesize * textord_minxh));
00118 if (*min_height < textord_min_xheight) *min_height = textord_min_xheight;
00119 *max_height = static_cast<inT32>(ceil(block_linesize * 3.0));
00120 }
00121
00122 inline ROW_CATEGORY get_row_category(const TO_ROW *row) {
00123 if (row->xheight <= 0) return ROW_INVALID;
00124 return (row->ascrise > 0) ? ROW_ASCENDERS_FOUND :
00125 (row->descdrop != 0) ? ROW_DESCENDERS_FOUND : ROW_UNKNOWN;
00126 }
00127
// Returns true when test lies in the closed interval
// [num * (1 - margin), num * (1 + margin)].
// For negative num with positive margin the two bounds are reversed, so the
// interval is empty and the result is false.
inline bool within_error_margin(float test, float num, float margin) {
  const float lower = num * (1 - margin);
  const float upper = num * (1 + margin);
  // Written as a negated >= (not <) so a NaN bound still yields false.
  if (!(test >= lower)) {
    return false;
  }
  return test <= upper;
}
00131
00132 void fill_heights(TO_ROW *row, float gradient, int min_height,
00133 int max_height, STATS *heights, STATS *floating_heights);
00134
00135 float make_single_row(ICOORD page_tr, TO_BLOCK* block,
00136 TO_BLOCK_LIST* blocks);
00137 float make_rows(ICOORD page_tr,
00138 TO_BLOCK_LIST *port_blocks);
00139 void make_initial_textrows(ICOORD page_tr,
00140 TO_BLOCK *block,
00141 FCOORD rotation,
00142 BOOL8 testing_on);
00143 void fit_lms_line(TO_ROW *row);
00144 void compute_page_skew(TO_BLOCK_LIST *blocks,
00145 float &page_m,
00146 float &page_err);
00147 void cleanup_rows_making(ICOORD page_tr,
00148 TO_BLOCK *block,
00149 float gradient,
00150 FCOORD rotation,
00151 inT32 block_edge,
00152 BOOL8 testing_on);
00153 void delete_non_dropout_rows(
00154 TO_BLOCK *block,
00155 float gradient,
00156 FCOORD rotation,
00157 inT32 block_edge,
00158 BOOL8 testing_on
00159 );
00160 BOOL8 find_best_dropout_row(
00161 TO_ROW *row,
00162 inT32 distance,
00163 float dist_limit,
00164 inT32 line_index,
00165 TO_ROW_IT *row_it,
00166 BOOL8 testing_on
00167 );
00168 TBOX deskew_block_coords(
00169 TO_BLOCK *block,
00170 float gradient
00171 );
00172 void compute_line_occupation(
00173 TO_BLOCK *block,
00174 float gradient,
00175 inT32 min_y,
00176 inT32 max_y,
00177 inT32 *occupation,
00178 inT32 *deltas
00179 );
00180 void compute_occupation_threshold(
00181 inT32 low_window,
00182 inT32 high_window,
00183 inT32 line_count,
00184 inT32 *occupation,
00185 inT32 *thresholds
00186 );
00187 void compute_dropout_distances(
00188 inT32 *occupation,
00189 inT32 *thresholds,
00190 inT32 line_count
00191 );
00192 void expand_rows(
00193 ICOORD page_tr,
00194 TO_BLOCK *block,
00195 float gradient,
00196 FCOORD rotation,
00197 inT32 block_edge,
00198 BOOL8 testing_on
00199 );
00200 void adjust_row_limits(
00201 TO_BLOCK *block
00202 );
00203 void compute_row_stats(
00204 TO_BLOCK *block,
00205 BOOL8 testing_on
00206 );
00207 float median_block_xheight(
00208 TO_BLOCK *block,
00209 float gradient
00210 );
00211
00212 int compute_xheight_from_modes(
00213 STATS *heights, STATS *floating_heights, bool cap_only, int min_height,
00214 int max_height, float *xheight, float *ascrise);
00215
00216 inT32 compute_row_descdrop(TO_ROW *row,
00217 float gradient,
00218 int xheight_blob_count,
00219 STATS *heights);
00220 inT32 compute_height_modes(STATS *heights,
00221 inT32 min_height,
00222 inT32 max_height,
00223 inT32 *modes,
00224 inT32 maxmodes);
00225 void correct_row_xheight(TO_ROW *row,
00226 float xheight,
00227 float ascrise,
00228 float descdrop);
00229 void separate_underlines(TO_BLOCK *block,
00230 float gradient,
00231 FCOORD rotation,
00232 BOOL8 testing_on);
00233 void pre_associate_blobs( ICOORD page_tr,
00234 TO_BLOCK *block,
00235 FCOORD rotation,
00236 BOOL8 testing_on);
00237 void fit_parallel_rows(TO_BLOCK *block,
00238 float gradient,
00239 FCOORD rotation,
00240 inT32 block_edge,
00241 BOOL8 testing_on);
00242 void fit_parallel_lms(float gradient,
00243 TO_ROW *row);
00244 void make_baseline_spline(TO_ROW *row,
00245 TO_BLOCK *block);
00246 BOOL8 segment_baseline (
00247 TO_ROW * row,
00248 TO_BLOCK * block,
00249 inT32 & segments,
00250 inT32 xstarts[]
00251 );
00252 double *linear_spline_baseline (
00253 TO_ROW * row,
00254 TO_BLOCK * block,
00255 inT32 & segments,
00256 inT32 xstarts[]
00257 );
00258 void assign_blobs_to_rows(
00259 TO_BLOCK *block,
00260 float *gradient,
00261 int pass,
00262 BOOL8 reject_misses,
00263 BOOL8 make_new_rows,
00264 BOOL8 drawing_skew
00265 );
00266
00267 OVERLAP_STATE most_overlapping_row(TO_ROW_IT *row_it,
00268 TO_ROW *&best_row,
00269 float top,
00270 float bottom,
00271 float rowsize,
00272 BOOL8 testing_blob
00273 );
00274 int blob_x_order(
00275 const void *item1,
00276 const void *item2);
00277 int row_y_order(
00278 const void *item1,
00279 const void *item2);
00280 int row_spacing_order(
00281 const void *item1,
00282 const void *item2);
00283
00284 void mark_repeated_chars(TO_ROW *row);
00285 #endif