Public Member Functions |
| Wordrec () |
virtual | ~Wordrec () |
void | CopyCharChoices (const BLOB_CHOICE_LIST_VECTOR &from, BLOB_CHOICE_LIST_VECTOR *to) |
bool | ChoiceIsCorrect (const UNICHARSET &uni_set, const WERD_CHOICE *choice, const GenericVector< STRING > &truth_text) |
void | SaveAltChoices (const LIST &best_choices, WERD_RES *word) |
void | FillLattice (const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
void | CallFillLattice (const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
void | update_ratings (const BLOB_CHOICE_LIST_VECTOR &new_choices, const CHUNKS_RECORD *chunks_record, const SEARCH_STATE search_state) |
void | SegSearch (CHUNKS_RECORD *chunks_record, WERD_CHOICE *best_choice, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_CHOICE *raw_choice, STATE *output_best_state, BlamerBundle *blamer_bundle) |
SEAM * | attempt_blob_chop (TWERD *word, TBLOB *blob, inT32 blob_number, bool italic_blob, SEAMS seam_list) |
SEAM * | chop_numbered_blob (TWERD *word, inT32 blob_number, bool italic_blob, SEAMS seam_list) |
SEAM * | chop_overlapping_blob (const GenericVector< TBOX > &boxes, WERD_RES *word_res, inT32 *blob_number, bool italic_blob, SEAMS seam_list) |
void | junk_worst_seam (SEAM_QUEUE seams, SEAM *new_seam, float new_priority) |
void | choose_best_seam (SEAM_QUEUE seam_queue, SEAM_PILE *seam_pile, SPLIT *split, PRIORITY priority, SEAM **seam_result, TBLOB *blob) |
void | combine_seam (SEAM_QUEUE seam_queue, SEAM_PILE seam_pile, SEAM *seam) |
inT16 | constrained_split (SPLIT *split, TBLOB *blob) |
void | delete_seam_pile (SEAM_PILE seam_pile) |
SEAM * | pick_good_seam (TBLOB *blob) |
PRIORITY | seam_priority (SEAM *seam, inT16 xmin, inT16 xmax) |
void | try_point_pairs (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, SEAM_QUEUE seam_queue, SEAM_PILE *seam_pile, SEAM **seam, TBLOB *blob) |
void | try_vertical_splits (EDGEPT *points[MAX_NUM_POINTS], inT16 num_points, EDGEPT_CLIST *new_points, SEAM_QUEUE seam_queue, SEAM_PILE *seam_pile, SEAM **seam, TBLOB *blob) |
PRIORITY | full_split_priority (SPLIT *split, inT16 xmin, inT16 xmax) |
PRIORITY | grade_center_of_blob (register BOUNDS_RECT rect) |
PRIORITY | grade_overlap (register BOUNDS_RECT rect) |
PRIORITY | grade_split_length (register SPLIT *split) |
PRIORITY | grade_sharpness (register SPLIT *split) |
PRIORITY | grade_width_change (register BOUNDS_RECT rect) |
void | set_outline_bounds (register EDGEPT *point1, register EDGEPT *point2, BOUNDS_RECT rect) |
int | crosses_outline (EDGEPT *p0, EDGEPT *p1, EDGEPT *outline) |
int | is_crossed (TPOINT a0, TPOINT a1, TPOINT b0, TPOINT b1) |
int | is_same_edgept (EDGEPT *p1, EDGEPT *p2) |
bool | near_point (EDGEPT *point, EDGEPT *line_pt_0, EDGEPT *line_pt_1, EDGEPT **near_pt) |
void | reverse_outline (EDGEPT *outline) |
virtual BLOB_CHOICE_LIST * | classify_piece (TBLOB *pieces, const DENORM &denorm, SEAMS seams, inT16 start, inT16 end, BlamerBundle *blamer_bundle) |
void | merge_fragments (MATRIX *ratings, inT16 num_blobs) |
void | get_fragment_lists (inT16 current_frag, inT16 current_row, inT16 start, inT16 num_frag_parts, inT16 num_blobs, MATRIX *ratings, BLOB_CHOICE_LIST *choice_lists) |
void | merge_and_put_fragment_lists (inT16 row, inT16 column, inT16 num_frag_parts, BLOB_CHOICE_LIST *choice_lists, MATRIX *ratings) |
void | fill_filtered_fragment_list (BLOB_CHOICE_LIST *choices, int fragment_pos, int num_frag_parts, BLOB_CHOICE_LIST *filtered_choices) |
BLOB_CHOICE_LIST * | get_piece_rating (MATRIX *ratings, TBLOB *blobs, const DENORM &denorm, SEAMS seams, inT16 start, inT16 end, BlamerBundle *blamer_bundle) |
TBOX * | record_blob_bounds (TBLOB *blobs) |
MATRIX * | record_piece_ratings (TBLOB *blobs) |
WIDTH_RECORD * | state_char_widths (WIDTH_RECORD *chunk_widths, STATE *state, int num_joints) |
FLOAT32 | get_width_variance (WIDTH_RECORD *wrec, float norm_height) |
FLOAT32 | get_gap_variance (WIDTH_RECORD *wrec, float norm_height) |
FLOAT32 | prioritize_state (CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) |
FLOAT32 | width_priority (CHUNKS_RECORD *chunks_record, STATE *state, int num_joints) |
FLOAT32 | seamcut_priority (SEAMS seams, STATE *state, int num_joints) |
FLOAT32 | rating_priority (CHUNKS_RECORD *chunks_record, STATE *state, int num_joints) |
|
Initialize all the things in the program that need to be initialized. init_permute determines whether to initialize the permute functions and Dawg models.
|
void | program_editup (const char *textbase, bool init_classifier, bool init_permute) |
|
|
BLOB_CHOICE_LIST_VECTOR * | cc_recog (WERD_RES *word) |
|
This function holds any necessary post processing for the Wise Owl program.
|
void | program_editdown (inT32 elasped_time) |
|
Get ready to do some pass 1 stuff.
|
void | set_pass1 () |
|
Get ready to do some pass 2 stuff.
|
void | set_pass2 () |
|
Cleanup and exit the recog program.
|
int | end_recog () |
|
Called from Tess with a blob in tess form. The blob may need rotating to the correct orientation for classification.
|
BLOB_CHOICE_LIST * | call_matcher (const DENORM *denorm, TBLOB *blob) |
|
Test the dictionaries, returning NO_PERM (0) if not found, or one of the PermuterType values if found, according to the dictionary.
|
int | dict_word (const WERD_CHOICE &word) |
|
Classify this blob if it is not already recorded in the match table. Attempt to recognize this blob as a character. The recognition rating for this blob will be stored as a part of the blob. This value will also be returned to the caller.
- Parameters:
-
| blob | Current blob |
| string | The string to display in ScrollView |
| color | The colour to use when displayed with ScrollView |
|
BLOB_CHOICE_LIST * | classify_blob (TBLOB *blob, const DENORM &denorm, const char *string, C_COL color, BlamerBundle *blamer_bundle) |
BLOB_CHOICE_LIST * | fake_classify_blob (UNICHAR_ID class_id, float rating, float certainty) |
|
For each blob in the given word, update match_table with the corresponding BLOB_CHOICE_LIST from choices.
|
void | update_blob_classifications (TWERD *word, const BLOB_CHOICE_LIST_VECTOR &choices) |
|
Find the best segmentation by doing a best first search of the solution space.
|
BLOB_CHOICE_LIST_VECTOR * | evaluate_chunks (CHUNKS_RECORD *chunks_record, SEARCH_STATE search_state, BlamerBundle *blamer_bundle) |
void | best_first_search (CHUNKS_RECORD *chunks_record, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_RES *word, STATE *state, DANGERR *fixpt, STATE *best_state) |
void | delete_search (SEARCH_RECORD *the_search) |
|
Evaluate the segmentation that is represented by this state in the best first search. Add this state to the "states_seen" list.
|
inT16 | evaluate_state (CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search, DANGERR *fixpt, BlamerBundle *blamer_bundle) |
BLOB_CHOICE_LIST_VECTOR * | rebuild_current_state (WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *char_choices, MATRIX *ratings) |
|
Create and initialize a new search record.
|
SEARCH_RECORD * | new_search (CHUNKS_RECORD *chunks_record, int num_joints, BLOB_CHOICE_LIST_VECTOR *best_char_choices, WERD_CHOICE *best_choice, WERD_CHOICE *raw_choice, STATE *state) |
|
Create the states that are attached to this one. Check to see that each one has not already been visited. If not add it to the priority queue.
|
void | expand_node (FLOAT32 worst_priority, CHUNKS_RECORD *chunks_record, SEARCH_RECORD *the_search) |
|
Replace the value of the char_width field in the chunks_record with the updated width measurements from the last_segmentation.
|
void | replace_char_widths (CHUNKS_RECORD *chunks_record, SEARCH_STATE state) |
BLOB_CHOICE * | rebuild_fragments (const char *unichar, const char *expanded_fragment_lengths, int choice_index, BLOB_CHOICE_LIST_VECTOR *old_choices) |
BLOB_CHOICE_LIST * | join_blobs_and_classify (WERD_RES *word, int x, int y, int choice_index, MATRIX *ratings, BLOB_CHOICE_LIST_VECTOR *old_choices) |
|
Get this state from the priority queue. It should be the state that has the greatest urgency to be evaluated.
|
STATE * | pop_queue (HEAP *queue) |
|
Add this state into the priority queue.
|
void | push_queue (HEAP *queue, STATE *state, FLOAT32 worst_priority, FLOAT32 priority, bool debug) |
|
Assign a priority to an edge point that might be used as part of a split. The argument should be of type EDGEPT.
|
PRIORITY | point_priority (EDGEPT *point) |
|
Add an edge point to a POINT_GROUP containing a list of other points.
|
void | add_point_to_list (POINT_GROUP point_list, EDGEPT *point) |
|
Return the change in angle (degrees) of the line segments between points one and two, and two and three.
|
int | angle_change (EDGEPT *point1, EDGEPT *point2, EDGEPT *point3) |
|
Return TRUE if one of the pieces resulting from this split would have fewer than some number of edge points.
|
int | is_little_chunk (EDGEPT *point1, EDGEPT *point2) |
|
Test the area defined by a split across this outline.
|
int | is_small_area (EDGEPT *point1, EDGEPT *point2) |
|
Choose the edge point that is closest to the critical point. This point may not be exactly vertical from the critical point.
|
EDGEPT * | pick_close_point (EDGEPT *critical_point, EDGEPT *vertical_point, int *best_dist) |
|
Find a list of edge points from the outer outline of this blob. For each of these points assign a priority. Sort these points using a heap structure so that they can be visited in order.
|
void | prioritize_points (TESSLINE *outline, POINT_GROUP points) |
|
Found a new minimum point; try to decide whether to save it or not. Return the new value for the local minimum. If a point is saved then the local minimum is reset to NULL.
|
void | new_min_point (EDGEPT *local_min, POINT_GROUP points) |
|
Found a new maximum point; try to decide whether to save it or not. Return the new value for the local maximum. If a point is saved then the local maximum is reset to NULL.
|
void | new_max_point (EDGEPT *local_max, POINT_GROUP points) |
|
For one point on the outline, find the corresponding point on the other side of the outline that is a likely projection for a split point. This is done by iterating through the edge points until the X value of the point being looked at is greater than the X value of the split point. Ensure that the point being returned is not right next to the split point. Return the edge point in *best_point as a result, and any points that were newly created are also saved on the new_points list.
|
void | vertical_projection_point (EDGEPT *split_point, EDGEPT *target_point, EDGEPT **best_point, EDGEPT_CLIST *new_points) |
|
Start with the current word of blobs and its classification. Find the worst blob and try to divide it up to improve the ratings.
|
bool | improve_one_blob (WERD_RES *word_res, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, DANGERR *fixpt, bool split_next_to_fragment, BlamerBundle *blamer_bundle) |
|
Takes a blob and its chop index, converts that chop index to a unichar_id, and stores the chop index in place of the blob's original unichar_id.
|
void | modify_blob_choice (BLOB_CHOICE_LIST *answer, int chop_index) |
|
Start with the current one-blob word and its classification. Find the worst blob and try to divide it up to improve the ratings. Used for testing chopper.
|
bool | chop_one_blob (TWERD *word, BLOB_CHOICE_LIST_VECTOR *char_choices, inT32 *blob_number, SEAMS *seam_list, int *right_chop_index) |
bool | chop_one_blob2 (const GenericVector< TBOX > &boxes, WERD_RES *word_res, SEAMS *seam_list) |
|
Classify the blobs in this word and permute the results. Find the worst blob in the word and chop it up. Continue this process until a good answer has been found or all the blobs have been chopped up enough. Return the word level ratings.
|
BLOB_CHOICE_LIST_VECTOR * | chop_word_main (WERD_RES *word) |
|
Start with the current word of blobs and its classification. Find the worst blobs and try to divide them up to improve the ratings, continuing as long as ratings are produced by the new blob splitting. When all the splitting has been accomplished, all the ratings memory is reclaimed.
|
void | improve_by_chopping (WERD_RES *word, BLOB_CHOICE_LIST_VECTOR *char_choices, STATE *best_state, BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, bool *updated_best_choice) |
MATRIX * | word_associator (bool only_create_ratings_matrtix, WERD_RES *word, STATE *state, BLOB_CHOICE_LIST_VECTOR *best_char_choices, DANGERR *fixpt, STATE *best_state) |
inT16 | select_blob_to_split (const BLOB_CHOICE_LIST_VECTOR &char_choices, float rating_ceiling, bool split_next_to_fragment) |
void | set_chopper_blame (WERD_RES *word) |
Public Attributes |
bool | merge_fragments_in_matrix = TRUE |
bool | wordrec_no_block = FALSE |
bool | wordrec_enable_assoc = TRUE |
bool | force_word_assoc = FALSE |
int | wordrec_num_seg_states = 30 |
double | wordrec_worst_state = 1 |
bool | fragments_guide_chopper = FALSE |
int | repair_unchopped_blobs = 1 |
double | tessedit_certainty_threshold = -2.25 |
int | chop_debug = 0 |
bool | chop_enable = 1 |
bool | chop_vertical_creep = 0 |
int | chop_split_length = 10000 |
int | chop_same_distance = 2 |
int | chop_min_outline_points = 6 |
int | chop_inside_angle = -50 |
int | chop_min_outline_area = 2000 |
double | chop_split_dist_knob = 0.5 |
double | chop_overlap_knob = 0.9 |
double | chop_center_knob = 0.15 |
double | chop_sharpness_knob = 0.06 |
double | chop_width_change_knob = 5.0 |
double | chop_ok_split = 100.0 |
double | chop_good_split = 50.0 |
int | chop_x_y_weight = 3 |
int | segment_adjust_debug = 0 |
bool | assume_fixed_pitch_char_segment = FALSE |
bool | use_new_state_cost = FALSE |
double | heuristic_segcost_rating_base = 1.25 |
double | heuristic_weight_rating = 1 |
double | heuristic_weight_width = 0 |
double | heuristic_weight_seamcut = 0 |
double | heuristic_max_char_wh_ratio = 2.0 |
int | wordrec_debug_level = 0 |
bool | wordrec_debug_blamer = false |
bool | wordrec_run_blamer = false |
bool | enable_new_segsearch = false |
int | segsearch_debug_level = 0 |
int | segsearch_max_pain_points = 2000 |
int | segsearch_max_futile_classifications = 10 |
double | segsearch_max_char_wh_ratio = 2.0 |
double | segsearch_max_fixed_pitch_char_wh_ratio = 2.0 |
bool | save_alt_choices = false |
LanguageModel * | language_model_ |
PRIORITY | pass2_ok_split |
int | pass2_seg_states |
int | num_joints |
int | num_pushed |
int | num_popped |
BlobMatchTable | blob_match_table |
EVALUATION_ARRAY | last_segmentation |
WERD_CHOICE * | prev_word_best_choice_ |
GenericVector< int > | blame_reasons_ |
void(Wordrec::* | fill_lattice_ )(const MATRIX &ratings, const LIST &best_choices, const UNICHARSET &unicharset, BlamerBundle *blamer_bundle) |
Protected Member Functions |
bool | SegSearchDone (int num_futile_classifications) |
void | UpdateSegSearchNodes (int starting_col, SEG_SEARCH_PENDING_LIST *pending[], BestPathByColumn *best_path_by_column[], CHUNKS_RECORD *chunks_record, HEAP *pain_points, BestChoiceBundle *best_choice_bundle, BlamerBundle *blamer_bundle) |
void | ProcessSegSearchPainPoint (float pain_point_priority, const MATRIX_COORD &pain_point, const WERD_CHOICE *best_choice, SEG_SEARCH_PENDING_LIST *pending[], CHUNKS_RECORD *chunks_record, HEAP *pain_points, BlamerBundle *blamer_bundle) |
void | InitBlamerForSegSearch (const WERD_CHOICE *best_choice, CHUNKS_RECORD *chunks_record, HEAP *pain_points, BlamerBundle *blamer_bundle, STRING *blamer_debug) |
void | FinishBlamerForSegSearch (const WERD_CHOICE *best_choice, BlamerBundle *blamer_bundle, STRING *blamer_debug) |