UNICHARSET Class Reference

#include <unicharset.h>

List of all members.

Classes

struct  UNICHAR_PROPERTIES
struct  UNICHAR_SLOT

Public Types

enum  Direction {
  U_LEFT_TO_RIGHT = 0, U_RIGHT_TO_LEFT = 1, U_EUROPEAN_NUMBER = 2, U_EUROPEAN_NUMBER_SEPARATOR = 3,
  U_EUROPEAN_NUMBER_TERMINATOR = 4, U_ARABIC_NUMBER = 5, U_COMMON_NUMBER_SEPARATOR = 6, U_BLOCK_SEPARATOR = 7,
  U_SEGMENT_SEPARATOR = 8, U_WHITE_SPACE_NEUTRAL = 9, U_OTHER_NEUTRAL = 10, U_LEFT_TO_RIGHT_EMBEDDING = 11,
  U_LEFT_TO_RIGHT_OVERRIDE = 12, U_RIGHT_TO_LEFT_ARABIC = 13, U_RIGHT_TO_LEFT_EMBEDDING = 14, U_RIGHT_TO_LEFT_OVERRIDE = 15,
  U_POP_DIRECTIONAL_FORMAT = 16, U_DIR_NON_SPACING_MARK = 17, U_BOUNDARY_NEUTRAL = 18, U_CHAR_DIRECTION_COUNT
}

Public Member Functions

 UNICHARSET ()
 ~UNICHARSET ()
const UNICHAR_ID unichar_to_id (const char *const unichar_repr) const
const UNICHAR_ID unichar_to_id (const char *const unichar_repr, int length) const
int step (const char *str) const
bool encodable_string (const char *str, int *first_bad_position) const
const char *const id_to_unichar (UNICHAR_ID id) const
const char *const id_to_unichar_ext (UNICHAR_ID id) const
STRING debug_str (UNICHAR_ID id) const
STRING debug_str (const char *unichar_repr) const
void unichar_insert (const char *const unichar_repr)
bool contains_unichar_id (UNICHAR_ID unichar_id) const
bool contains_unichar (const char *const unichar_repr) const
bool contains_unichar (const char *const unichar_repr, int length) const
bool eq (UNICHAR_ID unichar_id, const char *const unichar_repr) const
void delete_pointers_in_unichars ()
void clear ()
int size () const
void reserve (int unichars_number)
bool save_to_file (const char *const filename) const
bool save_to_file (FILE *file) const
bool load_from_inmemory_file (const char *const memory, int mem_size, bool skip_fragments)
bool load_from_inmemory_file (const char *const memory, int mem_size)
bool load_from_file (const char *const filename, bool skip_fragments)
bool load_from_file (const char *const filename)
bool load_from_file (FILE *file, bool skip_fragments)
bool load_from_file (FILE *file)
void post_load_setup ()
bool major_right_to_left () const
void set_black_and_whitelist (const char *blacklist, const char *whitelist)
void set_isalpha (UNICHAR_ID unichar_id, bool value)
void set_islower (UNICHAR_ID unichar_id, bool value)
void set_isupper (UNICHAR_ID unichar_id, bool value)
void set_isdigit (UNICHAR_ID unichar_id, bool value)
void set_ispunctuation (UNICHAR_ID unichar_id, bool value)
void set_isngram (UNICHAR_ID unichar_id, bool value)
void set_script (UNICHAR_ID unichar_id, const char *value)
void set_other_case (UNICHAR_ID unichar_id, UNICHAR_ID other_case)
void set_direction (UNICHAR_ID unichar_id, UNICHARSET::Direction value)
void set_mirror (UNICHAR_ID unichar_id, UNICHAR_ID mirror)
void set_normed (UNICHAR_ID unichar_id, const char *normed)
bool get_isalpha (UNICHAR_ID unichar_id) const
bool get_islower (UNICHAR_ID unichar_id) const
bool get_isupper (UNICHAR_ID unichar_id) const
bool get_isdigit (UNICHAR_ID unichar_id) const
bool get_ispunctuation (UNICHAR_ID unichar_id) const
bool get_isngram (UNICHAR_ID unichar_id) const
bool get_isprivate (UNICHAR_ID unichar_id) const
bool top_bottom_useful () const
void set_ranges_empty ()
void SetPropertiesFromOther (const UNICHARSET &src)
void ExpandRangesFromOther (const UNICHARSET &src)
void AppendOtherUnicharset (const UNICHARSET &src)
void get_top_bottom (UNICHAR_ID unichar_id, int *min_bottom, int *max_bottom, int *min_top, int *max_top) const
void set_top_bottom (UNICHAR_ID unichar_id, int min_bottom, int max_bottom, int min_top, int max_top)
void get_width_range (UNICHAR_ID unichar_id, int *min_width, int *max_width) const
void set_width_range (UNICHAR_ID unichar_id, int min_width, int max_width)
void get_bearing_range (UNICHAR_ID unichar_id, int *min_bearing, int *max_bearing) const
void set_bearing_range (UNICHAR_ID unichar_id, int min_bearing, int max_bearing)
void get_advance_range (UNICHAR_ID unichar_id, int *min_advance, int *max_advance) const
void set_advance_range (UNICHAR_ID unichar_id, int min_advance, int max_advance)
int get_script (UNICHAR_ID unichar_id) const
unsigned int get_properties (UNICHAR_ID unichar_id) const
char get_chartype (UNICHAR_ID unichar_id) const
UNICHAR_ID get_other_case (UNICHAR_ID unichar_id) const
Direction get_direction (UNICHAR_ID unichar_id) const
UNICHAR_ID get_mirror (UNICHAR_ID unichar_id) const
UNICHAR_ID to_lower (UNICHAR_ID unichar_id) const
UNICHAR_ID to_upper (UNICHAR_ID unichar_id) const
const CHAR_FRAGMENT * get_fragment (UNICHAR_ID unichar_id) const
bool get_isalpha (const char *const unichar_repr) const
bool get_islower (const char *const unichar_repr) const
bool get_isupper (const char *const unichar_repr) const
bool get_isdigit (const char *const unichar_repr) const
bool get_ispunctuation (const char *const unichar_repr) const
unsigned int get_properties (const char *const unichar_repr) const
char get_chartype (const char *const unichar_repr) const
int get_script (const char *const unichar_repr) const
const CHAR_FRAGMENT * get_fragment (const char *const unichar_repr) const
bool get_isalpha (const char *const unichar_repr, int length) const
bool get_islower (const char *const unichar_repr, int length) const
bool get_isupper (const char *const unichar_repr, int length) const
bool get_isdigit (const char *const unichar_repr, int length) const
bool get_ispunctuation (const char *const unichar_repr, int length) const
const char * get_normed_unichar (UNICHAR_ID unichar_id) const
int get_script (const char *const unichar_repr, int length) const
int get_script_table_size () const
const char * get_script_from_script_id (int id) const
int get_script_id_from_name (const char *script_name) const
bool is_null_script (const char *script) const
int add_script (const char *script)
bool get_enabled (UNICHAR_ID unichar_id) const
int null_sid () const
int common_sid () const
int latin_sid () const
int cyrillic_sid () const
int greek_sid () const
int han_sid () const
int hiragana_sid () const
int katakana_sid () const
int default_sid () const
bool script_has_upper_lower () const
bool script_has_xheight () const

Static Public Member Functions

static STRING debug_utf8_str (const char *str)

Static Public Attributes

static const char * kCustomLigatures [][2]

Member Enumeration Documentation

enum UNICHARSET::Direction

Enumerator:
U_LEFT_TO_RIGHT 
U_RIGHT_TO_LEFT 
U_EUROPEAN_NUMBER 
U_EUROPEAN_NUMBER_SEPARATOR 
U_EUROPEAN_NUMBER_TERMINATOR 
U_ARABIC_NUMBER 
U_COMMON_NUMBER_SEPARATOR 
U_BLOCK_SEPARATOR 
U_SEGMENT_SEPARATOR 
U_WHITE_SPACE_NEUTRAL 
U_OTHER_NEUTRAL 
U_LEFT_TO_RIGHT_EMBEDDING 
U_LEFT_TO_RIGHT_OVERRIDE 
U_RIGHT_TO_LEFT_ARABIC 
U_RIGHT_TO_LEFT_EMBEDDING 
U_RIGHT_TO_LEFT_OVERRIDE 
U_POP_DIRECTIONAL_FORMAT 
U_DIR_NON_SPACING_MARK 
U_BOUNDARY_NEUTRAL 
U_CHAR_DIRECTION_COUNT 

Constructor & Destructor Documentation

UNICHARSET::UNICHARSET (  ) 
UNICHARSET::~UNICHARSET (  ) 

Member Function Documentation

int UNICHARSET::add_script ( const char *  script  ) 
void UNICHARSET::AppendOtherUnicharset ( const UNICHARSET &  src  ) 
void UNICHARSET::clear (  )  [inline]
int UNICHARSET::common_sid (  )  const [inline]
bool UNICHARSET::contains_unichar ( const char *const   unichar_repr,
int  length 
) const
bool UNICHARSET::contains_unichar ( const char *const   unichar_repr  )  const
bool UNICHARSET::contains_unichar_id ( UNICHAR_ID  unichar_id  )  const [inline]
int UNICHARSET::cyrillic_sid (  )  const [inline]
STRING UNICHARSET::debug_str ( const char *  unichar_repr  )  const [inline]
STRING UNICHARSET::debug_str ( UNICHAR_ID  id  )  const
STRING UNICHARSET::debug_utf8_str ( const char *  str  )  [static]
int UNICHARSET::default_sid (  )  const [inline]
void UNICHARSET::delete_pointers_in_unichars (  )  [inline]
bool UNICHARSET::encodable_string ( const char *  str,
int *  first_bad_position 
) const
bool UNICHARSET::eq ( UNICHAR_ID  unichar_id,
const char *const   unichar_repr 
) const
void UNICHARSET::ExpandRangesFromOther ( const UNICHARSET &  src  ) 
void UNICHARSET::get_advance_range ( UNICHAR_ID  unichar_id,
int *  min_advance,
int *  max_advance 
) const [inline]
void UNICHARSET::get_bearing_range ( UNICHAR_ID  unichar_id,
int *  min_bearing,
int *  max_bearing 
) const [inline]
char UNICHARSET::get_chartype ( const char *const   unichar_repr  )  const [inline]
char UNICHARSET::get_chartype ( UNICHAR_ID  unichar_id  )  const
Direction UNICHARSET::get_direction ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_enabled ( UNICHAR_ID  unichar_id  )  const [inline]
const CHAR_FRAGMENT* UNICHARSET::get_fragment ( const char *const   unichar_repr  )  const [inline]
const CHAR_FRAGMENT* UNICHARSET::get_fragment ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_isalpha ( const char *const   unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_isalpha ( const char *const   unichar_repr  )  const [inline]
bool UNICHARSET::get_isalpha ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_isdigit ( const char *const   unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_isdigit ( const char *const   unichar_repr  )  const [inline]
bool UNICHARSET::get_isdigit ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_islower ( const char *const   unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_islower ( const char *const   unichar_repr  )  const [inline]
bool UNICHARSET::get_islower ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_isngram ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_isprivate ( UNICHAR_ID  unichar_id  )  const
bool UNICHARSET::get_ispunctuation ( const char *const   unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_ispunctuation ( const char *const   unichar_repr  )  const [inline]
bool UNICHARSET::get_ispunctuation ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::get_isupper ( const char *const   unichar_repr,
int  length 
) const [inline]
bool UNICHARSET::get_isupper ( const char *const   unichar_repr  )  const [inline]
bool UNICHARSET::get_isupper ( UNICHAR_ID  unichar_id  )  const [inline]
UNICHAR_ID UNICHARSET::get_mirror ( UNICHAR_ID  unichar_id  )  const [inline]
const char* UNICHARSET::get_normed_unichar ( UNICHAR_ID  unichar_id  )  const [inline]
UNICHAR_ID UNICHARSET::get_other_case ( UNICHAR_ID  unichar_id  )  const [inline]
unsigned int UNICHARSET::get_properties ( const char *const   unichar_repr  )  const [inline]
unsigned int UNICHARSET::get_properties ( UNICHAR_ID  unichar_id  )  const
int UNICHARSET::get_script ( const char *const   unichar_repr,
int  length 
) const [inline]
int UNICHARSET::get_script ( const char *const   unichar_repr  )  const [inline]
int UNICHARSET::get_script ( UNICHAR_ID  unichar_id  )  const [inline]
const char* UNICHARSET::get_script_from_script_id ( int  id  )  const [inline]
int UNICHARSET::get_script_id_from_name ( const char *  script_name  )  const
int UNICHARSET::get_script_table_size (  )  const [inline]
void UNICHARSET::get_top_bottom ( UNICHAR_ID  unichar_id,
int *  min_bottom,
int *  max_bottom,
int *  min_top,
int *  max_top 
) const [inline]
void UNICHARSET::get_width_range ( UNICHAR_ID  unichar_id,
int *  min_width,
int *  max_width 
) const [inline]
int UNICHARSET::greek_sid (  )  const [inline]
int UNICHARSET::han_sid (  )  const [inline]
int UNICHARSET::hiragana_sid (  )  const [inline]
const char *const UNICHARSET::id_to_unichar ( UNICHAR_ID  id  )  const
const char *const UNICHARSET::id_to_unichar_ext ( UNICHAR_ID  id  )  const
bool UNICHARSET::is_null_script ( const char *  script  )  const [inline]
int UNICHARSET::katakana_sid (  )  const [inline]
int UNICHARSET::latin_sid (  )  const [inline]
bool UNICHARSET::load_from_file ( FILE *  file  )  [inline]
bool UNICHARSET::load_from_file ( FILE *  file,
bool  skip_fragments 
)
bool UNICHARSET::load_from_file ( const char *const   filename  )  [inline]
bool UNICHARSET::load_from_file ( const char *const   filename,
bool  skip_fragments 
) [inline]
bool UNICHARSET::load_from_inmemory_file ( const char *const   memory,
int  mem_size 
) [inline]
bool UNICHARSET::load_from_inmemory_file ( const char *const  memory,
int  mem_size,
bool  skip_fragments 
)
bool UNICHARSET::major_right_to_left (  )  const
int UNICHARSET::null_sid (  )  const [inline]
void UNICHARSET::post_load_setup (  ) 
void UNICHARSET::reserve ( int  unichars_number  ) 
bool UNICHARSET::save_to_file ( FILE *  file  )  const
bool UNICHARSET::save_to_file ( const char *const   filename  )  const [inline]
bool UNICHARSET::script_has_upper_lower (  )  const [inline]
bool UNICHARSET::script_has_xheight (  )  const [inline]
void UNICHARSET::set_advance_range ( UNICHAR_ID  unichar_id,
int  min_advance,
int  max_advance 
) [inline]
void UNICHARSET::set_bearing_range ( UNICHAR_ID  unichar_id,
int  min_bearing,
int  max_bearing 
) [inline]
void UNICHARSET::set_black_and_whitelist ( const char *  blacklist,
const char *  whitelist 
)
void UNICHARSET::set_direction ( UNICHAR_ID  unichar_id,
UNICHARSET::Direction  value 
) [inline]
void UNICHARSET::set_isalpha ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_isdigit ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_islower ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_isngram ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_ispunctuation ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_isupper ( UNICHAR_ID  unichar_id,
bool  value 
) [inline]
void UNICHARSET::set_mirror ( UNICHAR_ID  unichar_id,
UNICHAR_ID  mirror 
) [inline]
void UNICHARSET::set_normed ( UNICHAR_ID  unichar_id,
const char *  normed 
) [inline]
void UNICHARSET::set_other_case ( UNICHAR_ID  unichar_id,
UNICHAR_ID  other_case 
) [inline]
void UNICHARSET::set_ranges_empty (  ) 
void UNICHARSET::set_script ( UNICHAR_ID  unichar_id,
const char *  value 
) [inline]
void UNICHARSET::set_top_bottom ( UNICHAR_ID  unichar_id,
int  min_bottom,
int  max_bottom,
int  min_top,
int  max_top 
) [inline]
void UNICHARSET::set_width_range ( UNICHAR_ID  unichar_id,
int  min_width,
int  max_width 
) [inline]
void UNICHARSET::SetPropertiesFromOther ( const UNICHARSET &  src  ) 
int UNICHARSET::size (  )  const [inline]
int UNICHARSET::step ( const char *  str  )  const
UNICHAR_ID UNICHARSET::to_lower ( UNICHAR_ID  unichar_id  )  const [inline]
UNICHAR_ID UNICHARSET::to_upper ( UNICHAR_ID  unichar_id  )  const [inline]
bool UNICHARSET::top_bottom_useful (  )  const [inline]
void UNICHARSET::unichar_insert ( const char *const   unichar_repr  ) 
const UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const   unichar_repr,
int  length 
) const
const UNICHAR_ID UNICHARSET::unichar_to_id ( const char *const   unichar_repr  )  const

Member Data Documentation

const char * UNICHARSET::kCustomLigatures[][2] [static]
Initial value:
 {
  {"ct", "\uE003"},  
  {"ſh", "\uE006"},  
  {"ſi", "\uE007"},  
  {"ſl", "\uE008"},  
  {"ſſ", "\uE009"},  
  {NULL, NULL}
}

The documentation for this class was generated from the following files:
Generated on Thu Feb 2 08:19:25 2012 for Tesseract by  doxygen 1.6.3