﻿# Tesseract replacement map Level 2

# Resolves common mistakes of Tesseract in non-rotated text
# Format:
# <unicode...> ; <unicode...> ; ( <char...> → <char...> ) <CHARACTER NAME(S)> → <CHARACTER NAME(S)>
 
20AC ;	0043 ;	( € → C ) EURO SIGN → LATIN CAPITAL LETTER C
A792 ;	0043 ;	( Ꞓ  → C ) LATIN CAPITAL LETTER C WITH BAR → LATIN CAPITAL LETTER C
0073 ;	0035 ;	( s → 5 ) LATIN SMALL LETTER S → DIGIT FIVE
0053 ;	0035 ;	( S → 5 ) LATIN CAPITAL LETTER S → DIGIT FIVE
004F ;	0030 ;	( O → 0 ) LATIN CAPITAL LETTER O → DIGIT ZERO
0051 ;	004F ;	( Q → O ) LATIN CAPITAL LETTER Q → LATIN CAPITAL LETTER O
00DF ;  0042 ;  ( ß → B ) LATIN SMALL LETTER SHARP S → LATIN CAPITAL LETTER B
00DF ;  0052 ;  ( ß → R ) LATIN SMALL LETTER SHARP S → LATIN CAPITAL LETTER R
0056 ;  0076 ;  ( V → v ) LATIN CAPITAL LETTER V → LATIN SMALL LETTER V
0057 ;  0077 ;  ( W → w ) LATIN CAPITAL LETTER W → LATIN SMALL LETTER W
0058 ;  0078 ;  ( X → x ) LATIN CAPITAL LETTER X → LATIN SMALL LETTER X
0065 ;  0063 ;  ( e → c ) LATIN SMALL LETTER E → LATIN SMALL LETTER C
0074 ;  0066 ;  ( t → f ) LATIN SMALL LETTER T → LATIN SMALL LETTER F
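# NOTE(review): code point 0027 is APOSTROPHE, but the description below says SOLIDUS (002F) - confirm which one is intended
0037 ;  0027 ;  ( 7 → / ) DIGIT SEVEN → SOLIDUS  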
0039 ;  0067 ;  ( 9 → g ) DIGIT NINE → LATIN SMALL LETTER G 
006E 0069 ; 006D 0027 ; ( ni → m' ) LATIN SMALL LETTER N, LATIN SMALL LETTER I → LATIN SMALL LETTER M, APOSTROPHE
0026 ;  0061 ;  ( & → a ) AMPERSAND → LATIN SMALL LETTER A
0058 ;  004B ;  ( X → K ) LATIN CAPITAL LETTER X → LATIN CAPITAL LETTER K
00F6 ;  006F 0308 ;  ( ö → o ̈  ) LATIN SMALL LETTER O WITH DIAERESIS →  LATIN SMALL LETTER O, COMBINING DIAERESIS
00E4 ;  0061 0308 ;  ( ä → a ̈  ) LATIN SMALL LETTER A WITH DIAERESIS →  LATIN SMALL LETTER A, COMBINING DIAERESIS
00FC ;  0075 0308 ;  ( ü → u ̈  ) LATIN SMALL LETTER U WITH DIAERESIS →  LATIN SMALL LETTER U, COMBINING DIAERESIS
0031 ;  0049 ; ( 1 → I ) DIGIT ONE → LATIN CAPITAL LETTER I
0031 ;  006C ; ( 1 → l ) DIGIT ONE → LATIN SMALL LETTER L 
0031 ;  0069 ; ( 1 → i ) DIGIT ONE → LATIN SMALL LETTER I
0031 ;  006C ; ( 1 → l ) DIGIT ONE → LATIN SMALL LETTER L 
0069 ;  006C ; ( i → l ) LATIN SMALL LETTER I → LATIN SMALL LETTER L
00A7 ;  0038 ; ( § → 8 ) SECTION SIGN → DIGIT EIGHT  
00A7 ;  0026 ; ( § → & ) SECTION SIGN → AMPERSAND
00A7 ;  0024 ; ( § → $ ) SECTION SIGN → DOLLAR SIGN

# Tesseract 4 seems to duplicate characters if the confidence is equal for both hits
0031 0069 ;  0069 ; ( 1i → i ) DIGIT ONE, LATIN SMALL LETTER I → LATIN SMALL LETTER I
0069 0031 ;  0069 ; ( i1 → i ) LATIN SMALL LETTER I, DIGIT ONE → LATIN SMALL LETTER I
0069 007A ;  0069 ; ( iz → i ) LATIN SMALL LETTER I, LATIN SMALL LETTER Z → LATIN SMALL LETTER I
0131 007A ;  0069 ; ( ız → i ) LATIN SMALL LETTER DOTLESS I, LATIN SMALL LETTER Z → LATIN SMALL LETTER I
006C 0069 ;  0069 ; ( li → i ) LATIN SMALL LETTER L, LATIN SMALL LETTER I → LATIN SMALL LETTER I
0069 0131 ;  0069 ; ( iı → i ) LATIN SMALL LETTER I, LATIN SMALL LETTER DOTLESS I → LATIN SMALL LETTER I
0131 0069 ;  0069 ; ( ıi → i ) LATIN SMALL LETTER DOTLESS I, LATIN SMALL LETTER I → LATIN SMALL LETTER I
0074 0066 ;  0066 ; ( tf → f ) LATIN SMALL LETTER T, LATIN SMALL LETTER F → LATIN SMALL LETTER F
0066 0074 ;  0066 ; ( ft → f ) LATIN SMALL LETTER F, LATIN SMALL LETTER T → LATIN SMALL LETTER F
004A 006A ;  006A ; ( Jj → j ) LATIN CAPITAL LETTER J, LATIN SMALL LETTER J → LATIN SMALL LETTER J
004F 0030 ;  0030 ;	( O0 → 0 ) LATIN CAPITAL LETTER O, DIGIT ZERO → DIGIT ZERO

0065 ; 006F ; ( e → o ) LATIN SMALL LETTER E → LATIN SMALL LETTER O
00A8 0131 ; 0027 0031 0027 ; ( ¨ı → '1' ) DIAERESIS, LATIN SMALL LETTER DOTLESS I → APOSTROPHE, DIGIT ONE, APOSTROPHE
00A8 0131 ; 0069 ; ( ¨ı → i ) DIAERESIS, LATIN SMALL LETTER DOTLESS I → LATIN SMALL LETTER I
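# NOTE(review): code points 0078 → 006C 0079 are "x → ly", but the description below says "m → ln" (006D → 006C 006E) - confirm which one is intended
0078 ; 006C 0079 ; ( m → ln ) LATIN SMALL LETTER M  →  LATIN SMALL LETTER L, LATIN SMALL LETTER N 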
0069 0076 ; 0072 0076 ; ( iv → rv ) LATIN SMALL LETTER I, LATIN SMALL LETTER V → LATIN SMALL LETTER R, LATIN SMALL LETTER V
0053 0024 ;	0053 ;	( S$ → S ) LATIN CAPITAL LETTER S, DOLLAR SIGN → LATIN CAPITAL LETTER S

#problem for OcrTest
0031 0033 ;  00DF ; ( 13 → ß ) DIGIT ONE, DIGIT THREE → LATIN SMALL LETTER SHARP S
0069 0069 ;  00FC ; ( ii → ü ) LATIN SMALL LETTER I, LATIN SMALL LETTER I → LATIN SMALL LETTER U WITH DIAERESIS
003E ;  0033 ; ( > → 3 ) GREATER THAN SIGN → DIGIT THREE
003E ;  0035 ; ( > → 5 ) GREATER THAN SIGN → DIGIT FIVE
00AE ;  0036 ; ( ® → 6 ) REGISTERED SIGN → DIGIT SIX
00AE ;  0038 ; ( ® → 8 ) REGISTERED SIGN → DIGIT EIGHT
00AE ;  0039 ; ( ® → 9 ) REGISTERED SIGN → DIGIT NINE
0031 ;  0021 ; ( 1 → ! ) DIGIT ONE → EXCLAMATION MARK
0022 ;  00B4 ; ( " → ´ ) QUOTATION MARK → ACUTE ACCENT
0022 ;  0027 ; ( " → ' ) QUOTATION MARK → APOSTROPHE
003F ;	0032 ;	( ? → 2 ) QUESTION MARK → DIGIT TWO
003F ;	0033 ;	( ? → 3 ) QUESTION MARK → DIGIT THREE
0074 ;  0072 ;  ( t → r ) LATIN SMALL LETTER T → LATIN SMALL LETTER R
0074 ;  0069 ;  ( t → i ) LATIN SMALL LETTER T → LATIN SMALL LETTER I
0030 ;  006F ;  ( 0 → o ) DIGIT ZERO → LATIN SMALL LETTER O

# Blank entry template:
# ;  ; (  →  )  →  
