# IBM-1047, IBM1047, CP1047 # last modified: 2018-04-20 # assembled by: Lorinczy Zsigmond # sources: # ftp://ftp.software.ibm.com/software/globalization/gcoc/attachments/CP01047.txt # ftp://ftp.software.ibm.com/software/globalization/gcoc/attachments/CP01047.pdf # https://www-01.ibm.com/software/globalization/cdra/appendix_g1.html # http://www.unicode.org/charts/PDF/U0000.pdf # http://www.unicode.org/charts/PDF/U0080.pdf # https://tools.ietf.org/html/rfc183 # # usage with libiconv (requires libiconv source and gperf): # # cd src/libiconv*/tools # make 8bit_tab_to_h # ./8bit_tab_to_h IBM-1047 cp1047 # # cd .. # make -fMakefile.devel all # # # add this to converters.h: # #include "cp1047.h" # # add this to encodings.def: # DEFENCODING(( "CP1047", # "IBM1047", # "IBM-1047", # ), # cp1047, # {cp1047_mbtowc, NULL }, { cp1047_wctomb, NULL }) # #ebc unicode IBM Explanation / replacement (for control characters) 0x00 0x0000 # NUL 0x01 0x0001 # SOH 0x02 0x0002 # STX 0x03 0x0003 # ETX 0x04 0x009C # SEL STRING TERMINATOR (ST) 0x05 0x0009 # HT 0x06 0x0086 # RNL START OF SELECTED AREA (SSA) 0x07 0x007F # DEL 0x08 0x0097 # GE END OF GUARDED AREA (EPA) 0x09 0x008D # SPS REVERSE LINE FEED (RI) 0x0A 0x008E # RPT SINGLE SHIFT TWO (SS2) 0x0B 0x000B # VT 0x0C 0x000C # FF 0x0D 0x000D # CR 0x0E 0x000E # SO 0x0F 0x000F # SI 0x10 0x0010 # DLE 0x11 0x0011 # DC1 0x12 0x0012 # DC2 0x13 0x0013 # DC3 0x14 0x009D # RES/ENP OPERATING SYSTEM COMMAND (OSC) 0x15 0x0085 # NL NEXT LINE (NEL) 0x16 0x0008 # BS 0x17 0x0087 # POC END OF SELECTED AREA (ESA) 0x18 0x0018 # CAN 0x19 0x0019 # EM 0x1A 0x0092 # UBS PRIVATE USE TWO (PU2) 0x1B 0x008F # CU1 SINGLE SHIFT THREE (SS3) 0x1C 0x001C # IFS FILE SEPARATOR (FS) 0x1D 0x001D # IGS GROUP SEPARATOR (GS) 0x1E 0x001E # IRS RECORD SEPARATOR (RS) 0x1F 0x001F # IUS/ITB UNIT SEPARATOR (US) 0x20 0x0080 # DS ??? 0x21 0x0081 # SOS ??? 
0x22 0x0082 # FS BREAK PERMITTED HERE (BPH) 0x23 0x0083 # WUS NO BREAK HERE (NBH) 0x24 0x0084 # BYP/INP INDEX (IND) 0x25 0x000A # LF 0x26 0x0017 # ETB 0x27 0x001B # ESC 0x28 0x0088 # SA CHARACTER TABULATION SET (HTS) 0x29 0x0089 # SFE CHARACTER TABULATION WITH JUSTIFICATION (HTJ) 0x2A 0x008A # SM/SW LINE TABULATION SET (VTS) 0x2B 0x008B # CSP PARTIAL LINE FORWARD (PLD) 0x2C 0x008C # MFA PARTIAL LINE BACKWARD (PLU) 0x2D 0x0005 # ENQ 0x2E 0x0006 # ACK 0x2F 0x0007 # BEL 0x30 0x0090 # Reserved DEVICE CONTROL STRING (DCS) 0x31 0x0091 # Reserved PRIVATE USE ONE (PU1) 0x32 0x0016 # SYN 0x33 0x0093 # IR SET TRANSMIT STATE (STS) 0x34 0x0094 # PP CANCEL CHARACTER (CCH) 0x35 0x0095 # TRN MESSAGE WAITING (MW) 0x36 0x0096 # NBS START OF GUARDED AREA (SPA) 0x37 0x0004 # EOT 0x38 0x0098 # SBS START OF STRING (SOS) 0x39 0x0099 # IT ??? 0x3A 0x009A # RFF SINGLE CHARACTER INTRODUCER (SCI) 0x3B 0x009B # CU3 CONTROL SEQUENCE INTRODUCER (CSI) 0x3C 0x0014 # DC4 0x3D 0x0015 # NAK 0x3E 0x009E # Reserved PRIVACY MESSAGE (PM) 0x3F 0x001A # SUB 0x40 0x0020 # SP010000 Space 0x41 0x00A0 # SP300000 Required Space 0x42 0x00E2 # LA150000 a Circumflex Small 0x43 0x00E4 # LA170000 a Diaeresis Small 0x44 0x00E0 # LA130000 a Grave Small 0x45 0x00E1 # LA110000 a Acute Small 0x46 0x00E3 # LA190000 a Tilde Small 0x47 0x00E5 # LA270000 a Overcircle Small 0x48 0x00E7 # LC410000 c Cedilla Small 0x49 0x00F1 # LN190000 n Tilde Small 0x4A 0x00A2 # SC040000 Cent Sign 0x4B 0x002E # SP110000 Period/Full Stop 0x4C 0x003C # SA030000 Less Than Sign 0x4D 0x0028 # SP060000 Left Parenthesis 0x4E 0x002B # SA010000 Plus Sign 0x4F 0x007C # SM130000 Vertical Line/Logical OR 0x50 0x0026 # SM030000 Ampersand 0x51 0x00E9 # LE110000 e Acute Small 0x52 0x00EA # LE150000 e Circumflex Small 0x53 0x00EB # LE170000 e Diaeresis Small 0x54 0x00E8 # LE130000 e Grave Small 0x55 0x00ED # LI110000 i Acute Small 0x56 0x00EE # LI150000 i Circumflex Small 0x57 0x00EF # LI170000 i Diaeresis Small 0x58 0x00EC # LI130000 i Grave Small 0x59 
0x00DF # LS610000 Sharp s Small 0x5A 0x0021 # SP020000 Exclamation Point 0x5B 0x0024 # SC030000 Dollar Sign 0x5C 0x002A # SM040000 Asterisk 0x5D 0x0029 # SP070000 Right Parenthesis 0x5E 0x003B # SP140000 Semicolon 0x5F 0x005E # SD150000 Circumflex Accent 0x60 0x002D # SP100000 Hyphen/Minus Sign 0x61 0x002F # SP120000 Slash 0x62 0x00C2 # LA160000 A Circumflex Capital 0x63 0x00C4 # LA180000 A Diaeresis Capital 0x64 0x00C0 # LA140000 A Grave Capital 0x65 0x00C1 # LA120000 A Acute Capital 0x66 0x00C3 # LA200000 A Tilde Capital 0x67 0x00C5 # LA280000 A Overcircle Capital 0x68 0x00C7 # LC420000 C Cedilla Capital 0x69 0x00D1 # LN200000 N Tilde Capital 0x6A 0x00A6 # SM650000 Vertical Line, Broken 0x6B 0x002C # SP080000 Comma 0x6C 0x0025 # SM020000 Percent Sign 0x6D 0x005F # SP090000 Underline/Continuous Underscore 0x6E 0x003E # SA050000 Greater Than Sign 0x6F 0x003F # SP150000 Question Mark 0x70 0x00F8 # LO610000 o Slash Small 0x71 0x00C9 # LE120000 E Acute Capital 0x72 0x00CA # LE160000 E Circumflex Capital 0x73 0x00CB # LE180000 E Diaeresis Capital 0x74 0x00C8 # LE140000 E Grave Capital 0x75 0x00CD # LI120000 I Acute Capital 0x76 0x00CE # LI160000 I Circumflex Capital 0x77 0x00CF # LI180000 I Diaeresis Capital 0x78 0x00CC # LI140000 I Grave Capital 0x79 0x0060 # SD130000 Grave Accent 0x7A 0x003A # SP130000 Colon 0x7B 0x0023 # SM010000 Number Sign 0x7C 0x0040 # SM050000 At Sign 0x7D 0x0027 # SP050000 Apostrophe 0x7E 0x003D # SA040000 Equal Sign 0x7F 0x0022 # SP040000 Quotation Marks 0x80 0x00D8 # LO620000 O Slash Capital 0x81 0x0061 # LA010000 a Small 0x82 0x0062 # LB010000 b Small 0x83 0x0063 # LC010000 c Small 0x84 0x0064 # LD010000 d Small 0x85 0x0065 # LE010000 e Small 0x86 0x0066 # LF010000 f Small 0x87 0x0067 # LG010000 g Small 0x88 0x0068 # LH010000 h Small 0x89 0x0069 # LI010000 i Small 0x8A 0x00AB # SP170000 Left Angle Quotes 0x8B 0x00BB # SP180000 Right Angle Quotes 0x8C 0x00F0 # LD630000 eth Icelandic Small 0x8D 0x00FD # LY110000 y Acute Small 0x8E 0x00FE # 
LT630000 Thorn Icelandic Small 0x8F 0x00B1 # SA020000 Plus or Minus Sign 0x90 0x00B0 # SM190000 Degree Symbol 0x91 0x006A # LJ010000 j Small 0x92 0x006B # LK010000 k Small 0x93 0x006C # LL010000 l Small 0x94 0x006D # LM010000 m Small 0x95 0x006E # LN010000 n Small 0x96 0x006F # LO010000 o Small 0x97 0x0070 # LP010000 p Small 0x98 0x0071 # LQ010000 q Small 0x99 0x0072 # LR010000 r Small 0x9A 0x00AA # SM210000 Ordinal Indicator, Feminine 0x9B 0x00BA # SM200000 Ordinal Indicator, Masculine 0x9C 0x00E6 # LA510000 ae Diphthong Small 0x9D 0x00B8 # SD410000 Cedilla or Sedila Accent 0x9E 0x00C6 # LA520000 ae Diphthong Capital 0x9F 0x00A4 # SC010000 International Currency Symbol 0xA0 0x00B5 # SM170000 Micro Symbol 0xA1 0x007E # SD190000 Tilde Accent 0xA2 0x0073 # LS010000 s Small 0xA3 0x0074 # LT010000 t Small 0xA4 0x0075 # LU010000 u Small 0xA5 0x0076 # LV010000 v Small 0xA6 0x0077 # LW010000 w Small 0xA7 0x0078 # LX010000 x Small 0xA8 0x0079 # LY010000 y Small 0xA9 0x007A # LZ010000 z Small 0xAA 0x00A1 # SP030000 Exclamation Point, Inverted 0xAB 0x00BF # SP160000 Question Mark, Inverted 0xAC 0x00D0 # LD620000 D Stroke Capital/Eth Icelandic Capital 0xAD 0x005B # SM060000 Left Bracket 0xAE 0x00DE # LT640000 Thorn Icelandic Capital 0xAF 0x00AE # SM530000 Registered Trademark Symbol 0xB0 0x00AC # SM660000 Logical NOT/End Of Line Symbol 0xB1 0x00A3 # SC020000 Pound Sterling Sign 0xB2 0x00A5 # SC050000 Yen Sign 0xB3 0x00B7 # SD630000 Middle Dot 0xB4 0x00A9 # SM520000 Copyright Symbol 0xB5 0x00A7 # SM240000 Section Symbol (USA)/Paragraph Symbol (Europe) 0xB6 0x00B6 # SM250000 Paragraph Symbol (USA) 0xB7 0x00BC # NF040000 One Quarter 0xB8 0x00BD # NF010000 One Half 0xB9 0x00BE # NF050000 Three Quarters 0xBA 0x00DD # LY120000 Y Acute Capital 0xBB 0x00A8 # SD170000 Diaeresis/Umlaut Accent 0xBC 0x00AF # SM150000 Overline 0xBD 0x005D # SM080000 Right Bracket 0xBE 0x00B4 # SD110000 Acute Accent 0xBF 0x00D7 # SA070000 Multiply Sign 0xC0 0x007B # SM110000 Left Brace 0xC1 0x0041 # 
LA020000 A Capital 0xC2 0x0042 # LB020000 B Capital 0xC3 0x0043 # LC020000 C Capital 0xC4 0x0044 # LD020000 D Capital 0xC5 0x0045 # LE020000 E Capital 0xC6 0x0046 # LF020000 F Capital 0xC7 0x0047 # LG020000 G Capital 0xC8 0x0048 # LH020000 H Capital 0xC9 0x0049 # LI020000 I Capital 0xCA 0x00AD # SP320000 Syllable Hyphen 0xCB 0x00F4 # LO150000 o Circumflex Small 0xCC 0x00F6 # LO170000 o Diaeresis Small 0xCD 0x00F2 # LO130000 o Grave Small 0xCE 0x00F3 # LO110000 o Acute Small 0xCF 0x00F5 # LO190000 o Tilde Small 0xD0 0x007D # SM140000 Right Brace 0xD1 0x004A # LJ020000 J Capital 0xD2 0x004B # LK020000 K Capital 0xD3 0x004C # LL020000 L Capital 0xD4 0x004D # LM020000 M Capital 0xD5 0x004E # LN020000 N Capital 0xD6 0x004F # LO020000 O Capital 0xD7 0x0050 # LP020000 P Capital 0xD8 0x0051 # LQ020000 Q Capital 0xD9 0x0052 # LR020000 R Capital 0xDA 0x00B9 # ND011000 One Superscript 0xDB 0x00FB # LU150000 u Circumflex Small 0xDC 0x00FC # LU170000 u Diaeresis Small 0xDD 0x00F9 # LU130000 u Grave Small 0xDE 0x00FA # LU110000 u Acute Small 0xDF 0x00FF # LY170000 y Diaeresis Small 0xE0 0x005C # SM070000 Backslash 0xE1 0x00F7 # SA060000 Divide Sign 0xE2 0x0053 # LS020000 S Capital 0xE3 0x0054 # LT020000 T Capital 0xE4 0x0055 # LU020000 U Capital 0xE5 0x0056 # LV020000 V Capital 0xE6 0x0057 # LW020000 W Capital 0xE7 0x0058 # LX020000 X Capital 0xE8 0x0059 # LY020000 Y Capital 0xE9 0x005A # LZ020000 Z Capital 0xEA 0x00B2 # ND021000 Two Superscript 0xEB 0x00D4 # LO160000 O Circumflex Capital 0xEC 0x00D6 # LO180000 O Diaeresis Capital 0xED 0x00D2 # LO140000 O Grave Capital 0xEE 0x00D3 # LO120000 O Acute Capital 0xEF 0x00D5 # LO200000 O Tilde Capital 0xF0 0x0030 # ND100000 Zero 0xF1 0x0031 # ND010000 One 0xF2 0x0032 # ND020000 Two 0xF3 0x0033 # ND030000 Three 0xF4 0x0034 # ND040000 Four 0xF5 0x0035 # ND050000 Five 0xF6 0x0036 # ND060000 Six 0xF7 0x0037 # ND070000 Seven 0xF8 0x0038 # ND080000 Eight 0xF9 0x0039 # ND090000 Nine 0xFA 0x00B3 # ND031000 Three Superscript 0xFB 0x00DB # 
LU160000 U Circumflex Capital 0xFC 0x00DC # LU180000 U Diaeresis Capital 0xFD 0x00D9 # LU140000 U Grave Capital 0xFE 0x00DA # LU120000 U Acute Capital 0xFF 0x009F # EO APPLICATION PROGRAM COMMAND (APC)