blob: d85afd6d5cf704b580a31be2ee79f5cdfe0247c1 [file] [log] [blame]
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001""" Encoding Aliases Support
2
3 This module is used by the encodings package search function to
4 map encoding names to module names.
5
6 Note that the search function normalizes the encoding names before
7 doing the lookup, so the mapping will have to map normalized
8 encoding names to module names.
9
10 Contents:
11
12 The following aliases dictionary contains mappings of all IANA
13 character set names for which the Python core library provides
14 codecs. In addition to these, a few Python specific codec
15 aliases have also been added.
16
17"""
# Mapping of normalized encoding alias -> codec module name, consulted by
# the encodings package search function.
#
# Keys must be in normalized form. Per the codecs documentation, search
# functions receive the encoding name in lower case, so a key containing
# upper-case characters can never be matched by a lookup.
aliases = {

    # Please keep this list sorted alphabetically by value !

    # ascii codec
    '646': 'ascii',
    'ansi_x3.4_1968': 'ascii',
    'ansi_x3_4_1968': 'ascii',  # some email headers use this non-standard name
    'ansi_x3.4_1986': 'ascii',
    'cp367': 'ascii',
    'csascii': 'ascii',
    'ibm367': 'ascii',
    'iso646_us': 'ascii',
    'iso_646.irv_1991': 'ascii',
    'iso_ir_6': 'ascii',
    'us': 'ascii',
    'us_ascii': 'ascii',

    # base64_codec codec
    'base64': 'base64_codec',
    'base_64': 'base64_codec',

    # big5 codec
    'big5_tw': 'big5',
    'csbig5': 'big5',

    # big5hkscs codec
    'big5_hkscs': 'big5hkscs',
    'hkscs': 'big5hkscs',

    # bz2_codec codec
    'bz2': 'bz2_codec',

    # cp037 codec
    '037': 'cp037',
    'csibm037': 'cp037',
    'ebcdic_cp_ca': 'cp037',
    'ebcdic_cp_nl': 'cp037',
    'ebcdic_cp_us': 'cp037',
    'ebcdic_cp_wt': 'cp037',
    'ibm037': 'cp037',
    'ibm039': 'cp037',

    # cp1026 codec
    '1026': 'cp1026',
    'csibm1026': 'cp1026',
    'ibm1026': 'cp1026',

    # cp1125 codec
    '1125': 'cp1125',
    'ibm1125': 'cp1125',
    'cp866u': 'cp1125',
    'ruscii': 'cp1125',

    # cp1140 codec
    '1140': 'cp1140',
    'ibm1140': 'cp1140',

    # cp1250 codec
    '1250': 'cp1250',
    'windows_1250': 'cp1250',

    # cp1251 codec
    '1251': 'cp1251',
    'windows_1251': 'cp1251',

    # cp1252 codec
    '1252': 'cp1252',
    'windows_1252': 'cp1252',

    # cp1253 codec
    '1253': 'cp1253',
    'windows_1253': 'cp1253',

    # cp1254 codec
    '1254': 'cp1254',
    'windows_1254': 'cp1254',

    # cp1255 codec
    '1255': 'cp1255',
    'windows_1255': 'cp1255',

    # cp1256 codec
    '1256': 'cp1256',
    'windows_1256': 'cp1256',

    # cp1257 codec
    '1257': 'cp1257',
    'windows_1257': 'cp1257',

    # cp1258 codec
    '1258': 'cp1258',
    'windows_1258': 'cp1258',

    # cp273 codec
    '273': 'cp273',
    'ibm273': 'cp273',
    'csibm273': 'cp273',

    # cp424 codec
    '424': 'cp424',
    'csibm424': 'cp424',
    'ebcdic_cp_he': 'cp424',
    'ibm424': 'cp424',

    # cp437 codec
    '437': 'cp437',
    'cspc8codepage437': 'cp437',
    'ibm437': 'cp437',

    # cp500 codec
    '500': 'cp500',
    'csibm500': 'cp500',
    'ebcdic_cp_be': 'cp500',
    'ebcdic_cp_ch': 'cp500',
    'ibm500': 'cp500',

    # cp775 codec
    '775': 'cp775',
    'cspc775baltic': 'cp775',
    'ibm775': 'cp775',

    # cp850 codec
    '850': 'cp850',
    'cspc850multilingual': 'cp850',
    'ibm850': 'cp850',

    # cp852 codec
    '852': 'cp852',
    'cspcp852': 'cp852',
    'ibm852': 'cp852',

    # cp855 codec
    '855': 'cp855',
    'csibm855': 'cp855',
    'ibm855': 'cp855',

    # cp857 codec
    '857': 'cp857',
    'csibm857': 'cp857',
    'ibm857': 'cp857',

    # cp858 codec
    '858': 'cp858',
    'csibm858': 'cp858',
    'ibm858': 'cp858',

    # cp860 codec
    '860': 'cp860',
    'csibm860': 'cp860',
    'ibm860': 'cp860',

    # cp861 codec
    '861': 'cp861',
    'cp_is': 'cp861',
    'csibm861': 'cp861',
    'ibm861': 'cp861',

    # cp862 codec
    '862': 'cp862',
    'cspc862latinhebrew': 'cp862',
    'ibm862': 'cp862',

    # cp863 codec
    '863': 'cp863',
    'csibm863': 'cp863',
    'ibm863': 'cp863',

    # cp864 codec
    '864': 'cp864',
    'csibm864': 'cp864',
    'ibm864': 'cp864',

    # cp865 codec
    '865': 'cp865',
    'csibm865': 'cp865',
    'ibm865': 'cp865',

    # cp866 codec
    '866': 'cp866',
    'csibm866': 'cp866',
    'ibm866': 'cp866',

    # cp869 codec
    '869': 'cp869',
    'cp_gr': 'cp869',
    'csibm869': 'cp869',
    'ibm869': 'cp869',

    # cp932 codec
    '932': 'cp932',
    'ms932': 'cp932',
    'mskanji': 'cp932',
    'ms_kanji': 'cp932',

    # cp949 codec
    '949': 'cp949',
    'ms949': 'cp949',
    'uhc': 'cp949',

    # cp950 codec
    '950': 'cp950',
    'ms950': 'cp950',

    # euc_jis_2004 codec
    'jisx0213': 'euc_jis_2004',
    'eucjis2004': 'euc_jis_2004',
    'euc_jis2004': 'euc_jis_2004',

    # euc_jisx0213 codec
    'eucjisx0213': 'euc_jisx0213',

    # euc_jp codec
    'eucjp': 'euc_jp',
    'ujis': 'euc_jp',
    'u_jis': 'euc_jp',

    # euc_kr codec
    'euckr': 'euc_kr',
    'korean': 'euc_kr',
    'ksc5601': 'euc_kr',
    'ks_c_5601': 'euc_kr',
    'ks_c_5601_1987': 'euc_kr',
    'ksx1001': 'euc_kr',
    'ks_x_1001': 'euc_kr',

    # gb18030 codec
    'gb18030_2000': 'gb18030',

    # gb2312 codec
    'chinese': 'gb2312',
    'csiso58gb231280': 'gb2312',
    'euc_cn': 'gb2312',
    'euccn': 'gb2312',
    'eucgb2312_cn': 'gb2312',
    'gb2312_1980': 'gb2312',
    'gb2312_80': 'gb2312',
    'iso_ir_58': 'gb2312',

    # gbk codec
    '936': 'gbk',
    'cp936': 'gbk',
    'ms936': 'gbk',

    # hex_codec codec
    'hex': 'hex_codec',

    # hp_roman8 codec
    'roman8': 'hp_roman8',
    'r8': 'hp_roman8',
    # Normalized (lower-case) form of the IANA alias "csHPRoman8"; this is
    # the spelling a lower-cased lookup can actually match.
    'cshproman8': 'hp_roman8',
    # Legacy mixed-case key, retained only so code that indexes this dict
    # directly with the historical spelling keeps working.
    'csHPRoman8': 'hp_roman8',
    'cp1051': 'hp_roman8',
    'ibm1051': 'hp_roman8',

    # hz codec
    'hzgb': 'hz',
    'hz_gb': 'hz',
    'hz_gb_2312': 'hz',

    # iso2022_jp codec
    'csiso2022jp': 'iso2022_jp',
    'iso2022jp': 'iso2022_jp',
    'iso_2022_jp': 'iso2022_jp',

    # iso2022_jp_1 codec
    'iso2022jp_1': 'iso2022_jp_1',
    'iso_2022_jp_1': 'iso2022_jp_1',

    # iso2022_jp_2 codec
    'iso2022jp_2': 'iso2022_jp_2',
    'iso_2022_jp_2': 'iso2022_jp_2',

    # iso2022_jp_2004 codec
    'iso_2022_jp_2004': 'iso2022_jp_2004',
    'iso2022jp_2004': 'iso2022_jp_2004',

    # iso2022_jp_3 codec
    'iso2022jp_3': 'iso2022_jp_3',
    'iso_2022_jp_3': 'iso2022_jp_3',

    # iso2022_jp_ext codec
    'iso2022jp_ext': 'iso2022_jp_ext',
    'iso_2022_jp_ext': 'iso2022_jp_ext',

    # iso2022_kr codec
    'csiso2022kr': 'iso2022_kr',
    'iso2022kr': 'iso2022_kr',
    'iso_2022_kr': 'iso2022_kr',

    # iso8859_10 codec
    'csisolatin6': 'iso8859_10',
    'iso_8859_10': 'iso8859_10',
    'iso_8859_10_1992': 'iso8859_10',
    'iso_ir_157': 'iso8859_10',
    'l6': 'iso8859_10',
    'latin6': 'iso8859_10',

    # iso8859_11 codec
    'thai': 'iso8859_11',
    'iso_8859_11': 'iso8859_11',
    'iso_8859_11_2001': 'iso8859_11',

    # iso8859_13 codec
    'iso_8859_13': 'iso8859_13',
    'l7': 'iso8859_13',
    'latin7': 'iso8859_13',

    # iso8859_14 codec
    'iso_8859_14': 'iso8859_14',
    'iso_8859_14_1998': 'iso8859_14',
    'iso_celtic': 'iso8859_14',
    'iso_ir_199': 'iso8859_14',
    'l8': 'iso8859_14',
    'latin8': 'iso8859_14',

    # iso8859_15 codec
    'iso_8859_15': 'iso8859_15',
    'l9': 'iso8859_15',
    'latin9': 'iso8859_15',

    # iso8859_16 codec
    'iso_8859_16': 'iso8859_16',
    'iso_8859_16_2001': 'iso8859_16',
    'iso_ir_226': 'iso8859_16',
    'l10': 'iso8859_16',
    'latin10': 'iso8859_16',

    # iso8859_2 codec
    'csisolatin2': 'iso8859_2',
    'iso_8859_2': 'iso8859_2',
    'iso_8859_2_1987': 'iso8859_2',
    'iso_ir_101': 'iso8859_2',
    'l2': 'iso8859_2',
    'latin2': 'iso8859_2',

    # iso8859_3 codec
    'csisolatin3': 'iso8859_3',
    'iso_8859_3': 'iso8859_3',
    'iso_8859_3_1988': 'iso8859_3',
    'iso_ir_109': 'iso8859_3',
    'l3': 'iso8859_3',
    'latin3': 'iso8859_3',

    # iso8859_4 codec
    'csisolatin4': 'iso8859_4',
    'iso_8859_4': 'iso8859_4',
    'iso_8859_4_1988': 'iso8859_4',
    'iso_ir_110': 'iso8859_4',
    'l4': 'iso8859_4',
    'latin4': 'iso8859_4',

    # iso8859_5 codec
    'csisolatincyrillic': 'iso8859_5',
    'cyrillic': 'iso8859_5',
    'iso_8859_5': 'iso8859_5',
    'iso_8859_5_1988': 'iso8859_5',
    'iso_ir_144': 'iso8859_5',

    # iso8859_6 codec
    'arabic': 'iso8859_6',
    'asmo_708': 'iso8859_6',
    'csisolatinarabic': 'iso8859_6',
    'ecma_114': 'iso8859_6',
    'iso_8859_6': 'iso8859_6',
    'iso_8859_6_1987': 'iso8859_6',
    'iso_ir_127': 'iso8859_6',

    # iso8859_7 codec
    'csisolatingreek': 'iso8859_7',
    'ecma_118': 'iso8859_7',
    'elot_928': 'iso8859_7',
    'greek': 'iso8859_7',
    'greek8': 'iso8859_7',
    'iso_8859_7': 'iso8859_7',
    'iso_8859_7_1987': 'iso8859_7',
    'iso_ir_126': 'iso8859_7',

    # iso8859_8 codec
    'csisolatinhebrew': 'iso8859_8',
    'hebrew': 'iso8859_8',
    'iso_8859_8': 'iso8859_8',
    'iso_8859_8_1988': 'iso8859_8',
    'iso_ir_138': 'iso8859_8',

    # iso8859_9 codec
    'csisolatin5': 'iso8859_9',
    'iso_8859_9': 'iso8859_9',
    'iso_8859_9_1989': 'iso8859_9',
    'iso_ir_148': 'iso8859_9',
    'l5': 'iso8859_9',
    'latin5': 'iso8859_9',

    # johab codec
    'cp1361': 'johab',
    'ms1361': 'johab',

    # koi8_r codec
    'cskoi8r': 'koi8_r',

    # kz1048 codec
    'kz_1048': 'kz1048',
    'rk1048': 'kz1048',
    'strk1048_2002': 'kz1048',

    # latin_1 codec
    #
    # Note that the latin_1 codec is implemented internally in C and a
    # lot faster than the charmap codec iso8859_1 which uses the same
    # encoding. This is why we discourage the use of the iso8859_1
    # codec and alias it to latin_1 instead.
    #
    '8859': 'latin_1',
    'cp819': 'latin_1',
    'csisolatin1': 'latin_1',
    'ibm819': 'latin_1',
    'iso8859': 'latin_1',
    'iso8859_1': 'latin_1',
    'iso_8859_1': 'latin_1',
    'iso_8859_1_1987': 'latin_1',
    'iso_ir_100': 'latin_1',
    'l1': 'latin_1',
    'latin': 'latin_1',
    'latin1': 'latin_1',

    # mac_cyrillic codec
    'maccyrillic': 'mac_cyrillic',

    # mac_greek codec
    'macgreek': 'mac_greek',

    # mac_iceland codec
    'maciceland': 'mac_iceland',

    # mac_latin2 codec
    'maccentraleurope': 'mac_latin2',
    'mac_centeuro': 'mac_latin2',
    'maclatin2': 'mac_latin2',

    # mac_roman codec
    'macintosh': 'mac_roman',
    'macroman': 'mac_roman',

    # mac_turkish codec
    'macturkish': 'mac_turkish',

    # mbcs codec
    'ansi': 'mbcs',
    'dbcs': 'mbcs',

    # ptcp154 codec
    'csptcp154': 'ptcp154',
    'pt154': 'ptcp154',
    'cp154': 'ptcp154',
    'cyrillic_asian': 'ptcp154',

    # quopri_codec codec
    'quopri': 'quopri_codec',
    'quoted_printable': 'quopri_codec',
    'quotedprintable': 'quopri_codec',

    # rot_13 codec
    'rot13': 'rot_13',

    # shift_jis codec
    'csshiftjis': 'shift_jis',
    'shiftjis': 'shift_jis',
    'sjis': 'shift_jis',
    's_jis': 'shift_jis',

    # shift_jis_2004 codec
    'shiftjis2004': 'shift_jis_2004',
    'sjis_2004': 'shift_jis_2004',
    's_jis_2004': 'shift_jis_2004',

    # shift_jisx0213 codec
    'shiftjisx0213': 'shift_jisx0213',
    'sjisx0213': 'shift_jisx0213',
    's_jisx0213': 'shift_jisx0213',

    # tis_620 codec
    'tis620': 'tis_620',
    'tis_620_0': 'tis_620',
    'tis_620_2529_0': 'tis_620',
    'tis_620_2529_1': 'tis_620',
    'iso_ir_166': 'tis_620',

    # utf_16 codec
    'u16': 'utf_16',
    'utf16': 'utf_16',

    # utf_16_be codec
    'unicodebigunmarked': 'utf_16_be',
    'utf_16be': 'utf_16_be',

    # utf_16_le codec
    'unicodelittleunmarked': 'utf_16_le',
    'utf_16le': 'utf_16_le',

    # utf_32 codec
    'u32': 'utf_32',
    'utf32': 'utf_32',

    # utf_32_be codec
    'utf_32be': 'utf_32_be',

    # utf_32_le codec
    'utf_32le': 'utf_32_le',

    # utf_7 codec
    'u7': 'utf_7',
    'utf7': 'utf_7',
    'unicode_1_1_utf_7': 'utf_7',

    # utf_8 codec
    'u8': 'utf_8',
    'utf': 'utf_8',
    'utf8': 'utf_8',
    'utf8_ucs2': 'utf_8',
    'utf8_ucs4': 'utf_8',
    'cp65001': 'utf_8',

    # uu_codec codec
    'uu': 'uu_codec',

    # zlib_codec codec
    'zip': 'zlib_codec',
    'zlib': 'zlib_codec',

    # temporary mac CJK aliases, will be replaced by proper codecs in 3.1
    'x_mac_japanese': 'shift_jis',
    'x_mac_korean': 'euc_kr',
    'x_mac_simp_chinese': 'gb2312',
    'x_mac_trad_chinese': 'big5',
}