Blame - linux-x64/clang/python3/include/python3.9/unicodeobject.h - hafnium/prebuilts

blob: 500ce242e9f0e850f483695463701aed7d3cce14 [file] [log] [blame]

Olivier Deprez	f4ef2d0	2021-04-20 13:36:24 +0200	[diff] [blame]	1	#ifndef Py_UNICODEOBJECT_H
				2	#define Py_UNICODEOBJECT_H
				3
				4	#include <stdarg.h>
				5
				6	/*
				7
				8	Unicode implementation based on original code by Fredrik Lundh,
				9	modified by Marc-Andre Lemburg (mal@lemburg.com) according to the
				10	Unicode Integration Proposal. (See
				11	http://www.egenix.com/files/python/unicode-proposal.txt).
				12
				13	Copyright (c) Corporation for National Research Initiatives.
				14
				15
				16	Original header:
				17	--------------------------------------------------------------------
				18
				19	* Yet another Unicode string type for Python. This type supports the
				20	* 16-bit Basic Multilingual Plane (BMP) only.
				21	*
				22	* Written by Fredrik Lundh, January 1999.
				23	*
				24	* Copyright (c) 1999 by Secret Labs AB.
				25	* Copyright (c) 1999 by Fredrik Lundh.
				26	*
				27	* fredrik@pythonware.com
				28	* http://www.pythonware.com
				29	*
				30	* --------------------------------------------------------------------
				31	* This Unicode String Type is
				32	*
				33	* Copyright (c) 1999 by Secret Labs AB
				34	* Copyright (c) 1999 by Fredrik Lundh
				35	*
				36	* By obtaining, using, and/or copying this software and/or its
				37	* associated documentation, you agree that you have read, understood,
				38	* and will comply with the following terms and conditions:
				39	*
				40	* Permission to use, copy, modify, and distribute this software and its
				41	* associated documentation for any purpose and without fee is hereby
				42	* granted, provided that the above copyright notice appears in all
				43	* copies, and that both that copyright notice and this permission notice
				44	* appear in supporting documentation, and that the name of Secret Labs
				45	* AB or the author not be used in advertising or publicity pertaining to
				46	* distribution of the software without specific, written prior
				47	* permission.
				48	*
				49	* SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO
				50	* THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
				51	* FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR
				52	* ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
				53	* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
				54	* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
				55	* OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
				56	* -------------------------------------------------------------------- */
				57
				58	#include <ctype.h>
				59
				60	/* === Internal API ======================================================= */
				61
				62	/* --- Internal Unicode Format -------------------------------------------- */
				63
				64	/* Python 3.x requires unicode */
				65	#define Py_USING_UNICODE
				66
				67	#ifndef SIZEOF_WCHAR_T
				68	#error Must define SIZEOF_WCHAR_T
				69	#endif
				70
				71	#define Py_UNICODE_SIZE SIZEOF_WCHAR_T
				72
				73	/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE.
				74	Otherwise, Unicode strings are stored as UCS-2 (with limited support
				75	for UTF-16) */
				76
				77	#if Py_UNICODE_SIZE >= 4
				78	#define Py_UNICODE_WIDE
				79	#endif
				80
				81	/* Set these flags if the platform has "wchar.h" and the
				82	wchar_t type is a 16-bit unsigned type */
				83	/* #define HAVE_WCHAR_H */
				84	/* #define HAVE_USABLE_WCHAR_T */
				85
				86	/* If the compiler provides a wchar_t type we try to support it
				87	through the interface functions PyUnicode_FromWideChar(),
				88	PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */
				89
				90	#ifdef HAVE_USABLE_WCHAR_T
				91	# ifndef HAVE_WCHAR_H
				92	# define HAVE_WCHAR_H
				93	# endif
				94	#endif
				95
				96	#ifdef HAVE_WCHAR_H
				97	# include <wchar.h>
				98	#endif
				99
				100	/* Py_UCS4, Py_UCS2 and Py_UCS1 are typedefs for the 32-bit, 16-bit and
				101	8-bit unsigned code units of the respective unicode representations. */
				102	typedef uint32_t Py_UCS4;
				103	typedef uint16_t Py_UCS2;
				104	typedef uint8_t Py_UCS1;
				105
				106	#ifdef __cplusplus
				107	extern "C" {
				108	#endif
				109
				110
				111	PyAPI_DATA(PyTypeObject) PyUnicode_Type;
				112	PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type;
				113
				114	#define PyUnicode_Check(op) \
				115	PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS)
				116	#define PyUnicode_CheckExact(op) Py_IS_TYPE(op, &PyUnicode_Type)
				117
				118	/* --- Constants ---------------------------------------------------------- */
				119
				120	/* This Unicode character will be used as replacement character during
				121	decoding if the errors argument is set to "replace". Note: the
				122	Unicode character U+FFFD is the official REPLACEMENT CHARACTER in
				123	Unicode 3.0. */
				124
				125	#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD)
				126
				127	/* === Public API ========================================================= */
				128
				129	/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */
				130	PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize(
				131	const char *u, /* UTF-8 encoded string */
				132	Py_ssize_t size /* size of buffer */
				133	);
				134
				135	/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated
				136	UTF-8 encoded bytes. The size is determined with strlen(). */
				137	PyAPI_FUNC(PyObject*) PyUnicode_FromString(
				138	const char *u /* UTF-8 encoded string */
				139	);
				140
				141	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				142	PyAPI_FUNC(PyObject*) PyUnicode_Substring(
				143	PyObject *str,
				144	Py_ssize_t start,
				145	Py_ssize_t end);
				146	#endif
				147
				148	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				149	/* Copy the string into a UCS4 buffer including the null character if copy_null
				150	is set. Return NULL and raise an exception on error. Raise a SystemError if
				151	the buffer is smaller than the string. Return buffer on success.
				152
				153	buflen is the length of the buffer in (Py_UCS4) characters. */
				154	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4(
				155	PyObject *unicode,
				156	Py_UCS4* buffer,
				157	Py_ssize_t buflen,
				158	int copy_null);
				159
				160	/* Copy the string into a UCS4 buffer. A new buffer is allocated using
				161	PyMem_Malloc; if this fails, NULL is returned with a memory error
				162	exception set. */
				163	PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode);
				164	#endif
				165
				166	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				167	/* Get the length of the Unicode object. */
				168
				169	PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength(
				170	PyObject *unicode
				171	);
				172	#endif
				173
				174	/* Get the number of Py_UNICODE units in the
				175	string representation. */
				176
				177	Py_DEPRECATED(3.3) PyAPI_FUNC(Py_ssize_t) PyUnicode_GetSize(
				178	PyObject *unicode /* Unicode object */
				179	);
				180
				181	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				182	/* Read a character from the string. */
				183
				184	PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar(
				185	PyObject *unicode,
				186	Py_ssize_t index
				187	);
				188
				189	/* Write a character to the string. The string must have been created through
				190	PyUnicode_New, must not be shared, and must not have been hashed yet.
				191
				192	Return 0 on success, -1 on error. */
				193
				194	PyAPI_FUNC(int) PyUnicode_WriteChar(
				195	PyObject *unicode,
				196	Py_ssize_t index,
				197	Py_UCS4 character
				198	);
				199	#endif
				200
				201	/* Resize a Unicode object. The length is the number of characters, except
				202	if the kind of the string is PyUnicode_WCHAR_KIND: in this case, the length
				203	is the number of Py_UNICODE characters.
				204
				205	*unicode is modified to point to the new (resized) object and 0
				206	returned on success.
				207
				208	Try to resize the string in place (which is usually faster than allocating
				209	a new string and copy characters), or create a new string.
				210
				211	Error handling is implemented as follows: an exception is set, -1
				212	is returned and *unicode left untouched.
				213
				214	WARNING: The function doesn't check string content, the result may not be a
				215	string in canonical representation. */
				216
				217	PyAPI_FUNC(int) PyUnicode_Resize(
				218	PyObject **unicode, /* Pointer to the Unicode object */
				219	Py_ssize_t length /* New length */
				220	);
				221
				222	/* Decode obj to a Unicode object.
				223
				224	bytes, bytearray and other bytes-like objects are decoded according to the
				225	given encoding and error handler. The encoding and error handler can be
				226	NULL to have the interface use UTF-8 and "strict".
				227
				228	All other objects (including Unicode objects) raise an exception.
				229
				230	The API returns NULL in case of an error. The caller is responsible
				231	for decref'ing the returned objects.
				232
				233	*/
				234
				235	PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject(
				236	PyObject *obj, /* Object */
				237	const char *encoding, /* encoding */
				238	const char *errors /* error handling */
				239	);
				240
				241	/* Copy an instance of a Unicode subtype to a new true Unicode object if
				242	necessary. If obj is already a true Unicode object (not a subtype), return
				243	the reference with incremented refcount.
				244
				245	The API returns NULL in case of an error. The caller is responsible
				246	for decref'ing the returned objects.
				247
				248	*/
				249
				250	PyAPI_FUNC(PyObject*) PyUnicode_FromObject(
				251	PyObject *obj /* Object */
				252	);
				253
				254	PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV(
				255	const char *format, /* ASCII-encoded string */
				256	va_list vargs
				257	);
				258	PyAPI_FUNC(PyObject *) PyUnicode_FromFormat(
				259	const char *format, /* ASCII-encoded string */
				260	...
				261	);
				262
				263	PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **);
				264	PyAPI_FUNC(void) PyUnicode_InternImmortal(PyObject **);
				265	PyAPI_FUNC(PyObject *) PyUnicode_InternFromString(
				266	const char *u /* UTF-8 encoded string */
				267	);
				268
				269	/* Reads the state.interned field of op viewed as a PyASCIIObject; no type check is done, so use only if you know it's a string */
				270	#define PyUnicode_CHECK_INTERNED(op) \
				271	(((PyASCIIObject *)(op))->state.interned)
				272
				273	/* --- wchar_t support for platforms which support it --------------------- */
				274
				275	#ifdef HAVE_WCHAR_H
				276
				277	/* Create a Unicode Object from the wchar_t buffer w of the given
				278	size.
				279
				280	The buffer is copied into the new object. */
				281
				282	PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar(
				283	const wchar_t *w, /* wchar_t buffer */
				284	Py_ssize_t size /* size of buffer */
				285	);
				286
				287	/* Copies the Unicode Object contents into the wchar_t buffer w. At
				288	most size wchar_t characters are copied.
				289
				290	Note that the resulting wchar_t string may or may not be
				291	0-terminated. It is the responsibility of the caller to make sure
				292	that the wchar_t string is 0-terminated in case this is required by
				293	the application.
				294
				295	Returns the number of wchar_t characters copied (excluding a
				296	possibly trailing 0-termination character) or -1 in case of an
				297	error. */
				298
				299	PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar(
				300	PyObject *unicode, /* Unicode object */
				301	wchar_t *w, /* wchar_t buffer */
				302	Py_ssize_t size /* size of buffer */
				303	);
				304
				305	/* Convert the Unicode object to a wide character string. The output string
				306	always ends with a nul character. If size is not NULL, write the number of
				307	wide characters (excluding the null character) into *size.
				308
				309	Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it)
				310	on success. On error, returns NULL, *size is undefined and raises a
				311	MemoryError. */
				312
				313	PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString(
				314	PyObject *unicode, /* Unicode object */
				315	Py_ssize_t *size /* number of characters of the result */
				316	);
				317
				318	#endif
				319
				320	/* --- Unicode ordinals --------------------------------------------------- */
				321
				322	/* Create a Unicode Object from the given Unicode code point ordinal.
				323
				324	The ordinal must be in range(0x110000). A ValueError is
				325	raised in case it is not.
				326
				327	*/
				328
				329	PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal);
				330
				331	/* === Builtin Codecs =====================================================
				332
				333	Many of these APIs take two arguments encoding and errors. These
				334	parameters encoding and errors have the same semantics as the ones
				335	of the builtin str() API.
				336
				337	Setting encoding to NULL causes the default encoding (UTF-8) to be used.
				338
				339	Error handling is set by errors which may also be set to NULL
				340	meaning to use the default handling defined for the codec. Default
				341	error handling for all builtin codecs is "strict" (ValueErrors are
				342	raised).
				343
				344	The codecs all use a similar interface. Only deviation from the
				345	generic ones are documented.
				346
				347	*/
				348
				349	/* --- Manage the default encoding ---------------------------------------- */
				350
				351	/* Returns "utf-8". */
				352	PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void);
				353
				354	/* --- Generic Codecs ----------------------------------------------------- */
				355
				356	/* Create a Unicode object by decoding the encoded string s of the
				357	given size. */
				358
				359	PyAPI_FUNC(PyObject*) PyUnicode_Decode(
				360	const char *s, /* encoded string */
				361	Py_ssize_t size, /* size of buffer */
				362	const char *encoding, /* encoding */
				363	const char *errors /* error handling */
				364	);
				365
				366	/* Decode a Unicode object unicode and return the result as Python
				367	object.
				368
				369	This API is DEPRECATED. The only supported standard encoding is rot13.
				370	Use PyCodec_Decode() to decode with rot13 and non-standard codecs
				371	that decode from str. */
				372
				373	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject(
				374	PyObject *unicode, /* Unicode object */
				375	const char *encoding, /* encoding */
				376	const char *errors /* error handling */
				377	);
				378
				379	/* Decode a Unicode object unicode and return the result as Unicode
				380	object.
				381
				382	This API is DEPRECATED. The only supported standard encoding is rot13.
				383	Use PyCodec_Decode() to decode with rot13 and non-standard codecs
				384	that decode from str to str. */
				385
				386	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode(
				387	PyObject *unicode, /* Unicode object */
				388	const char *encoding, /* encoding */
				389	const char *errors /* error handling */
				390	);
				391
				392	/* Encodes a Unicode object and returns the result as Python
				393	object.
				394
				395	This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString()
				396	since all standard encodings (except rot13) encode str to bytes.
				397	Use PyCodec_Encode() for encoding with rot13 and non-standard codecs
				398	that encode from str to non-bytes. */
				399
				400	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject(
				401	PyObject *unicode, /* Unicode object */
				402	const char *encoding, /* encoding */
				403	const char *errors /* error handling */
				404	);
				405
				406	/* Encodes a Unicode object and returns the result as Python string
				407	object. */
				408
				409	PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
				410	PyObject *unicode, /* Unicode object */
				411	const char *encoding, /* encoding */
				412	const char *errors /* error handling */
				413	);
				414
				415	/* Encodes a Unicode object and returns the result as Unicode
				416	object.
				417
				418	This API is DEPRECATED. The only supported standard encoding is rot13.
				419	Use PyCodec_Encode() to encode with rot13 and non-standard codecs
				420	that encode from str to str. */
				421
				422	Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode(
				423	PyObject *unicode, /* Unicode object */
				424	const char *encoding, /* encoding */
				425	const char *errors /* error handling */
				426	);
				427
				428	/* Build an encoding map. */
				429
				430	PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
				431	PyObject* string /* 256 character map */
				432	);
				433
				434	/* --- UTF-7 Codecs ------------------------------------------------------- */
				435
				436	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
				437	const char *string, /* UTF-7 encoded string */
				438	Py_ssize_t length, /* size of string */
				439	const char *errors /* error handling */
				440	);
				441
				442	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful(
				443	const char *string, /* UTF-7 encoded string */
				444	Py_ssize_t length, /* size of string */
				445	const char *errors, /* error handling */
				446	Py_ssize_t *consumed /* bytes consumed */
				447	);
				448
				449	/* --- UTF-8 Codecs ------------------------------------------------------- */
				450
				451	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8(
				452	const char *string, /* UTF-8 encoded string */
				453	Py_ssize_t length, /* size of string */
				454	const char *errors /* error handling */
				455	);
				456
				457	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful(
				458	const char *string, /* UTF-8 encoded string */
				459	Py_ssize_t length, /* size of string */
				460	const char *errors, /* error handling */
				461	Py_ssize_t *consumed /* bytes consumed */
				462	);
				463
				464	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String(
				465	PyObject *unicode /* Unicode object */
				466	);
				467
				468	/* --- UTF-32 Codecs ------------------------------------------------------ */
				469
				470	/* Decodes length bytes from a UTF-32 encoded buffer string and returns
				471	the corresponding Unicode object.
				472
				473	errors (if non-NULL) defines the error handling. It defaults
				474	to "strict".
				475
				476	If byteorder is non-NULL, the decoder starts decoding using the
				477	given byte order:
				478
				479	*byteorder == -1: little endian
				480	*byteorder == 0: native order
				481	*byteorder == 1: big endian
				482
				483	In native mode, the first four bytes of the stream are checked for a
				484	BOM mark. If found, the BOM mark is analysed, the byte order
				485	adjusted and the BOM skipped. In the other modes, no BOM mark
				486	interpretation is done. After completion, *byteorder is set to the
				487	current byte order at the end of input data.
				488
				489	If byteorder is NULL, the codec starts in native order mode.
				490
				491	*/
				492
				493	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32(
				494	const char *string, /* UTF-32 encoded string */
				495	Py_ssize_t length, /* size of string */
				496	const char *errors, /* error handling */
				497	int *byteorder /* pointer to byteorder to use
				498	0=native;-1=LE,1=BE; updated on
				499	exit */
				500	);
				501
				502	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful(
				503	const char *string, /* UTF-32 encoded string */
				504	Py_ssize_t length, /* size of string */
				505	const char *errors, /* error handling */
				506	int *byteorder, /* pointer to byteorder to use
				507	0=native;-1=LE,1=BE; updated on
				508	exit */
				509	Py_ssize_t *consumed /* bytes consumed */
				510	);
				511
				512	/* Returns a Python string using the UTF-32 encoding in native byte
				513	order. The string always starts with a BOM mark. */
				514
				515	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String(
				516	PyObject *unicode /* Unicode object */
				517	);
				518
				519	/* Returns a Python string object holding the UTF-32 encoded value of
				520	the Unicode data.
				521
				522	If byteorder is not 0, output is written according to the following
				523	byte order:
				524
				525	byteorder == -1: little endian
				526	byteorder == 0: native byte order (writes a BOM mark)
				527	byteorder == 1: big endian
				528
				529	If byteorder is 0, the output string will always start with the
				530	Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
				531	prepended.
				532
				533	*/
				534
				535	/* --- UTF-16 Codecs ------------------------------------------------------ */
				536
				537	/* Decodes length bytes from a UTF-16 encoded buffer string and returns
				538	the corresponding Unicode object.
				539
				540	errors (if non-NULL) defines the error handling. It defaults
				541	to "strict".
				542
				543	If byteorder is non-NULL, the decoder starts decoding using the
				544	given byte order:
				545
				546	*byteorder == -1: little endian
				547	*byteorder == 0: native order
				548	*byteorder == 1: big endian
				549
				550	In native mode, the first two bytes of the stream are checked for a
				551	BOM mark. If found, the BOM mark is analysed, the byte order
				552	adjusted and the BOM skipped. In the other modes, no BOM mark
				553	interpretation is done. After completion, *byteorder is set to the
				554	current byte order at the end of input data.
				555
				556	If byteorder is NULL, the codec starts in native order mode.
				557
				558	*/
				559
				560	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16(
				561	const char *string, /* UTF-16 encoded string */
				562	Py_ssize_t length, /* size of string */
				563	const char *errors, /* error handling */
				564	int *byteorder /* pointer to byteorder to use
				565	0=native;-1=LE,1=BE; updated on
				566	exit */
				567	);
				568
				569	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful(
				570	const char *string, /* UTF-16 encoded string */
				571	Py_ssize_t length, /* size of string */
				572	const char *errors, /* error handling */
				573	int *byteorder, /* pointer to byteorder to use
				574	0=native;-1=LE,1=BE; updated on
				575	exit */
				576	Py_ssize_t *consumed /* bytes consumed */
				577	);
				578
				579	/* Returns a Python string using the UTF-16 encoding in native byte
				580	order. The string always starts with a BOM mark. */
				581
				582	PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String(
				583	PyObject *unicode /* Unicode object */
				584	);
				585
				586	/* --- Unicode-Escape Codecs ---------------------------------------------- */
				587
				588	PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape(
				589	const char *string, /* Unicode-Escape encoded string */
				590	Py_ssize_t length, /* size of string */
				591	const char *errors /* error handling */
				592	);
				593
				594	PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString(
				595	PyObject *unicode /* Unicode object */
				596	);
				597
				598	/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
				599
				600	PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape(
				601	const char *string, /* Raw-Unicode-Escape encoded string */
				602	Py_ssize_t length, /* size of string */
				603	const char *errors /* error handling */
				604	);
				605
				606	PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString(
				607	PyObject *unicode /* Unicode object */
				608	);
				609
				610	/* --- Latin-1 Codecs -----------------------------------------------------
				611
				612	Note: Latin-1 corresponds to the first 256 Unicode ordinals. */
				613
				614	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1(
				615	const char *string, /* Latin-1 encoded string */
				616	Py_ssize_t length, /* size of string */
				617	const char *errors /* error handling */
				618	);
				619
				620	PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String(
				621	PyObject *unicode /* Unicode object */
				622	);
				623
				624	/* --- ASCII Codecs -------------------------------------------------------
				625
				626	Only 7-bit ASCII data is accepted. All other codes generate errors.
				627
				628	*/
				629
				630	PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII(
				631	const char *string, /* ASCII encoded string */
				632	Py_ssize_t length, /* size of string */
				633	const char *errors /* error handling */
				634	);
				635
				636	PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString(
				637	PyObject *unicode /* Unicode object */
				638	);
				639
				640	/* --- Character Map Codecs -----------------------------------------------
				641
				642	This codec uses mappings to encode and decode characters.
				643
				644	Decoding mappings must map byte ordinals (integers in the range from 0 to
				645	255) to Unicode strings, integers (which are then interpreted as Unicode
				646	ordinals) or None. Unmapped data bytes (ones which cause a LookupError)
				647	as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined
				648	mapping" and cause an error.
				649
				650	Encoding mappings must map Unicode ordinal integers to bytes objects,
				651	integers in the range from 0 to 255 or None. Unmapped character
				652	ordinals (ones which cause a LookupError) as well as mapped to
				653	None are treated as "undefined mapping" and cause an error.
				654
				655	*/
				656
				657	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap(
				658	const char *string, /* Encoded string */
				659	Py_ssize_t length, /* size of string */
				660	PyObject *mapping, /* decoding mapping */
				661	const char *errors /* error handling */
				662	);
				663
				664	PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString(
				665	PyObject *unicode, /* Unicode object */
				666	PyObject *mapping /* encoding mapping */
				667	);
				668
				669	/* --- MBCS codecs for Windows -------------------------------------------- */
				670
				671	#ifdef MS_WINDOWS
				672	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS(
				673	const char *string, /* MBCS encoded string */
				674	Py_ssize_t length, /* size of string */
				675	const char *errors /* error handling */
				676	);
				677
				678	PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful(
				679	const char *string, /* MBCS encoded string */
				680	Py_ssize_t length, /* size of string */
				681	const char *errors, /* error handling */
				682	Py_ssize_t *consumed /* bytes consumed */
				683	);
				684
				685	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				686	PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful(
				687	int code_page, /* code page number */
				688	const char *string, /* encoded string */
				689	Py_ssize_t length, /* size of string */
				690	const char *errors, /* error handling */
				691	Py_ssize_t *consumed /* bytes consumed */
				692	);
				693	#endif
				694
				695	PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString(
				696	PyObject *unicode /* Unicode object */
				697	);
				698
				699	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				700	PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage(
				701	int code_page, /* code page number */
				702	PyObject *unicode, /* Unicode object */
				703	const char *errors /* error handling */
				704	);
				705	#endif
				706
				707	#endif /* MS_WINDOWS */
				708
				709	/* --- Locale encoding --------------------------------------------------- */
				710
				711	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				712	/* Decode a string from the current locale encoding. The decoder is strict if
				713	surrogateescape is equal to zero, otherwise it uses the 'surrogateescape'
				714	error handler (PEP 383) to escape undecodable bytes. If a byte sequence can
				715	be decoded as a surrogate character and surrogateescape is not equal to
				716	zero, the byte sequence is escaped using the 'surrogateescape' error handler
				717	instead of being decoded. str must end with a null character but cannot
				718	contain embedded null characters. */
				719
				720	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize(
				721	const char *str,
				722	Py_ssize_t len,
				723	const char *errors);
				724
				725	/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string
				726	length using strlen(). */
				727
				728	PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale(
				729	const char *str,
				730	const char *errors);
				731
				732	/* Encode a Unicode object to the current locale encoding. The encoder is
				733	strict if surrogateescape is equal to zero, otherwise the
				734	"surrogateescape" error handler is used. Return a bytes object. The string
				735	cannot contain embedded null characters. */
				736
				737	PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale(
				738	PyObject *unicode,
				739	const char *errors
				740	);
				741	#endif
				742
				743	/* --- File system encoding ---------------------------------------------- */
				744
				745	/* ParseTuple converter: encode str objects to bytes using
				746	PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */
				747
				748	PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*);
				749
				750	/* ParseTuple converter: decode bytes objects to unicode using
				751	PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */
				752
				753	PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*);
				754
				755	/* Decode a null-terminated string using Py_FileSystemDefaultEncoding
				756	and the "surrogateescape" error handler.
				757
				758	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				759	encoding.
				760
				761	Use PyUnicode_DecodeFSDefaultAndSize() if the string length is known.
				762	*/
				763
				764	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault(
				765	const char *s /* encoded string */
				766	);
				767
				768	/* Decode a string using Py_FileSystemDefaultEncoding
				769	and the "surrogateescape" error handler.
				770
				771	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				772	encoding.
				773	*/
				774
				775	PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize(
				776	const char *s, /* encoded string */
				777	Py_ssize_t size /* size */
				778	);
				779
				780	/* Encode a Unicode object to Py_FileSystemDefaultEncoding with the
				781	"surrogateescape" error handler, and return bytes.
				782
				783	If Py_FileSystemDefaultEncoding is not set, fall back to the locale
				784	encoding.
				785	*/
				786
				787	PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault(
				788	PyObject *unicode
				789	);
				790
				791	/* --- Methods & Slots ----------------------------------------------------
				792
				793	These are capable of handling Unicode objects and strings on input
				794	(we refer to them as strings in the descriptions) and return
				795	Unicode objects or integers as appropriate. */
				796
				797	/* Concat two strings giving a new Unicode string. */
				798
				799	PyAPI_FUNC(PyObject*) PyUnicode_Concat(
				800	PyObject *left, /* Left string */
				801	PyObject *right /* Right string */
				802	);
				803
				804	/* Concat two strings and put the result in *pleft
				805	(sets *pleft to NULL on error) */
				806
				807	PyAPI_FUNC(void) PyUnicode_Append(
				808	PyObject **pleft, /* Pointer to left string */
				809	PyObject *right /* Right string */
				810	);
				811
				812	/* Concat two strings, put the result in *pleft and drop the right object
				813	(sets *pleft to NULL on error) */
				814
				815	PyAPI_FUNC(void) PyUnicode_AppendAndDel(
				816	PyObject **pleft, /* Pointer to left string */
				817	PyObject *right /* Right string */
				818	);
				819
				820	/* Split a string giving a list of Unicode strings.
				821
				822	If sep is NULL, splitting will be done at all whitespace
				823	substrings. Otherwise, splits occur at the given separator.
				824
				825	At most maxsplit splits will be done. If negative, no limit is set.
				826
				827	Separators are not included in the resulting list.
				828
				829	*/
				830
				831	PyAPI_FUNC(PyObject*) PyUnicode_Split(
				832	PyObject *s, /* String to split */
				833	PyObject *sep, /* String separator */
				834	Py_ssize_t maxsplit /* Maxsplit count */
				835	);
				836
				837	/* Like PyUnicode_Split, but split at line breaks.
				838
				839	CRLF is considered to be one line break. Line breaks are not
				840	included in the resulting list. */
				841
				842	PyAPI_FUNC(PyObject*) PyUnicode_Splitlines(
				843	PyObject *s, /* String to split */
				844	int keepends /* If true, line end markers are included */
				845	);
				846
				847	/* Partition a string using a given separator. */
				848
				849	PyAPI_FUNC(PyObject*) PyUnicode_Partition(
				850	PyObject *s, /* String to partition */
				851	PyObject *sep /* String separator */
				852	);
				853
				854	/* Partition a string using a given separator, searching from the end of the
				855	string. */
				856
				857	PyAPI_FUNC(PyObject*) PyUnicode_RPartition(
				858	PyObject *s, /* String to partition */
				859	PyObject *sep /* String separator */
				860	);
				861
				862	/* Split a string giving a list of Unicode strings.
				863
				864	If sep is NULL, splitting will be done at all whitespace
				865	substrings. Otherwise, splits occur at the given separator.
				866
				867	At most maxsplit splits will be done; if maxsplit is negative, no limit
				868	is set. Unlike PyUnicode_Split, PyUnicode_RSplit splits from the end of
				869	the string.
				870
				871	Separators are not included in the resulting list.
				872
				873	*/
				874
				875	PyAPI_FUNC(PyObject*) PyUnicode_RSplit(
				876	PyObject *s, /* String to split */
				877	PyObject *sep, /* String separator */
				878	Py_ssize_t maxsplit /* Maxsplit count */
				879	);
				880
				881	/* Translate a string by applying a character mapping table to it and
				882	return the resulting Unicode object.
				883
				884	The mapping table must map Unicode ordinal integers to Unicode strings,
				885	Unicode ordinal integers or None (causing deletion of the character).
				886
				887	Mapping tables may be dictionaries or sequences. Unmapped character
				888	ordinals (ones which cause a LookupError) are left untouched and
				889	are copied as-is.
				890
				891	*/
				892
				893	PyAPI_FUNC(PyObject *) PyUnicode_Translate(
				894	PyObject *str, /* String */
				895	PyObject *table, /* Translate table */
				896	const char *errors /* error handling */
				897	);
				898
				899	/* Join a sequence of strings using the given separator and return
				900	the resulting Unicode string. */
				901
				902	PyAPI_FUNC(PyObject*) PyUnicode_Join(
				903	PyObject *separator, /* Separator string */
				904	PyObject *seq /* Sequence object */
				905	);
				906
				907	/* Return 1 if substr matches str[start:end] at the given tail end, 0
				908	otherwise. */
				909
				910	PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch(
				911	PyObject *str, /* String */
				912	PyObject *substr, /* Prefix or Suffix string */
				913	Py_ssize_t start, /* Start index */
				914	Py_ssize_t end, /* Stop index */
				915	int direction /* Tail end: -1 prefix, +1 suffix */
				916	);
				917
				918	/* Return the first position of substr in str[start:end] using the
				919	given search direction or -1 if not found. -2 is returned in case
				920	an error occurred and an exception is set. */
				921
				922	PyAPI_FUNC(Py_ssize_t) PyUnicode_Find(
				923	PyObject *str, /* String */
				924	PyObject *substr, /* Substring to find */
				925	Py_ssize_t start, /* Start index */
				926	Py_ssize_t end, /* Stop index */
				927	int direction /* Find direction: +1 forward, -1 backward */
				928	);
				929
				930	#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
				931	/* Like PyUnicode_Find, but search for single character only. */
				932	PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar(
				933	PyObject *str, /* String */
				934	Py_UCS4 ch, /* Character to find */
				935	Py_ssize_t start, /* Start index */
				936	Py_ssize_t end, /* Stop index */
				937	int direction /* Find direction: +1 forward, -1 backward */
				938	);
				939	#endif
				940
				941	/* Count the number of occurrences of substr in str[start:end]. */
				942
				943	PyAPI_FUNC(Py_ssize_t) PyUnicode_Count(
				944	PyObject *str, /* String */
				945	PyObject *substr, /* Substring to count */
				946	Py_ssize_t start, /* Start index */
				947	Py_ssize_t end /* Stop index */
				948	);
				949
				950	/* Replace at most maxcount occurrences of substr in str with replstr
				951	and return the resulting Unicode object. */
				952
				953	PyAPI_FUNC(PyObject *) PyUnicode_Replace(
				954	PyObject *str, /* String */
				955	PyObject *substr, /* Substring to find */
				956	PyObject *replstr, /* Substring to replace */
				957	Py_ssize_t maxcount /* Max. number of replacements to apply;
				958	-1 = all */
				959	);
				960
				961	/* Compare two strings and return -1, 0, 1 for less than, equal,
				962	greater than resp.
				963	Raise an exception and return -1 on error. */
				964
				965	PyAPI_FUNC(int) PyUnicode_Compare(
				966	PyObject *left, /* Left string */
				967	PyObject *right /* Right string */
				968	);
				969
				970	/* Compare a Unicode object with C string and return -1, 0, 1 for less than,
				971	equal, and greater than, respectively. It is best to pass only
				972	ASCII-encoded strings, but the function interprets the input string as
				973	ISO-8859-1 if it contains non-ASCII characters.
				974	This function does not raise exceptions. */
				975
				976	PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString(
				977	PyObject *left, /* Unicode object */
				978	const char *right /* ASCII-encoded string */
				979	);
				980
				981	/* Rich compare two strings and return one of the following:
				982
				983	- NULL in case an exception was raised
				984	- Py_True or Py_False for successful comparisons
				985	- Py_NotImplemented in case the type combination is unknown
				986
				987	Possible values for op:
				988
				989	Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE
				990
				991	*/
				992
				993	PyAPI_FUNC(PyObject *) PyUnicode_RichCompare(
				994	PyObject *left, /* Left string */
				995	PyObject *right, /* Right string */
				996	int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */
				997	);
				998
				999	/* Apply an argument tuple or dictionary to a format string and return
				1000	the resulting Unicode string. */
				1001
				1002	PyAPI_FUNC(PyObject *) PyUnicode_Format(
				1003	PyObject *format, /* Format string */
				1004	PyObject *args /* Argument tuple or dictionary */
				1005	);
				1006
				1007	/* Checks whether element is contained in container and return 1/0
				1008	accordingly.
				1009
				1010	element has to coerce to a one element Unicode string. -1 is
				1011	returned in case of an error. */
				1012
				1013	PyAPI_FUNC(int) PyUnicode_Contains(
				1014	PyObject *container, /* Container string */
				1015	PyObject *element /* Element string */
				1016	);
				1017
				1018	/* Checks whether argument is a valid identifier. */
				1019
				1020	PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); /* s: object to check */
				1021
				1022	/* === Characters Type APIs =============================================== */
				1023
				1024	#ifndef Py_LIMITED_API
				1025	# define Py_CPYTHON_UNICODEOBJECT_H
				1026	# include "cpython/unicodeobject.h"
				1027	# undef Py_CPYTHON_UNICODEOBJECT_H
				1028	#endif
				1029
				1030	#ifdef __cplusplus
				1031	}
				1032	#endif
				1033	#endif /* !Py_UNICODEOBJECT_H */