From cba249d0808add98aa682fa29a8037399c414292 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 Nov 2023 22:50:41 +0100 Subject: [PATCH 1/5] Revert "gh-111089: Use PyUnicode_AsUTF8() in Argument Clinic (#111585)" This reverts commit d9b606b3d04fc56fb0bcc479d7d6c14562edb5e2. --- Modules/_io/clinic/_iomodule.c.h | 30 +- Modules/_io/clinic/fileio.c.h | 9 +- Modules/_io/clinic/textio.c.h | 23 +- Modules/_io/clinic/winconsoleio.c.h | 9 +- .../clinic/multiprocessing.c.h | 9 +- Modules/_multiprocessing/clinic/semaphore.c.h | 9 +- Modules/_sqlite/clinic/connection.c.h | 86 ++++- Modules/_sqlite/clinic/cursor.c.h | 9 +- Modules/_sqlite/clinic/module.c.h | 9 +- Modules/_testcapi/clinic/exceptions.c.h | 16 +- Modules/cjkcodecs/clinic/multibytecodec.c.h | 16 +- Modules/clinic/_codecsmodule.c.h | 310 +++++++++++++++--- Modules/clinic/_cursesmodule.c.h | 30 +- Modules/clinic/_dbmmodule.c.h | 9 +- Modules/clinic/_elementtree.c.h | 9 +- Modules/clinic/_gdbmmodule.c.h | 9 +- Modules/clinic/_hashopenssl.c.h | 9 +- Modules/clinic/_localemodule.c.h | 72 +++- Modules/clinic/_pickle.c.h | 44 ++- Modules/clinic/_ssl.c.h | 37 ++- Modules/clinic/_testclinic.c.h | 16 +- Modules/clinic/_tkinter.c.h | 100 +++++- Modules/clinic/posixmodule.c.h | 16 +- Modules/clinic/pyexpat.c.h | 37 ++- Modules/clinic/symtablemodule.c.h | 9 +- Objects/clinic/bytearrayobject.c.h | 30 +- Objects/clinic/bytesobject.c.h | 30 +- Objects/clinic/floatobject.c.h | 9 +- Objects/clinic/memoryobject.c.h | 9 +- Objects/clinic/unicodeobject.c.h | 30 +- Python/clinic/Python-tokenize.c.h | 9 +- Python/clinic/bltinmodule.c.h | 9 +- Python/clinic/sysmodule.c.h | 9 +- Tools/clinic/clinic.py | 20 +- 34 files changed, 907 insertions(+), 180 deletions(-) diff --git a/Modules/_io/clinic/_iomodule.c.h b/Modules/_io/clinic/_iomodule.c.h index 68a8a20514027d..112408a95df036 100644 --- a/Modules/_io/clinic/_iomodule.c.h +++ b/Modules/_io/clinic/_iomodule.c.h @@ -188,10 +188,15 @@ _io_open(PyObject *module, PyObject *const 
*args, Py_ssize_t nargs, PyObject *kw _PyArg_BadArgument("open", "argument 'mode'", "str", args[1]); goto exit; } - mode = PyUnicode_AsUTF8(args[1]); + Py_ssize_t mode_length; + mode = PyUnicode_AsUTF8AndSize(args[1], &mode_length); if (mode == NULL) { goto exit; } + if (strlen(mode) != (size_t)mode_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -210,10 +215,15 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw encoding = NULL; } else if (PyUnicode_Check(args[3])) { - encoding = PyUnicode_AsUTF8(args[3]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[3], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("open", "argument 'encoding'", "str or None", args[3]); @@ -228,10 +238,15 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw errors = NULL; } else if (PyUnicode_Check(args[4])) { - errors = PyUnicode_AsUTF8(args[4]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[4], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("open", "argument 'errors'", "str or None", args[4]); @@ -246,10 +261,15 @@ _io_open(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw newline = NULL; } else if (PyUnicode_Check(args[5])) { - newline = PyUnicode_AsUTF8(args[5]); + Py_ssize_t newline_length; + newline = PyUnicode_AsUTF8AndSize(args[5], &newline_length); if (newline == NULL) { goto exit; } + if (strlen(newline) != (size_t)newline_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { 
_PyArg_BadArgument("open", "argument 'newline'", "str or None", args[5]); @@ -384,4 +404,4 @@ _io_open_code(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=feb173d5f2bfb98a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=5d60f4e778a600a4 input=a9049054013a1b77]*/ diff --git a/Modules/_io/clinic/fileio.c.h b/Modules/_io/clinic/fileio.c.h index 6f5d660affd569..cf3ba28b066cf7 100644 --- a/Modules/_io/clinic/fileio.c.h +++ b/Modules/_io/clinic/fileio.c.h @@ -107,10 +107,15 @@ _io_FileIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("FileIO", "argument 'mode'", "str", fastargs[1]); goto exit; } - mode = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t mode_length; + mode = PyUnicode_AsUTF8AndSize(fastargs[1], &mode_length); if (mode == NULL) { goto exit; } + if (strlen(mode) != (size_t)mode_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -523,4 +528,4 @@ _io_FileIO_isatty(fileio *self, PyObject *Py_UNUSED(ignored)) #ifndef _IO_FILEIO_TRUNCATE_METHODDEF #define _IO_FILEIO_TRUNCATE_METHODDEF #endif /* !defined(_IO_FILEIO_TRUNCATE_METHODDEF) */ -/*[clinic end generated code: output=27cff9d0a618edb6 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=1c0f4a36f76b0c6a input=a9049054013a1b77]*/ diff --git a/Modules/_io/clinic/textio.c.h b/Modules/_io/clinic/textio.c.h index 25c301ecdb6dab..b24a1669f9b344 100644 --- a/Modules/_io/clinic/textio.c.h +++ b/Modules/_io/clinic/textio.c.h @@ -185,10 +185,15 @@ _io__TextIOBase_write(PyObject *self, PyTypeObject *cls, PyObject *const *args, _PyArg_BadArgument("write", "argument 1", "str", args[0]); goto exit; } - s = PyUnicode_AsUTF8(args[0]); + Py_ssize_t s_length; + s = PyUnicode_AsUTF8AndSize(args[0], &s_length); if (s == NULL) { goto exit; } + if (strlen(s) != (size_t)s_length) { + 
PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _io__TextIOBase_write_impl(self, cls, s); exit: @@ -470,10 +475,15 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs) encoding = NULL; } else if (PyUnicode_Check(fastargs[1])) { - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("TextIOWrapper", "argument 'encoding'", "str or None", fastargs[1]); @@ -494,10 +504,15 @@ _io_TextIOWrapper___init__(PyObject *self, PyObject *args, PyObject *kwargs) newline = NULL; } else if (PyUnicode_Check(fastargs[3])) { - newline = PyUnicode_AsUTF8(fastargs[3]); + Py_ssize_t newline_length; + newline = PyUnicode_AsUTF8AndSize(fastargs[3], &newline_length); if (newline == NULL) { goto exit; } + if (strlen(newline) != (size_t)newline_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("TextIOWrapper", "argument 'newline'", "str or None", fastargs[3]); @@ -965,4 +980,4 @@ _io_TextIOWrapper_close(textio *self, PyObject *Py_UNUSED(ignored)) { return _io_TextIOWrapper_close_impl(self); } -/*[clinic end generated code: output=c9ffb48a5278cbd4 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=e58ce89b7354e77a input=a9049054013a1b77]*/ diff --git a/Modules/_io/clinic/winconsoleio.c.h b/Modules/_io/clinic/winconsoleio.c.h index 8609fc9ede95fe..6cab295c44611d 100644 --- a/Modules/_io/clinic/winconsoleio.c.h +++ b/Modules/_io/clinic/winconsoleio.c.h @@ -106,10 +106,15 @@ _io__WindowsConsoleIO___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("_WindowsConsoleIO", "argument 'mode'", "str", fastargs[1]); goto exit; } - mode = 
PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t mode_length; + mode = PyUnicode_AsUTF8AndSize(fastargs[1], &mode_length); if (mode == NULL) { goto exit; } + if (strlen(mode) != (size_t)mode_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -452,4 +457,4 @@ _io__WindowsConsoleIO_isatty(winconsoleio *self, PyObject *Py_UNUSED(ignored)) #ifndef _IO__WINDOWSCONSOLEIO_ISATTY_METHODDEF #define _IO__WINDOWSCONSOLEIO_ISATTY_METHODDEF #endif /* !defined(_IO__WINDOWSCONSOLEIO_ISATTY_METHODDEF) */ -/*[clinic end generated code: output=76408dd67894bc9c input=a9049054013a1b77]*/ +/*[clinic end generated code: output=04108fc26b187386 input=a9049054013a1b77]*/ diff --git a/Modules/_multiprocessing/clinic/multiprocessing.c.h b/Modules/_multiprocessing/clinic/multiprocessing.c.h index 9133d5d8bb04fd..6d4f5c2afcfd39 100644 --- a/Modules/_multiprocessing/clinic/multiprocessing.c.h +++ b/Modules/_multiprocessing/clinic/multiprocessing.c.h @@ -138,10 +138,15 @@ _multiprocessing_sem_unlink(PyObject *module, PyObject *arg) _PyArg_BadArgument("sem_unlink", "argument", "str", arg); goto exit; } - name = PyUnicode_AsUTF8(arg); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(arg, &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _multiprocessing_sem_unlink_impl(module, name); exit: @@ -159,4 +164,4 @@ _multiprocessing_sem_unlink(PyObject *module, PyObject *arg) #ifndef _MULTIPROCESSING_SEND_METHODDEF #define _MULTIPROCESSING_SEND_METHODDEF #endif /* !defined(_MULTIPROCESSING_SEND_METHODDEF) */ -/*[clinic end generated code: output=c6735cbc59b6f324 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=73b4cb8428d816da input=a9049054013a1b77]*/ diff --git a/Modules/_multiprocessing/clinic/semaphore.c.h b/Modules/_multiprocessing/clinic/semaphore.c.h 
index ae340c2f57af20..7c855113113c20 100644 --- a/Modules/_multiprocessing/clinic/semaphore.c.h +++ b/Modules/_multiprocessing/clinic/semaphore.c.h @@ -266,10 +266,15 @@ _multiprocessing_SemLock(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("SemLock", "argument 'name'", "str", fastargs[3]); goto exit; } - name = PyUnicode_AsUTF8(fastargs[3]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(fastargs[3], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } unlink = PyObject_IsTrue(fastargs[4]); if (unlink < 0) { goto exit; @@ -537,4 +542,4 @@ _multiprocessing_SemLock___exit__(SemLockObject *self, PyObject *const *args, Py #ifndef _MULTIPROCESSING_SEMLOCK___EXIT___METHODDEF #define _MULTIPROCESSING_SEMLOCK___EXIT___METHODDEF #endif /* !defined(_MULTIPROCESSING_SEMLOCK___EXIT___METHODDEF) */ -/*[clinic end generated code: output=fd94dc907e6ab57f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d57992037e6770b6 input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/clinic/connection.c.h b/Modules/_sqlite/clinic/connection.c.h index 8e1a57415c2bd9..db5eb77891e52e 100644 --- a/Modules/_sqlite/clinic/connection.c.h +++ b/Modules/_sqlite/clinic/connection.c.h @@ -297,18 +297,28 @@ blobopen(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyO _PyArg_BadArgument("blobopen", "argument 1", "str", args[0]); goto exit; } - table = PyUnicode_AsUTF8(args[0]); + Py_ssize_t table_length; + table = PyUnicode_AsUTF8AndSize(args[0], &table_length); if (table == NULL) { goto exit; } + if (strlen(table) != (size_t)table_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!PyUnicode_Check(args[1])) { _PyArg_BadArgument("blobopen", "argument 2", "str", args[1]); goto exit; } - col = PyUnicode_AsUTF8(args[1]); + Py_ssize_t col_length; + col = 
PyUnicode_AsUTF8AndSize(args[1], &col_length); if (col == NULL) { goto exit; } + if (strlen(col) != (size_t)col_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!sqlite3_int64_converter(args[2], &row)) { goto exit; } @@ -328,10 +338,15 @@ blobopen(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, PyO _PyArg_BadArgument("blobopen", "argument 'name'", "str", args[4]); goto exit; } - name = PyUnicode_AsUTF8(args[4]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[4], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_kwonly: return_value = blobopen_impl(self, table, col, row, readonly, name); @@ -484,10 +499,15 @@ pysqlite_connection_create_function(pysqlite_Connection *self, PyTypeObject *cls _PyArg_BadArgument("create_function", "argument 'name'", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } narg = PyLong_AsInt(args[1]); if (narg == -1 && PyErr_Occurred()) { goto exit; @@ -562,10 +582,15 @@ create_window_function(pysqlite_Connection *self, PyTypeObject *cls, PyObject *c _PyArg_BadArgument("create_window_function", "argument 1", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } num_params = PyLong_AsInt(args[1]); if (num_params == -1 && PyErr_Occurred()) { goto exit; @@ -663,10 +688,15 @@ 
pysqlite_connection_create_aggregate(pysqlite_Connection *self, PyTypeObject *cl _PyArg_BadArgument("create_aggregate", "argument 'name'", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } n_arg = PyLong_AsInt(args[1]); if (n_arg == -1 && PyErr_Occurred()) { goto exit; @@ -1033,10 +1063,15 @@ pysqlite_connection_load_extension(pysqlite_Connection *self, PyObject *const *a _PyArg_BadArgument("load_extension", "argument 1", "str", args[0]); goto exit; } - extension_name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t extension_name_length; + extension_name = PyUnicode_AsUTF8AndSize(args[0], &extension_name_length); if (extension_name == NULL) { goto exit; } + if (strlen(extension_name) != (size_t)extension_name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_kwonly; } @@ -1044,10 +1079,15 @@ pysqlite_connection_load_extension(pysqlite_Connection *self, PyObject *const *a entrypoint = NULL; } else if (PyUnicode_Check(args[1])) { - entrypoint = PyUnicode_AsUTF8(args[1]); + Py_ssize_t entrypoint_length; + entrypoint = PyUnicode_AsUTF8AndSize(args[1], &entrypoint_length); if (entrypoint == NULL) { goto exit; } + if (strlen(entrypoint) != (size_t)entrypoint_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("load_extension", "argument 'entrypoint'", "str or None", args[1]); @@ -1266,10 +1306,15 @@ pysqlite_connection_backup(pysqlite_Connection *self, PyObject *const *args, Py_ _PyArg_BadArgument("backup", "argument 'name'", "str", args[3]); goto exit; } - name = PyUnicode_AsUTF8(args[3]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[3], &name_length); if (name 
== NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -1335,10 +1380,15 @@ pysqlite_connection_create_collation(pysqlite_Connection *self, PyTypeObject *cl _PyArg_BadArgument("create_collation", "argument 1", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } callable = args[1]; return_value = pysqlite_connection_create_collation_impl(self, cls, name, callable); @@ -1412,10 +1462,15 @@ serialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, Py _PyArg_BadArgument("serialize", "argument 'name'", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_kwonly: return_value = serialize_impl(self, name); @@ -1510,10 +1565,15 @@ deserialize(pysqlite_Connection *self, PyObject *const *args, Py_ssize_t nargs, _PyArg_BadArgument("deserialize", "argument 'name'", "str", args[1]); goto exit; } - name = PyUnicode_AsUTF8(args[1]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[1], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_kwonly: return_value = deserialize_impl(self, &data, name); @@ -1758,4 +1818,4 @@ getconfig(pysqlite_Connection *self, PyObject *arg) #ifndef DESERIALIZE_METHODDEF #define DESERIALIZE_METHODDEF #endif /* 
!defined(DESERIALIZE_METHODDEF) */ -/*[clinic end generated code: output=7d2a4d9272f7cb9e input=a9049054013a1b77]*/ +/*[clinic end generated code: output=90b5b9c14261b8d7 input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/clinic/cursor.c.h b/Modules/_sqlite/clinic/cursor.c.h index 7a5850312ee789..a13e0d0745b58d 100644 --- a/Modules/_sqlite/clinic/cursor.c.h +++ b/Modules/_sqlite/clinic/cursor.c.h @@ -135,10 +135,15 @@ pysqlite_cursor_executescript(pysqlite_Cursor *self, PyObject *arg) _PyArg_BadArgument("executescript", "argument", "str", arg); goto exit; } - sql_script = PyUnicode_AsUTF8(arg); + Py_ssize_t sql_script_length; + sql_script = PyUnicode_AsUTF8AndSize(arg, &sql_script_length); if (sql_script == NULL) { goto exit; } + if (strlen(sql_script) != (size_t)sql_script_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = pysqlite_cursor_executescript_impl(self, sql_script); exit: @@ -308,4 +313,4 @@ pysqlite_cursor_close(pysqlite_Cursor *self, PyObject *Py_UNUSED(ignored)) { return pysqlite_cursor_close_impl(self); } -/*[clinic end generated code: output=c772882c7df587ea input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a8ce095c3c80cf65 input=a9049054013a1b77]*/ diff --git a/Modules/_sqlite/clinic/module.c.h b/Modules/_sqlite/clinic/module.c.h index d3c7ad8b7ca998..529dc4e281e0eb 100644 --- a/Modules/_sqlite/clinic/module.c.h +++ b/Modules/_sqlite/clinic/module.c.h @@ -60,10 +60,15 @@ pysqlite_complete_statement(PyObject *module, PyObject *const *args, Py_ssize_t _PyArg_BadArgument("complete_statement", "argument 'statement'", "str", args[0]); goto exit; } - statement = PyUnicode_AsUTF8(args[0]); + Py_ssize_t statement_length; + statement = PyUnicode_AsUTF8AndSize(args[0], &statement_length); if (statement == NULL) { goto exit; } + if (strlen(statement) != (size_t)statement_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = 
pysqlite_complete_statement_impl(module, statement); exit: @@ -203,4 +208,4 @@ pysqlite_adapt(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=19016e67830c19eb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=457ab0fdbb9e1880 input=a9049054013a1b77]*/ diff --git a/Modules/_testcapi/clinic/exceptions.c.h b/Modules/_testcapi/clinic/exceptions.c.h index 80c52f48ced7b3..a797444c1a72b9 100644 --- a/Modules/_testcapi/clinic/exceptions.c.h +++ b/Modules/_testcapi/clinic/exceptions.c.h @@ -112,10 +112,15 @@ _testcapi_make_exception_with_doc(PyObject *module, PyObject *const *args, Py_ss _PyArg_BadArgument("make_exception_with_doc", "argument 'name'", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_pos; } @@ -124,10 +129,15 @@ _testcapi_make_exception_with_doc(PyObject *module, PyObject *const *args, Py_ss _PyArg_BadArgument("make_exception_with_doc", "argument 'doc'", "str", args[1]); goto exit; } - doc = PyUnicode_AsUTF8(args[1]); + Py_ssize_t doc_length; + doc = PyUnicode_AsUTF8AndSize(args[1], &doc_length); if (doc == NULL) { goto exit; } + if (strlen(doc) != (size_t)doc_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -446,4 +456,4 @@ _testcapi_unstable_exc_prep_reraise_star(PyObject *module, PyObject *const *args exit: return return_value; } -/*[clinic end generated code: output=6f2b4f773e0ae755 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0b11ef105030a48e input=a9049054013a1b77]*/ diff --git a/Modules/cjkcodecs/clinic/multibytecodec.c.h 
b/Modules/cjkcodecs/clinic/multibytecodec.c.h index fec223930cd48c..305ade17b1f1aa 100644 --- a/Modules/cjkcodecs/clinic/multibytecodec.c.h +++ b/Modules/cjkcodecs/clinic/multibytecodec.c.h @@ -73,10 +73,15 @@ _multibytecodec_MultibyteCodec_encode(MultibyteCodecObject *self, PyObject *cons errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("encode", "argument 'errors'", "str or None", args[1]); @@ -156,10 +161,15 @@ _multibytecodec_MultibyteCodec_decode(MultibyteCodecObject *self, PyObject *cons errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("decode", "argument 'errors'", "str or None", args[1]); @@ -672,4 +682,4 @@ PyDoc_STRVAR(_multibytecodec___create_codec__doc__, #define _MULTIBYTECODEC___CREATE_CODEC_METHODDEF \ {"__create_codec", (PyCFunction)_multibytecodec___create_codec, METH_O, _multibytecodec___create_codec__doc__}, -/*[clinic end generated code: output=b35a5c3797e0e54a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=219a363662d2fbff input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_codecsmodule.c.h b/Modules/clinic/_codecsmodule.c.h index bf17596274baf2..12fea806ab5209 100644 --- a/Modules/clinic/_codecsmodule.c.h +++ b/Modules/clinic/_codecsmodule.c.h @@ -54,10 +54,15 @@ _codecs_lookup(PyObject *module, PyObject *arg) _PyArg_BadArgument("lookup", "argument", "str", arg); goto exit; } - encoding 
= PyUnicode_AsUTF8(arg); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(arg, &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _codecs_lookup_impl(module, encoding); exit: @@ -131,10 +136,15 @@ _codecs_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje _PyArg_BadArgument("encode", "argument 'encoding'", "str", args[1]); goto exit; } - encoding = PyUnicode_AsUTF8(args[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -143,10 +153,15 @@ _codecs_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje _PyArg_BadArgument("encode", "argument 'errors'", "str", args[2]); goto exit; } - errors = PyUnicode_AsUTF8(args[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = _codecs_encode_impl(module, obj, encoding, errors); @@ -221,10 +236,15 @@ _codecs_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje _PyArg_BadArgument("decode", "argument 'encoding'", "str", args[1]); goto exit; } - encoding = PyUnicode_AsUTF8(args[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -233,10 +253,15 
@@ _codecs_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje _PyArg_BadArgument("decode", "argument 'errors'", "str", args[2]); goto exit; } - errors = PyUnicode_AsUTF8(args[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = _codecs_decode_impl(module, obj, encoding, errors); @@ -286,10 +311,15 @@ _codecs_escape_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("escape_decode", "argument 2", "str or None", args[1]); @@ -341,10 +371,15 @@ _codecs_escape_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("escape_encode", "argument 2", "str or None", args[1]); @@ -390,10 +425,15 @@ _codecs_utf_7_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + 
PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_7_decode", "argument 2", "str or None", args[1]); @@ -451,10 +491,15 @@ _codecs_utf_8_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_8_decode", "argument 2", "str or None", args[1]); @@ -512,10 +557,15 @@ _codecs_utf_16_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_decode", "argument 2", "str or None", args[1]); @@ -573,10 +623,15 @@ _codecs_utf_16_le_decode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_le_decode", "argument 2", "str or None", args[1]); @@ -634,10 +689,15 @@ _codecs_utf_16_be_decode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = 
PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_be_decode", "argument 2", "str or None", args[1]); @@ -697,10 +757,15 @@ _codecs_utf_16_ex_decode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_ex_decode", "argument 2", "str or None", args[1]); @@ -765,10 +830,15 @@ _codecs_utf_32_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_decode", "argument 2", "str or None", args[1]); @@ -826,10 +896,15 @@ _codecs_utf_32_le_decode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_le_decode", "argument 2", "str or None", args[1]); @@ -887,10 +962,15 @@ _codecs_utf_32_be_decode(PyObject *module, PyObject *const *args, 
Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_be_decode", "argument 2", "str or None", args[1]); @@ -950,10 +1030,15 @@ _codecs_utf_32_ex_decode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_ex_decode", "argument 2", "str or None", args[1]); @@ -1028,10 +1113,15 @@ _codecs_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ssize_ errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("unicode_escape_decode", "argument 2", "str or None", args[1]); @@ -1099,10 +1189,15 @@ _codecs_raw_unicode_escape_decode(PyObject *module, PyObject *const *args, Py_ss errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { 
_PyArg_BadArgument("raw_unicode_escape_decode", "argument 2", "str or None", args[1]); @@ -1159,10 +1254,15 @@ _codecs_latin_1_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("latin_1_decode", "argument 2", "str or None", args[1]); @@ -1212,10 +1312,15 @@ _codecs_ascii_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("ascii_decode", "argument 2", "str or None", args[1]); @@ -1266,10 +1371,15 @@ _codecs_charmap_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("charmap_decode", "argument 2", "str or None", args[1]); @@ -1326,10 +1436,15 @@ _codecs_mbcs_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { 
goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("mbcs_decode", "argument 2", "str or None", args[1]); @@ -1391,10 +1506,15 @@ _codecs_oem_decode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("oem_decode", "argument 2", "str or None", args[1]); @@ -1461,10 +1581,15 @@ _codecs_code_page_decode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[2])) { - errors = PyUnicode_AsUTF8(args[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("code_page_decode", "argument 3", "str or None", args[2]); @@ -1533,10 +1658,15 @@ _codecs_readbuffer_encode(PyObject *module, PyObject *const *args, Py_ssize_t na errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("readbuffer_encode", "argument 2", "str or None", args[1]); @@ -1588,10 +1718,15 @@ _codecs_utf_7_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = 
PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_7_encode", "argument 2", "str or None", args[1]); @@ -1638,10 +1773,15 @@ _codecs_utf_8_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_8_encode", "argument 2", "str or None", args[1]); @@ -1689,10 +1829,15 @@ _codecs_utf_16_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_encode", "argument 2", "str or None", args[1]); @@ -1746,10 +1891,15 @@ _codecs_utf_16_le_encode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_le_encode", "argument 2", "str or None", args[1]); @@ -1796,10 +1946,15 @@ 
_codecs_utf_16_be_encode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_16_be_encode", "argument 2", "str or None", args[1]); @@ -1847,10 +2002,15 @@ _codecs_utf_32_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_encode", "argument 2", "str or None", args[1]); @@ -1904,10 +2064,15 @@ _codecs_utf_32_le_encode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_le_encode", "argument 2", "str or None", args[1]); @@ -1954,10 +2119,15 @@ _codecs_utf_32_be_encode(PyObject *module, PyObject *const *args, Py_ssize_t nar errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, 
"embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("utf_32_be_encode", "argument 2", "str or None", args[1]); @@ -2004,10 +2174,15 @@ _codecs_unicode_escape_encode(PyObject *module, PyObject *const *args, Py_ssize_ errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("unicode_escape_encode", "argument 2", "str or None", args[1]); @@ -2054,10 +2229,15 @@ _codecs_raw_unicode_escape_encode(PyObject *module, PyObject *const *args, Py_ss errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("raw_unicode_escape_encode", "argument 2", "str or None", args[1]); @@ -2104,10 +2284,15 @@ _codecs_latin_1_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("latin_1_encode", "argument 2", "str or None", args[1]); @@ -2154,10 +2339,15 @@ _codecs_ascii_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = 
PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("ascii_encode", "argument 2", "str or None", args[1]); @@ -2205,10 +2395,15 @@ _codecs_charmap_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("charmap_encode", "argument 2", "str or None", args[1]); @@ -2288,10 +2483,15 @@ _codecs_mbcs_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("mbcs_encode", "argument 2", "str or None", args[1]); @@ -2341,10 +2541,15 @@ _codecs_oem_encode(PyObject *module, PyObject *const *args, Py_ssize_t nargs) errors = NULL; } else if (PyUnicode_Check(args[1])) { - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("oem_encode", "argument 2", "str or None", args[1]); @@ -2400,10 +2605,15 @@ _codecs_code_page_encode(PyObject *module, PyObject *const *args, Py_ssize_t nar 
errors = NULL; } else if (PyUnicode_Check(args[2])) { - errors = PyUnicode_AsUTF8(args[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("code_page_encode", "argument 3", "str or None", args[2]); @@ -2449,10 +2659,15 @@ _codecs_register_error(PyObject *module, PyObject *const *args, Py_ssize_t nargs _PyArg_BadArgument("register_error", "argument 1", "str", args[0]); goto exit; } - errors = PyUnicode_AsUTF8(args[0]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[0], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } handler = args[1]; return_value = _codecs_register_error_impl(module, errors, handler); @@ -2485,10 +2700,15 @@ _codecs_lookup_error(PyObject *module, PyObject *arg) _PyArg_BadArgument("lookup_error", "argument", "str", arg); goto exit; } - name = PyUnicode_AsUTF8(arg); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(arg, &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _codecs_lookup_error_impl(module, name); exit: @@ -2518,4 +2738,4 @@ _codecs_lookup_error(PyObject *module, PyObject *arg) #ifndef _CODECS_CODE_PAGE_ENCODE_METHODDEF #define _CODECS_CODE_PAGE_ENCODE_METHODDEF #endif /* !defined(_CODECS_CODE_PAGE_ENCODE_METHODDEF) */ -/*[clinic end generated code: output=5c95a170d813a46f input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d8d9e372f7ccba35 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_cursesmodule.c.h b/Modules/clinic/_cursesmodule.c.h index 409c61578572f9..f7e0aaf7b23649 100644 --- 
a/Modules/clinic/_cursesmodule.c.h +++ b/Modules/clinic/_cursesmodule.c.h @@ -2726,10 +2726,15 @@ _curses_setupterm(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyO term = NULL; } else if (PyUnicode_Check(args[0])) { - term = PyUnicode_AsUTF8(args[0]); + Py_ssize_t term_length; + term = PyUnicode_AsUTF8AndSize(args[0], &term_length); if (term == NULL) { goto exit; } + if (strlen(term) != (size_t)term_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("setupterm", "argument 'term'", "str or None", args[0]); @@ -3921,10 +3926,15 @@ _curses_tigetflag(PyObject *module, PyObject *arg) _PyArg_BadArgument("tigetflag", "argument", "str", arg); goto exit; } - capname = PyUnicode_AsUTF8(arg); + Py_ssize_t capname_length; + capname = PyUnicode_AsUTF8AndSize(arg, &capname_length); if (capname == NULL) { goto exit; } + if (strlen(capname) != (size_t)capname_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _curses_tigetflag_impl(module, capname); exit: @@ -3959,10 +3969,15 @@ _curses_tigetnum(PyObject *module, PyObject *arg) _PyArg_BadArgument("tigetnum", "argument", "str", arg); goto exit; } - capname = PyUnicode_AsUTF8(arg); + Py_ssize_t capname_length; + capname = PyUnicode_AsUTF8AndSize(arg, &capname_length); if (capname == NULL) { goto exit; } + if (strlen(capname) != (size_t)capname_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _curses_tigetnum_impl(module, capname); exit: @@ -3997,10 +4012,15 @@ _curses_tigetstr(PyObject *module, PyObject *arg) _PyArg_BadArgument("tigetstr", "argument", "str", arg); goto exit; } - capname = PyUnicode_AsUTF8(arg); + Py_ssize_t capname_length; + capname = PyUnicode_AsUTF8AndSize(arg, &capname_length); if (capname == NULL) { goto exit; } + if (strlen(capname) != (size_t)capname_length) { + PyErr_SetString(PyExc_ValueError, "embedded null 
character"); + goto exit; + } return_value = _curses_tigetstr_impl(module, capname); exit: @@ -4298,4 +4318,4 @@ _curses_has_extended_color_support(PyObject *module, PyObject *Py_UNUSED(ignored #ifndef _CURSES_USE_DEFAULT_COLORS_METHODDEF #define _CURSES_USE_DEFAULT_COLORS_METHODDEF #endif /* !defined(_CURSES_USE_DEFAULT_COLORS_METHODDEF) */ -/*[clinic end generated code: output=555e266fc4838612 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=96887782374f070a input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_dbmmodule.c.h b/Modules/clinic/_dbmmodule.c.h index 4b4baf8cd49d34..5a4aba2825e03a 100644 --- a/Modules/clinic/_dbmmodule.c.h +++ b/Modules/clinic/_dbmmodule.c.h @@ -196,10 +196,15 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("open", "argument 2", "str", args[1]); goto exit; } - flags = PyUnicode_AsUTF8(args[1]); + Py_ssize_t flags_length; + flags = PyUnicode_AsUTF8AndSize(args[1], &flags_length); if (flags == NULL) { goto exit; } + if (strlen(flags) != (size_t)flags_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (nargs < 3) { goto skip_optional; } @@ -213,4 +218,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=48183905532205c2 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=96fdd4bd7bd256c5 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_elementtree.c.h b/Modules/clinic/_elementtree.c.h index a14b3815e56423..02375c8a61e73e 100644 --- a/Modules/clinic/_elementtree.c.h +++ b/Modules/clinic/_elementtree.c.h @@ -1131,10 +1131,15 @@ _elementtree_XMLParser___init__(PyObject *self, PyObject *args, PyObject *kwargs encoding = NULL; } else if (PyUnicode_Check(fastargs[1])) { - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto 
exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("XMLParser", "argument 'encoding'", "str or None", fastargs[1]); @@ -1214,4 +1219,4 @@ _elementtree_XMLParser__setevents(XMLParserObject *self, PyObject *const *args, exit: return return_value; } -/*[clinic end generated code: output=399d9d5c9435070b input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8fdaa17d3262800a input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_gdbmmodule.c.h b/Modules/clinic/_gdbmmodule.c.h index ab7288ee22360c..c7164e519d0e7d 100644 --- a/Modules/clinic/_gdbmmodule.c.h +++ b/Modules/clinic/_gdbmmodule.c.h @@ -318,10 +318,15 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("open", "argument 2", "str", args[1]); goto exit; } - flags = PyUnicode_AsUTF8(args[1]); + Py_ssize_t flags_length; + flags = PyUnicode_AsUTF8AndSize(args[1], &flags_length); if (flags == NULL) { goto exit; } + if (strlen(flags) != (size_t)flags_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (nargs < 3) { goto skip_optional; } @@ -335,4 +340,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=725cafd8b2d8cfdb input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c5ee922363d5a81f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_hashopenssl.c.h b/Modules/clinic/_hashopenssl.c.h index e360e98500729b..58650dff288444 100644 --- a/Modules/clinic/_hashopenssl.c.h +++ b/Modules/clinic/_hashopenssl.c.h @@ -1278,10 +1278,15 @@ pbkdf2_hmac(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("pbkdf2_hmac", "argument 'hash_name'", "str", args[0]); goto exit; } - hash_name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t hash_name_length; + hash_name = PyUnicode_AsUTF8AndSize(args[0], 
&hash_name_length); if (hash_name == NULL) { goto exit; } + if (strlen(hash_name) != (size_t)hash_name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (PyObject_GetBuffer(args[1], &password, PyBUF_SIMPLE) != 0) { goto exit; } @@ -1819,4 +1824,4 @@ _hashlib_compare_digest(PyObject *module, PyObject *const *args, Py_ssize_t narg #ifndef _HASHLIB_SCRYPT_METHODDEF #define _HASHLIB_SCRYPT_METHODDEF #endif /* !defined(_HASHLIB_SCRYPT_METHODDEF) */ -/*[clinic end generated code: output=bc372898eaa3e000 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=b7eddeb3d6ccdeec input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_localemodule.c.h b/Modules/clinic/_localemodule.c.h index 2663b28fa72bd9..5e0880b0d0bb4c 100644 --- a/Modules/clinic/_localemodule.c.h +++ b/Modules/clinic/_localemodule.c.h @@ -37,10 +37,15 @@ _locale_setlocale(PyObject *module, PyObject *const *args, Py_ssize_t nargs) locale = NULL; } else if (PyUnicode_Check(args[1])) { - locale = PyUnicode_AsUTF8(args[1]); + Py_ssize_t locale_length; + locale = PyUnicode_AsUTF8AndSize(args[1], &locale_length); if (locale == NULL) { goto exit; } + if (strlen(locale) != (size_t)locale_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("setlocale", "argument 2", "str or None", args[1]); @@ -225,10 +230,15 @@ _locale_gettext(PyObject *module, PyObject *arg) _PyArg_BadArgument("gettext", "argument", "str", arg); goto exit; } - in = PyUnicode_AsUTF8(arg); + Py_ssize_t in_length; + in = PyUnicode_AsUTF8AndSize(arg, &in_length); if (in == NULL) { goto exit; } + if (strlen(in) != (size_t)in_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _locale_gettext_impl(module, in); exit: @@ -267,10 +277,15 @@ _locale_dgettext(PyObject *module, PyObject *const *args, Py_ssize_t nargs) domain = NULL; } else if (PyUnicode_Check(args[0])) { - domain = 
PyUnicode_AsUTF8(args[0]); + Py_ssize_t domain_length; + domain = PyUnicode_AsUTF8AndSize(args[0], &domain_length); if (domain == NULL) { goto exit; } + if (strlen(domain) != (size_t)domain_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("dgettext", "argument 1", "str or None", args[0]); @@ -280,10 +295,15 @@ _locale_dgettext(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("dgettext", "argument 2", "str", args[1]); goto exit; } - in = PyUnicode_AsUTF8(args[1]); + Py_ssize_t in_length; + in = PyUnicode_AsUTF8AndSize(args[1], &in_length); if (in == NULL) { goto exit; } + if (strlen(in) != (size_t)in_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _locale_dgettext_impl(module, domain, in); exit: @@ -322,10 +342,15 @@ _locale_dcgettext(PyObject *module, PyObject *const *args, Py_ssize_t nargs) domain = NULL; } else if (PyUnicode_Check(args[0])) { - domain = PyUnicode_AsUTF8(args[0]); + Py_ssize_t domain_length; + domain = PyUnicode_AsUTF8AndSize(args[0], &domain_length); if (domain == NULL) { goto exit; } + if (strlen(domain) != (size_t)domain_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("dcgettext", "argument 1", "str or None", args[0]); @@ -335,10 +360,15 @@ _locale_dcgettext(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("dcgettext", "argument 2", "str", args[1]); goto exit; } - msgid = PyUnicode_AsUTF8(args[1]); + Py_ssize_t msgid_length; + msgid = PyUnicode_AsUTF8AndSize(args[1], &msgid_length); if (msgid == NULL) { goto exit; } + if (strlen(msgid) != (size_t)msgid_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } category = PyLong_AsInt(args[2]); if (category == -1 && PyErr_Occurred()) { goto exit; @@ -375,10 +405,15 @@ _locale_textdomain(PyObject *module, 
PyObject *arg) domain = NULL; } else if (PyUnicode_Check(arg)) { - domain = PyUnicode_AsUTF8(arg); + Py_ssize_t domain_length; + domain = PyUnicode_AsUTF8AndSize(arg, &domain_length); if (domain == NULL) { goto exit; } + if (strlen(domain) != (size_t)domain_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("textdomain", "argument", "str or None", arg); @@ -421,10 +456,15 @@ _locale_bindtextdomain(PyObject *module, PyObject *const *args, Py_ssize_t nargs _PyArg_BadArgument("bindtextdomain", "argument 1", "str", args[0]); goto exit; } - domain = PyUnicode_AsUTF8(args[0]); + Py_ssize_t domain_length; + domain = PyUnicode_AsUTF8AndSize(args[0], &domain_length); if (domain == NULL) { goto exit; } + if (strlen(domain) != (size_t)domain_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } dirname_obj = args[1]; return_value = _locale_bindtextdomain_impl(module, domain, dirname_obj); @@ -463,18 +503,28 @@ _locale_bind_textdomain_codeset(PyObject *module, PyObject *const *args, Py_ssiz _PyArg_BadArgument("bind_textdomain_codeset", "argument 1", "str", args[0]); goto exit; } - domain = PyUnicode_AsUTF8(args[0]); + Py_ssize_t domain_length; + domain = PyUnicode_AsUTF8AndSize(args[0], &domain_length); if (domain == NULL) { goto exit; } + if (strlen(domain) != (size_t)domain_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (args[1] == Py_None) { codeset = NULL; } else if (PyUnicode_Check(args[1])) { - codeset = PyUnicode_AsUTF8(args[1]); + Py_ssize_t codeset_length; + codeset = PyUnicode_AsUTF8AndSize(args[1], &codeset_length); if (codeset == NULL) { goto exit; } + if (strlen(codeset) != (size_t)codeset_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("bind_textdomain_codeset", "argument 2", "str or None", args[1]); @@ -545,4 +595,4 @@ 
_locale_getencoding(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF #endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */ -/*[clinic end generated code: output=14a4bffed066ebb3 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=034a3c219466d207 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_pickle.c.h b/Modules/clinic/_pickle.c.h index 75edfd03e8ca23..932ace190e6059 100644 --- a/Modules/clinic/_pickle.c.h +++ b/Modules/clinic/_pickle.c.h @@ -466,10 +466,15 @@ _pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("Unpickler", "argument 'encoding'", "str", fastargs[2]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[2], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -479,10 +484,15 @@ _pickle_Unpickler___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("Unpickler", "argument 'errors'", "str", fastargs[3]); goto exit; } - errors = PyUnicode_AsUTF8(fastargs[3]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[3], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -860,10 +870,15 @@ _pickle_load(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("load", "argument 'encoding'", "str", args[2]); goto exit; } - encoding = PyUnicode_AsUTF8(args[2]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[2], &encoding_length); if (encoding == NULL) { 
goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -873,10 +888,15 @@ _pickle_load(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("load", "argument 'errors'", "str", args[3]); goto exit; } - errors = PyUnicode_AsUTF8(args[3]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[3], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -976,10 +996,15 @@ _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec _PyArg_BadArgument("loads", "argument 'encoding'", "str", args[2]); goto exit; } - encoding = PyUnicode_AsUTF8(args[2]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[2], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -989,10 +1014,15 @@ _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec _PyArg_BadArgument("loads", "argument 'errors'", "str", args[3]); goto exit; } - errors = PyUnicode_AsUTF8(args[3]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[3], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_kwonly; } @@ -1004,4 +1034,4 @@ _pickle_loads(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObjec exit: return return_value; } -/*[clinic end generated code: output=1c675a6680a6b90c input=a9049054013a1b77]*/ 
+/*[clinic end generated code: output=7f0564b5fb5410a8 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_ssl.c.h b/Modules/clinic/_ssl.c.h index 7aa435f0b8f3cd..88401b0490a1bb 100644 --- a/Modules/clinic/_ssl.c.h +++ b/Modules/clinic/_ssl.c.h @@ -391,10 +391,15 @@ _ssl__SSLSocket_get_channel_binding(PySSLSocket *self, PyObject *const *args, Py _PyArg_BadArgument("get_channel_binding", "argument 'cb_type'", "str", args[0]); goto exit; } - cb_type = PyUnicode_AsUTF8(args[0]); + Py_ssize_t cb_type_length; + cb_type = PyUnicode_AsUTF8AndSize(args[0], &cb_type_length); if (cb_type == NULL) { goto exit; } + if (strlen(cb_type) != (size_t)cb_type_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = _ssl__SSLSocket_get_channel_binding_impl(self, cb_type); @@ -468,10 +473,15 @@ _ssl__SSLContext_set_ciphers(PySSLContext *self, PyObject *arg) _PyArg_BadArgument("set_ciphers", "argument", "str", arg); goto exit; } - cipherlist = PyUnicode_AsUTF8(arg); + Py_ssize_t cipherlist_length; + cipherlist = PyUnicode_AsUTF8AndSize(arg, &cipherlist_length); if (cipherlist == NULL) { goto exit; } + if (strlen(cipherlist) != (size_t)cipherlist_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _ssl__SSLContext_set_ciphers_impl(self, cipherlist); exit: @@ -1306,10 +1316,15 @@ _ssl_txt2obj(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("txt2obj", "argument 'txt'", "str", args[0]); goto exit; } - txt = PyUnicode_AsUTF8(args[0]); + Py_ssize_t txt_length; + txt = PyUnicode_AsUTF8AndSize(args[0], &txt_length); if (txt == NULL) { goto exit; } + if (strlen(txt) != (size_t)txt_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_pos; } @@ -1412,10 +1427,15 @@ _ssl_enum_certificates(PyObject *module, PyObject *const *args, Py_ssize_t nargs 
_PyArg_BadArgument("enum_certificates", "argument 'store_name'", "str", args[0]); goto exit; } - store_name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t store_name_length; + store_name = PyUnicode_AsUTF8AndSize(args[0], &store_name_length); if (store_name == NULL) { goto exit; } + if (strlen(store_name) != (size_t)store_name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _ssl_enum_certificates_impl(module, store_name); exit: @@ -1483,10 +1503,15 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje _PyArg_BadArgument("enum_crls", "argument 'store_name'", "str", args[0]); goto exit; } - store_name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t store_name_length; + store_name = PyUnicode_AsUTF8AndSize(args[0], &store_name_length); if (store_name == NULL) { goto exit; } + if (strlen(store_name) != (size_t)store_name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _ssl_enum_crls_impl(module, store_name); exit: @@ -1502,4 +1527,4 @@ _ssl_enum_crls(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObje #ifndef _SSL_ENUM_CRLS_METHODDEF #define _SSL_ENUM_CRLS_METHODDEF #endif /* !defined(_SSL_ENUM_CRLS_METHODDEF) */ -/*[clinic end generated code: output=8350af68e0a56792 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=aa6b0a898b6077fe input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_testclinic.c.h b/Modules/clinic/_testclinic.c.h index df81710364d910..fea30e778381de 100644 --- a/Modules/clinic/_testclinic.c.h +++ b/Modules/clinic/_testclinic.c.h @@ -2935,10 +2935,15 @@ clone_f1(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw _PyArg_BadArgument("clone_f1", "argument 'path'", "str", args[0]); goto exit; } - path = PyUnicode_AsUTF8(args[0]); + Py_ssize_t path_length; + path = PyUnicode_AsUTF8AndSize(args[0], &path_length); if (path == NULL) { goto exit; } + if (strlen(path) != 
(size_t)path_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = clone_f1_impl(module, path); exit: @@ -2996,10 +3001,15 @@ clone_f2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObject *kw _PyArg_BadArgument("clone_f2", "argument 'path'", "str", args[0]); goto exit; } - path = PyUnicode_AsUTF8(args[0]); + Py_ssize_t path_length; + path = PyUnicode_AsUTF8AndSize(args[0], &path_length); if (path == NULL) { goto exit; } + if (strlen(path) != (size_t)path_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = clone_f2_impl(module, path); exit: @@ -3131,4 +3141,4 @@ clone_with_conv_f2(PyObject *module, PyObject *const *args, Py_ssize_t nargs, Py exit: return return_value; } -/*[clinic end generated code: output=32dc6ac90757da7a input=a9049054013a1b77]*/ +/*[clinic end generated code: output=90743ac900d60f9f input=a9049054013a1b77]*/ diff --git a/Modules/clinic/_tkinter.c.h b/Modules/clinic/_tkinter.c.h index 1ff3cec568e330..188bcc773cfc41 100644 --- a/Modules/clinic/_tkinter.c.h +++ b/Modules/clinic/_tkinter.c.h @@ -25,10 +25,15 @@ _tkinter_tkapp_eval(TkappObject *self, PyObject *arg) _PyArg_BadArgument("eval", "argument", "str", arg); goto exit; } - script = PyUnicode_AsUTF8(arg); + Py_ssize_t script_length; + script = PyUnicode_AsUTF8AndSize(arg, &script_length); if (script == NULL) { goto exit; } + if (strlen(script) != (size_t)script_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_eval_impl(self, script); exit: @@ -56,10 +61,15 @@ _tkinter_tkapp_evalfile(TkappObject *self, PyObject *arg) _PyArg_BadArgument("evalfile", "argument", "str", arg); goto exit; } - fileName = PyUnicode_AsUTF8(arg); + Py_ssize_t fileName_length; + fileName = PyUnicode_AsUTF8AndSize(arg, &fileName_length); if (fileName == NULL) { goto exit; } + if (strlen(fileName) != (size_t)fileName_length) { + 
PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_evalfile_impl(self, fileName); exit: @@ -87,10 +97,15 @@ _tkinter_tkapp_record(TkappObject *self, PyObject *arg) _PyArg_BadArgument("record", "argument", "str", arg); goto exit; } - script = PyUnicode_AsUTF8(arg); + Py_ssize_t script_length; + script = PyUnicode_AsUTF8AndSize(arg, &script_length); if (script == NULL) { goto exit; } + if (strlen(script) != (size_t)script_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_record_impl(self, script); exit: @@ -118,10 +133,15 @@ _tkinter_tkapp_adderrorinfo(TkappObject *self, PyObject *arg) _PyArg_BadArgument("adderrorinfo", "argument", "str", arg); goto exit; } - msg = PyUnicode_AsUTF8(arg); + Py_ssize_t msg_length; + msg = PyUnicode_AsUTF8AndSize(arg, &msg_length); if (msg == NULL) { goto exit; } + if (strlen(msg) != (size_t)msg_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_adderrorinfo_impl(self, msg); exit: @@ -173,10 +193,15 @@ _tkinter_tkapp_exprstring(TkappObject *self, PyObject *arg) _PyArg_BadArgument("exprstring", "argument", "str", arg); goto exit; } - s = PyUnicode_AsUTF8(arg); + Py_ssize_t s_length; + s = PyUnicode_AsUTF8AndSize(arg, &s_length); if (s == NULL) { goto exit; } + if (strlen(s) != (size_t)s_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_exprstring_impl(self, s); exit: @@ -204,10 +229,15 @@ _tkinter_tkapp_exprlong(TkappObject *self, PyObject *arg) _PyArg_BadArgument("exprlong", "argument", "str", arg); goto exit; } - s = PyUnicode_AsUTF8(arg); + Py_ssize_t s_length; + s = PyUnicode_AsUTF8AndSize(arg, &s_length); if (s == NULL) { goto exit; } + if (strlen(s) != (size_t)s_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = 
_tkinter_tkapp_exprlong_impl(self, s); exit: @@ -235,10 +265,15 @@ _tkinter_tkapp_exprdouble(TkappObject *self, PyObject *arg) _PyArg_BadArgument("exprdouble", "argument", "str", arg); goto exit; } - s = PyUnicode_AsUTF8(arg); + Py_ssize_t s_length; + s = PyUnicode_AsUTF8AndSize(arg, &s_length); if (s == NULL) { goto exit; } + if (strlen(s) != (size_t)s_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_exprdouble_impl(self, s); exit: @@ -266,10 +301,15 @@ _tkinter_tkapp_exprboolean(TkappObject *self, PyObject *arg) _PyArg_BadArgument("exprboolean", "argument", "str", arg); goto exit; } - s = PyUnicode_AsUTF8(arg); + Py_ssize_t s_length; + s = PyUnicode_AsUTF8AndSize(arg, &s_length); if (s == NULL) { goto exit; } + if (strlen(s) != (size_t)s_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _tkinter_tkapp_exprboolean_impl(self, s); exit: @@ -310,10 +350,15 @@ _tkinter_tkapp_createcommand(TkappObject *self, PyObject *const *args, Py_ssize_ _PyArg_BadArgument("createcommand", "argument 1", "str", args[0]); goto exit; } - name = PyUnicode_AsUTF8(args[0]); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(args[0], &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } func = args[1]; return_value = _tkinter_tkapp_createcommand_impl(self, name, func); @@ -342,10 +387,15 @@ _tkinter_tkapp_deletecommand(TkappObject *self, PyObject *arg) _PyArg_BadArgument("deletecommand", "argument", "str", arg); goto exit; } - name = PyUnicode_AsUTF8(arg); + Py_ssize_t name_length; + name = PyUnicode_AsUTF8AndSize(arg, &name_length); if (name == NULL) { goto exit; } + if (strlen(name) != (size_t)name_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = 
_tkinter_tkapp_deletecommand_impl(self, name); exit: @@ -644,10 +694,15 @@ _tkinter_create(PyObject *module, PyObject *const *args, Py_ssize_t nargs) screenName = NULL; } else if (PyUnicode_Check(args[0])) { - screenName = PyUnicode_AsUTF8(args[0]); + Py_ssize_t screenName_length; + screenName = PyUnicode_AsUTF8AndSize(args[0], &screenName_length); if (screenName == NULL) { goto exit; } + if (strlen(screenName) != (size_t)screenName_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("create", "argument 1", "str or None", args[0]); @@ -660,10 +715,15 @@ _tkinter_create(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("create", "argument 2", "str", args[1]); goto exit; } - baseName = PyUnicode_AsUTF8(args[1]); + Py_ssize_t baseName_length; + baseName = PyUnicode_AsUTF8AndSize(args[1], &baseName_length); if (baseName == NULL) { goto exit; } + if (strlen(baseName) != (size_t)baseName_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (nargs < 3) { goto skip_optional; } @@ -671,10 +731,15 @@ _tkinter_create(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("create", "argument 3", "str", args[2]); goto exit; } - className = PyUnicode_AsUTF8(args[2]); + Py_ssize_t className_length; + className = PyUnicode_AsUTF8AndSize(args[2], &className_length); if (className == NULL) { goto exit; } + if (strlen(className) != (size_t)className_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (nargs < 4) { goto skip_optional; } @@ -710,10 +775,15 @@ _tkinter_create(PyObject *module, PyObject *const *args, Py_ssize_t nargs) use = NULL; } else if (PyUnicode_Check(args[7])) { - use = PyUnicode_AsUTF8(args[7]); + Py_ssize_t use_length; + use = PyUnicode_AsUTF8AndSize(args[7], &use_length); if (use == NULL) { goto exit; } + if (strlen(use) != (size_t)use_length) { + 
PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("create", "argument 8", "str or None", args[7]); @@ -791,4 +861,4 @@ _tkinter_getbusywaitinterval(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef _TKINTER_TKAPP_DELETEFILEHANDLER_METHODDEF #define _TKINTER_TKAPP_DELETEFILEHANDLER_METHODDEF #endif /* !defined(_TKINTER_TKAPP_DELETEFILEHANDLER_METHODDEF) */ -/*[clinic end generated code: output=0c8b5f960d7738fd input=a9049054013a1b77]*/ +/*[clinic end generated code: output=d447501ec5aa9447 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/posixmodule.c.h b/Modules/clinic/posixmodule.c.h index d91793c8be8d96..9473dd70ff1460 100644 --- a/Modules/clinic/posixmodule.c.h +++ b/Modules/clinic/posixmodule.c.h @@ -4672,10 +4672,15 @@ os_getgrouplist(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("getgrouplist", "argument 1", "str", args[0]); goto exit; } - user = PyUnicode_AsUTF8(args[0]); + Py_ssize_t user_length; + user = PyUnicode_AsUTF8AndSize(args[0], &user_length); if (user == NULL) { goto exit; } + if (strlen(user) != (size_t)user_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } basegid = PyLong_AsInt(args[1]); if (basegid == -1 && PyErr_Occurred()) { goto exit; @@ -4721,10 +4726,15 @@ os_getgrouplist(PyObject *module, PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("getgrouplist", "argument 1", "str", args[0]); goto exit; } - user = PyUnicode_AsUTF8(args[0]); + Py_ssize_t user_length; + user = PyUnicode_AsUTF8AndSize(args[0], &user_length); if (user == NULL) { goto exit; } + if (strlen(user) != (size_t)user_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!_Py_Gid_Converter(args[1], &basegid)) { goto exit; } @@ -12393,4 +12403,4 @@ os_waitstatus_to_exitcode(PyObject *module, PyObject *const *args, Py_ssize_t na #ifndef OS_WAITSTATUS_TO_EXITCODE_METHODDEF #define 
OS_WAITSTATUS_TO_EXITCODE_METHODDEF #endif /* !defined(OS_WAITSTATUS_TO_EXITCODE_METHODDEF) */ -/*[clinic end generated code: output=a377982a6d1e77b9 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=a05abdc48e3def44 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/pyexpat.c.h b/Modules/clinic/pyexpat.c.h index 4fac03ebaf2a07..a5b93e68598204 100644 --- a/Modules/clinic/pyexpat.c.h +++ b/Modules/clinic/pyexpat.c.h @@ -129,10 +129,15 @@ pyexpat_xmlparser_SetBase(xmlparseobject *self, PyObject *arg) _PyArg_BadArgument("SetBase", "argument", "str", arg); goto exit; } - base = PyUnicode_AsUTF8(arg); + Py_ssize_t base_length; + base = PyUnicode_AsUTF8AndSize(arg, &base_length); if (base == NULL) { goto exit; } + if (strlen(base) != (size_t)base_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = pyexpat_xmlparser_SetBase_impl(self, base); exit: @@ -223,10 +228,15 @@ pyexpat_xmlparser_ExternalEntityParserCreate(xmlparseobject *self, PyTypeObject context = NULL; } else if (PyUnicode_Check(args[0])) { - context = PyUnicode_AsUTF8(args[0]); + Py_ssize_t context_length; + context = PyUnicode_AsUTF8AndSize(args[0], &context_length); if (context == NULL) { goto exit; } + if (strlen(context) != (size_t)context_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("ExternalEntityParserCreate", "argument 1", "str or None", args[0]); @@ -239,10 +249,15 @@ pyexpat_xmlparser_ExternalEntityParserCreate(xmlparseobject *self, PyTypeObject _PyArg_BadArgument("ExternalEntityParserCreate", "argument 2", "str", args[1]); goto exit; } - encoding = PyUnicode_AsUTF8(args[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } 
skip_optional_posonly: return_value = pyexpat_xmlparser_ExternalEntityParserCreate_impl(self, cls, context, encoding); @@ -403,10 +418,15 @@ pyexpat_ParserCreate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, encoding = NULL; } else if (PyUnicode_Check(args[0])) { - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("ParserCreate", "argument 'encoding'", "str or None", args[0]); @@ -421,10 +441,15 @@ pyexpat_ParserCreate(PyObject *module, PyObject *const *args, Py_ssize_t nargs, namespace_separator = NULL; } else if (PyUnicode_Check(args[1])) { - namespace_separator = PyUnicode_AsUTF8(args[1]); + Py_ssize_t namespace_separator_length; + namespace_separator = PyUnicode_AsUTF8AndSize(args[1], &namespace_separator_length); if (namespace_separator == NULL) { goto exit; } + if (strlen(namespace_separator) != (size_t)namespace_separator_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("ParserCreate", "argument 'namespace_separator'", "str or None", args[1]); @@ -473,4 +498,4 @@ pyexpat_ErrorString(PyObject *module, PyObject *arg) #ifndef PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #define PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF #endif /* !defined(PYEXPAT_XMLPARSER_USEFOREIGNDTD_METHODDEF) */ -/*[clinic end generated code: output=bfc1f3d3e2cbc8dc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=48c4296e43777df4 input=a9049054013a1b77]*/ diff --git a/Modules/clinic/symtablemodule.c.h b/Modules/clinic/symtablemodule.c.h index d624c22cf282ff..2ecd3afc00d2be 100644 --- a/Modules/clinic/symtablemodule.c.h +++ b/Modules/clinic/symtablemodule.c.h @@ -36,13 +36,18 @@ _symtable_symtable(PyObject *module, 
PyObject *const *args, Py_ssize_t nargs) _PyArg_BadArgument("symtable", "argument 3", "str", args[2]); goto exit; } - startstr = PyUnicode_AsUTF8(args[2]); + Py_ssize_t startstr_length; + startstr = PyUnicode_AsUTF8AndSize(args[2], &startstr_length); if (startstr == NULL) { goto exit; } + if (strlen(startstr) != (size_t)startstr_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = _symtable_symtable_impl(module, source, filename, startstr); exit: return return_value; } -/*[clinic end generated code: output=9af1ab5a114a1ec7 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=931964a76a72f850 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytearrayobject.c.h b/Objects/clinic/bytearrayobject.c.h index 355ae49ca74f71..d95245067e2608 100644 --- a/Objects/clinic/bytearrayobject.c.h +++ b/Objects/clinic/bytearrayobject.c.h @@ -68,10 +68,15 @@ bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytearray", "argument 'encoding'", "str", fastargs[1]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -80,10 +85,15 @@ bytearray___init__(PyObject *self, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytearray", "argument 'errors'", "str", fastargs[2]); goto exit; } - errors = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytearray___init___impl((PyByteArrayObject *)self, 
arg, encoding, errors); @@ -950,10 +960,15 @@ bytearray_decode(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t narg _PyArg_BadArgument("decode", "argument 'encoding'", "str", args[0]); goto exit; } - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -962,10 +977,15 @@ bytearray_decode(PyByteArrayObject *self, PyObject *const *args, Py_ssize_t narg _PyArg_BadArgument("decode", "argument 'errors'", "str", args[1]); goto exit; } - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytearray_decode_impl(self, encoding, errors); @@ -1241,4 +1261,4 @@ bytearray_sizeof(PyByteArrayObject *self, PyObject *Py_UNUSED(ignored)) { return bytearray_sizeof_impl(self); } -/*[clinic end generated code: output=5a7de6295a7ce6cc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=0797a5e03cda2a16 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/bytesobject.c.h b/Objects/clinic/bytesobject.c.h index 042d0bf86e453c..1e45be3e7aefb3 100644 --- a/Objects/clinic/bytesobject.c.h +++ b/Objects/clinic/bytesobject.c.h @@ -720,10 +720,15 @@ bytes_decode(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObj _PyArg_BadArgument("decode", "argument 'encoding'", "str", args[0]); goto exit; } - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != 
(size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -732,10 +737,15 @@ bytes_decode(PyBytesObject *self, PyObject *const *args, Py_ssize_t nargs, PyObj _PyArg_BadArgument("decode", "argument 'errors'", "str", args[1]); goto exit; } - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytes_decode_impl(self, encoding, errors); @@ -987,10 +997,15 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytes", "argument 'encoding'", "str", fastargs[1]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -999,14 +1014,19 @@ bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("bytes", "argument 'errors'", "str", fastargs[2]); goto exit; } - errors = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = bytes_new_impl(type, x, encoding, errors); exit: return return_value; } -/*[clinic end generated code: output=97aab3f6ae398664 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=8a49dbbd78914a6f input=a9049054013a1b77]*/ diff --git 
a/Objects/clinic/floatobject.c.h b/Objects/clinic/floatobject.c.h index 8c331197f05b5c..10f6149cc88c22 100644 --- a/Objects/clinic/floatobject.c.h +++ b/Objects/clinic/floatobject.c.h @@ -275,10 +275,15 @@ float___getformat__(PyTypeObject *type, PyObject *arg) _PyArg_BadArgument("__getformat__", "argument", "str", arg); goto exit; } - typestr = PyUnicode_AsUTF8(arg); + Py_ssize_t typestr_length; + typestr = PyUnicode_AsUTF8AndSize(arg, &typestr_length); if (typestr == NULL) { goto exit; } + if (strlen(typestr) != (size_t)typestr_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = float___getformat___impl(type, typestr); exit: @@ -313,4 +318,4 @@ float___format__(PyObject *self, PyObject *arg) exit: return return_value; } -/*[clinic end generated code: output=01f6fbd082eefead input=a9049054013a1b77]*/ +/*[clinic end generated code: output=c79743c8551c30d9 input=a9049054013a1b77]*/ diff --git a/Objects/clinic/memoryobject.c.h b/Objects/clinic/memoryobject.c.h index ebc1e0617bd303..f199434dacb9e8 100644 --- a/Objects/clinic/memoryobject.c.h +++ b/Objects/clinic/memoryobject.c.h @@ -305,10 +305,15 @@ memoryview_tobytes(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t n order = NULL; } else if (PyUnicode_Check(args[0])) { - order = PyUnicode_AsUTF8(args[0]); + Py_ssize_t order_length; + order = PyUnicode_AsUTF8AndSize(args[0], &order_length); if (order == NULL) { goto exit; } + if (strlen(order) != (size_t)order_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } } else { _PyArg_BadArgument("tobytes", "argument 'order'", "str or None", args[0]); @@ -408,4 +413,4 @@ memoryview_hex(PyMemoryViewObject *self, PyObject *const *args, Py_ssize_t nargs exit: return return_value; } -/*[clinic end generated code: output=abd8c0ce804d8992 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=7e76a09106921ba2 input=a9049054013a1b77]*/ diff --git 
a/Objects/clinic/unicodeobject.c.h b/Objects/clinic/unicodeobject.c.h index 83e3bf22989848..7711434f17c2bc 100644 --- a/Objects/clinic/unicodeobject.c.h +++ b/Objects/clinic/unicodeobject.c.h @@ -203,10 +203,15 @@ unicode_encode(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("encode", "argument 'encoding'", "str", args[0]); goto exit; } - encoding = PyUnicode_AsUTF8(args[0]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(args[0], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -215,10 +220,15 @@ unicode_encode(PyObject *self, PyObject *const *args, Py_ssize_t nargs, PyObject _PyArg_BadArgument("encode", "argument 'errors'", "str", args[1]); goto exit; } - errors = PyUnicode_AsUTF8(args[1]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(args[1], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = unicode_encode_impl(self, encoding, errors); @@ -1463,10 +1473,15 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("str", "argument 'encoding'", "str", fastargs[1]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[1]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[1], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!--noptargs) { goto skip_optional_pos; } @@ -1475,14 +1490,19 @@ unicode_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("str", "argument 'errors'", "str", fastargs[2]); goto exit; } - errors = 
PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t errors_length; + errors = PyUnicode_AsUTF8AndSize(fastargs[2], &errors_length); if (errors == NULL) { goto exit; } + if (strlen(errors) != (size_t)errors_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_pos: return_value = unicode_new_impl(type, x, encoding, errors); exit: return return_value; } -/*[clinic end generated code: output=20313d6339272ddc input=a9049054013a1b77]*/ +/*[clinic end generated code: output=873d8b3d09af3095 input=a9049054013a1b77]*/ diff --git a/Python/clinic/Python-tokenize.c.h b/Python/clinic/Python-tokenize.c.h index 7417020d94c2f2..730fa8ef2a2154 100644 --- a/Python/clinic/Python-tokenize.c.h +++ b/Python/clinic/Python-tokenize.c.h @@ -65,14 +65,19 @@ tokenizeriter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) _PyArg_BadArgument("tokenizeriter", "argument 'encoding'", "str", fastargs[2]); goto exit; } - encoding = PyUnicode_AsUTF8(fastargs[2]); + Py_ssize_t encoding_length; + encoding = PyUnicode_AsUTF8AndSize(fastargs[2], &encoding_length); if (encoding == NULL) { goto exit; } + if (strlen(encoding) != (size_t)encoding_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } skip_optional_kwonly: return_value = tokenizeriter_new_impl(type, readline, extra_tokens, encoding); exit: return return_value; } -/*[clinic end generated code: output=92cb8176149f0924 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=dcd6ec48f06a092e input=a9049054013a1b77]*/ diff --git a/Python/clinic/bltinmodule.c.h b/Python/clinic/bltinmodule.c.h index 4fb06bd0dba5fe..8d40e659b54a57 100644 --- a/Python/clinic/bltinmodule.c.h +++ b/Python/clinic/bltinmodule.c.h @@ -329,10 +329,15 @@ builtin_compile(PyObject *module, PyObject *const *args, Py_ssize_t nargs, PyObj _PyArg_BadArgument("compile", "argument 'mode'", "str", args[2]); goto exit; } - mode = PyUnicode_AsUTF8(args[2]); + Py_ssize_t mode_length; + mode 
= PyUnicode_AsUTF8AndSize(args[2], &mode_length); if (mode == NULL) { goto exit; } + if (strlen(mode) != (size_t)mode_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } if (!noptargs) { goto skip_optional_pos; } @@ -1207,4 +1212,4 @@ builtin_issubclass(PyObject *module, PyObject *const *args, Py_ssize_t nargs) exit: return return_value; } -/*[clinic end generated code: output=95d3813b1798f018 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=31bded5d08647a57 input=a9049054013a1b77]*/ diff --git a/Python/clinic/sysmodule.c.h b/Python/clinic/sysmodule.c.h index 571a34bd4a64af..98717ecc875b8b 100644 --- a/Python/clinic/sysmodule.c.h +++ b/Python/clinic/sysmodule.c.h @@ -1259,10 +1259,15 @@ sys_activate_stack_trampoline(PyObject *module, PyObject *arg) _PyArg_BadArgument("activate_stack_trampoline", "argument", "str", arg); goto exit; } - backend = PyUnicode_AsUTF8(arg); + Py_ssize_t backend_length; + backend = PyUnicode_AsUTF8AndSize(arg, &backend_length); if (backend == NULL) { goto exit; } + if (strlen(backend) != (size_t)backend_length) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + } return_value = sys_activate_stack_trampoline_impl(module, backend); exit: @@ -1447,4 +1452,4 @@ sys__get_cpu_count_config(PyObject *module, PyObject *Py_UNUSED(ignored)) #ifndef SYS_GETANDROIDAPILEVEL_METHODDEF #define SYS_GETANDROIDAPILEVEL_METHODDEF #endif /* !defined(SYS_GETANDROIDAPILEVEL_METHODDEF) */ -/*[clinic end generated code: output=cdfb714878deeaf1 input=a9049054013a1b77]*/ +/*[clinic end generated code: output=f36d45c829250775 input=a9049054013a1b77]*/ diff --git a/Tools/clinic/clinic.py b/Tools/clinic/clinic.py index 4f238a3dc0d4af..5f94b90ae09bd0 100755 --- a/Tools/clinic/clinic.py +++ b/Tools/clinic/clinic.py @@ -4350,23 +4350,34 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st {bad_argument} goto exit; }}}} - {paramname} = 
PyUnicode_AsUTF8({argname}); + Py_ssize_t {length_name}; + {paramname} = PyUnicode_AsUTF8AndSize({argname}, &{length_name}); if ({paramname} == NULL) {{{{ goto exit; }}}} + if (strlen({paramname}) != (size_t){length_name}) {{{{ + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + }}}} """, argname=argname, - bad_argument=self.bad_argument(displayname, 'str', limited_capi=limited_capi)) + bad_argument=self.bad_argument(displayname, 'str', limited_capi=limited_capi), + length_name=self.length_name) if self.format_unit == 'z': return self.format_code(""" if ({argname} == Py_None) {{{{ {paramname} = NULL; }}}} else if (PyUnicode_Check({argname})) {{{{ - {paramname} = PyUnicode_AsUTF8({argname}); + Py_ssize_t {length_name}; + {paramname} = PyUnicode_AsUTF8AndSize({argname}, &{length_name}); if ({paramname} == NULL) {{{{ goto exit; }}}} + if (strlen({paramname}) != (size_t){length_name}) {{{{ + PyErr_SetString(PyExc_ValueError, "embedded null character"); + goto exit; + }}}} }}}} else {{{{ {bad_argument} @@ -4374,7 +4385,8 @@ def parse_arg(self, argname: str, displayname: str, *, limited_capi: bool) -> st }}}} """, argname=argname, - bad_argument=self.bad_argument(displayname, 'str or None', limited_capi=limited_capi)) + bad_argument=self.bad_argument(displayname, 'str or None', limited_capi=limited_capi), + length_name=self.length_name) return super().parse_arg(argname, displayname, limited_capi=limited_capi) # From 938652fbec9f6f63bbb477c4ba3abc3910697336 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 Nov 2023 22:50:57 +0100 Subject: [PATCH 2/5] Revert "gh-111089: Use PyUnicode_AsUTF8() in getargs.c (#111620)" This reverts commit cde1071b2a72e8261ca66053ef61431b7f3a81fd. 
--- Python/getargs.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Python/getargs.c b/Python/getargs.c index 4d91818ad21a44..5a12ca8def74fa 100644 --- a/Python/getargs.c +++ b/Python/getargs.c @@ -932,15 +932,19 @@ convertsimple(PyObject *arg, const char **p_format, va_list *p_va, int flags, } else { /* "s" or "z" */ const char **p = va_arg(*p_va, const char **); + Py_ssize_t len; sarg = NULL; if (c == 'z' && arg == Py_None) *p = NULL; else if (PyUnicode_Check(arg)) { - sarg = PyUnicode_AsUTF8(arg); - if (sarg == NULL) { + sarg = PyUnicode_AsUTF8AndSize(arg, &len); + if (sarg == NULL) return converterr(CONV_UNICODE, arg, msgbuf, bufsize); + if (strlen(sarg) != (size_t)len) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + RETURN_ERR_OCCURRED; } *p = sarg; } From be7e341a210c056b8b9243007ab06ca08a6bab12 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 Nov 2023 22:54:24 +0100 Subject: [PATCH 3/5] Revert "gh-111089: PyUnicode_AsUTF8() now raises on embedded NUL (#111091)" This reverts commit d731579bfb9a497cfb0076cb6b221058a20088fe. --- Doc/c-api/unicode.rst | 8 ----- Doc/whatsnew/3.13.rst | 6 ---- Include/cpython/unicodeobject.h | 16 ++++++++++ Include/unicodeobject.h | 30 +++++++------------ Lib/test/test_capi/test_unicode.py | 5 +--- ...-10-20-01-42-43.gh-issue-111089.VIrd5q.rst | 2 -- Objects/typeobject.c | 5 ++-- Objects/unicodeobject.c | 8 +---- 8 files changed, 31 insertions(+), 49 deletions(-) delete mode 100644 Misc/NEWS.d/next/C API/2023-10-20-01-42-43.gh-issue-111089.VIrd5q.rst diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst index 5fa37963e07eff..e654412965a727 100644 --- a/Doc/c-api/unicode.rst +++ b/Doc/c-api/unicode.rst @@ -992,19 +992,11 @@ These are the UTF-8 codec APIs: As :c:func:`PyUnicode_AsUTF8AndSize`, but does not store the size. - Raise an exception if the *unicode* string contains embedded null - characters. 
To accept embedded null characters and truncate on purpose - at the first null byte, ``PyUnicode_AsUTF8AndSize(unicode, NULL)`` can be - used instead. - .. versionadded:: 3.3 .. versionchanged:: 3.7 The return type is now ``const char *`` rather of ``char *``. - .. versionchanged:: 3.13 - Raise an exception if the string contains embedded null characters. - UTF-32 Codecs """"""""""""" diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 84d50a68eace4b..8db8a798caf0a7 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1222,12 +1222,6 @@ Porting to Python 3.13 Note that ``Py_TRASHCAN_BEGIN`` has a second argument which should be the deallocation function it is in. -* The :c:func:`PyUnicode_AsUTF8` function now raises an exception if the string - contains embedded null characters. To accept embedded null characters and - truncate on purpose at the first null byte, - ``PyUnicode_AsUTF8AndSize(unicode, NULL)`` can be used instead. - (Contributed by Victor Stinner in :gh:`111089`.) - * On Windows, ``Python.h`` no longer includes the ``<stddef.h>`` standard header file. If needed, it should now be included explicitly. For example, it provides ``offsetof()`` function, and ``size_t`` and ``ptrdiff_t`` types. diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h index d200fa0622cef5..cd56a6a74acf51 100644 --- a/Include/cpython/unicodeobject.h +++ b/Include/cpython/unicodeobject.h @@ -440,6 +440,22 @@ PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( const void *buffer, Py_ssize_t size); +/* --- Manage the default encoding ---------------------------------------- */ + +/* Returns a pointer to the default encoding (UTF-8) of the + Unicode object unicode. + + Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation + in the unicodeobject. + + _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to + support the previous internal function with the same behaviour.
+ + Use of this API is DEPRECATED since no size information can be + extracted from the returned data. +*/ + +PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); /* === Characters Type APIs =============================================== */ diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h index ee7b769ce5a6fc..dee00715b3c51d 100644 --- a/Include/unicodeobject.h +++ b/Include/unicodeobject.h @@ -443,25 +443,17 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( PyObject *unicode /* Unicode object */ ); -// Returns a pointer to the UTF-8 encoding of the Unicode object unicode. -// -// Raise an exception if the string contains embedded null characters. -// Use PyUnicode_AsUTF8AndSize() to accept embedded null characters. -// -// This function caches the UTF-8 encoded string in the Unicode object -// and subsequent calls will return the same string. The memory is released -// when the Unicode object is deallocated. -PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); - -// Returns a pointer to the UTF-8 encoding of the -// Unicode object unicode and the size of the encoded representation -// in bytes stored in `*size` (if size is not NULL). -// -// On error, `*size` is set to 0 (if size is not NULL). -// -// This function caches the UTF-8 encoded string in the Unicode object -// and subsequent calls will return the same string. The memory is released -// when the Unicode object is deallocated. +/* Returns a pointer to the default encoding (UTF-8) of the + Unicode object unicode and the size of the encoded representation + in bytes stored in *size. + + In case of an error, no *size is set. + + This function caches the UTF-8 encoded string in the unicodeobject + and subsequent calls will return the same string. The memory is released + when the unicodeobject is deallocated. 
+*/ + #if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( PyObject *unicode, diff --git a/Lib/test/test_capi/test_unicode.py b/Lib/test/test_capi/test_unicode.py index d8537244b39555..bb6161abf4da81 100644 --- a/Lib/test/test_capi/test_unicode.py +++ b/Lib/test/test_capi/test_unicode.py @@ -914,10 +914,7 @@ def test_asutf8(self): self.assertEqual(unicode_asutf8('abc', 4), b'abc\0') self.assertEqual(unicode_asutf8('абв', 7), b'\xd0\xb0\xd0\xb1\xd0\xb2\0') self.assertEqual(unicode_asutf8('\U0001f600', 5), b'\xf0\x9f\x98\x80\0') - - # disallow embedded null characters - self.assertRaises(ValueError, unicode_asutf8, 'abc\0', 0) - self.assertRaises(ValueError, unicode_asutf8, 'abc\0def', 0) + self.assertEqual(unicode_asutf8('abc\0def', 8), b'abc\0def\0') self.assertRaises(UnicodeEncodeError, unicode_asutf8, '\ud8ff', 0) self.assertRaises(TypeError, unicode_asutf8, b'abc', 0) diff --git a/Misc/NEWS.d/next/C API/2023-10-20-01-42-43.gh-issue-111089.VIrd5q.rst b/Misc/NEWS.d/next/C API/2023-10-20-01-42-43.gh-issue-111089.VIrd5q.rst deleted file mode 100644 index 2008dd5438d2b5..00000000000000 --- a/Misc/NEWS.d/next/C API/2023-10-20-01-42-43.gh-issue-111089.VIrd5q.rst +++ /dev/null @@ -1,2 +0,0 @@ -The :c:func:`PyUnicode_AsUTF8` function now raises an exception if the -string contains embedded null characters. Patch by Victor Stinner. 
diff --git a/Objects/typeobject.c b/Objects/typeobject.c index f44e30cf0446a5..557464c6740c18 100644 --- a/Objects/typeobject.c +++ b/Objects/typeobject.c @@ -3501,14 +3501,13 @@ type_new_set_doc(PyTypeObject *type) return 0; } - Py_ssize_t doc_size; - const char *doc_str = PyUnicode_AsUTF8AndSize(doc, &doc_size); + const char *doc_str = PyUnicode_AsUTF8(doc); if (doc_str == NULL) { return -1; } // Silently truncate the docstring if it contains a null byte - Py_ssize_t size = doc_size + 1; + Py_ssize_t size = strlen(doc_str) + 1; char *tp_doc = (char *)PyObject_Malloc(size); if (tp_doc == NULL) { PyErr_NoMemory(); diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c index 87636efcfca050..53e1e56babf952 100644 --- a/Objects/unicodeobject.c +++ b/Objects/unicodeobject.c @@ -3844,13 +3844,7 @@ PyUnicode_AsUTF8AndSize(PyObject *unicode, Py_ssize_t *psize) const char * PyUnicode_AsUTF8(PyObject *unicode) { - Py_ssize_t size; - const char *utf8 = PyUnicode_AsUTF8AndSize(unicode, &size); - if (utf8 != NULL && strlen(utf8) != (size_t)size) { - PyErr_SetString(PyExc_ValueError, "embedded null character"); - return NULL; - } - return utf8; + return PyUnicode_AsUTF8AndSize(unicode, NULL); } /* From 0e874831110ff2afed87d68b7264a43f5fb6c5a4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 Nov 2023 22:59:08 +0100 Subject: [PATCH 4/5] Revert "gh-111089: Add PyUnicode_AsUTF8() to the limited C API (#111121)" This reverts commit d8f32be5b6a736dc2fc9dca3f1bf176c82fc9b44. 
--- Doc/data/stable_abi.dat | 1 - Doc/whatsnew/3.13.rst | 3 --- Lib/test/test_stable_abi_ctypes.py | 1 - .../next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst | 2 -- Misc/stable_abi.toml | 2 -- Modules/_multiprocessing/posixshmem.c | 4 ++-- PC/python3dll.c | 1 - 7 files changed, 2 insertions(+), 12 deletions(-) delete mode 100644 Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst diff --git a/Doc/data/stable_abi.dat b/Doc/data/stable_abi.dat index 52d6d967d66327..811b1bd84d2417 100644 --- a/Doc/data/stable_abi.dat +++ b/Doc/data/stable_abi.dat @@ -726,7 +726,6 @@ function,PyUnicode_AsUCS4,3.7,, function,PyUnicode_AsUCS4Copy,3.7,, function,PyUnicode_AsUTF16String,3.2,, function,PyUnicode_AsUTF32String,3.2,, -function,PyUnicode_AsUTF8,3.13,, function,PyUnicode_AsUTF8AndSize,3.10,, function,PyUnicode_AsUTF8String,3.2,, function,PyUnicode_AsUnicodeEscapeString,3.2,, diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 8db8a798caf0a7..291e276dc67ce0 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -1149,9 +1149,6 @@ New Features :c:func:`PyErr_WriteUnraisable`, but allow to customize the warning mesage. (Contributed by Serhiy Storchaka in :gh:`108082`.) -* Add :c:func:`PyUnicode_AsUTF8` function to the limited C API. - (Contributed by Victor Stinner in :gh:`111089`.) 
- Porting to Python 3.13 ---------------------- diff --git a/Lib/test/test_stable_abi_ctypes.py b/Lib/test/test_stable_abi_ctypes.py index 85a63c44b49431..4976ac3642bbe4 100644 --- a/Lib/test/test_stable_abi_ctypes.py +++ b/Lib/test/test_stable_abi_ctypes.py @@ -745,7 +745,6 @@ def test_windows_feature_macros(self): "PyUnicode_AsUCS4Copy", "PyUnicode_AsUTF16String", "PyUnicode_AsUTF32String", - "PyUnicode_AsUTF8", "PyUnicode_AsUTF8AndSize", "PyUnicode_AsUTF8String", "PyUnicode_AsUnicodeEscapeString", diff --git a/Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst b/Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst deleted file mode 100644 index fe32e06fe4f063..00000000000000 --- a/Misc/NEWS.d/next/C API/2023-10-20-18-07-24.gh-issue-111089.RxkyrQ.rst +++ /dev/null @@ -1,2 +0,0 @@ -Add :c:func:`PyUnicode_AsUTF8` function to the limited C API. Patch by -Victor Stinner. diff --git a/Misc/stable_abi.toml b/Misc/stable_abi.toml index b55bb599d71dcc..22b25dd0ec141f 100644 --- a/Misc/stable_abi.toml +++ b/Misc/stable_abi.toml @@ -2478,8 +2478,6 @@ added = '3.13' [function.PySys_AuditTuple] added = '3.13' -[function.PyUnicode_AsUTF8] - added = '3.13' [function._Py_SetRefcnt] added = '3.13' abi_only = true diff --git a/Modules/_multiprocessing/posixshmem.c b/Modules/_multiprocessing/posixshmem.c index b1f776cbbeca3f..cd08a9fedc0578 100644 --- a/Modules/_multiprocessing/posixshmem.c +++ b/Modules/_multiprocessing/posixshmem.c @@ -48,7 +48,7 @@ _posixshmem_shm_open_impl(PyObject *module, PyObject *path, int flags, { int fd; int async_err = 0; - const char *name = PyUnicode_AsUTF8(path); + const char *name = PyUnicode_AsUTF8AndSize(path, NULL); if (name == NULL) { return -1; } @@ -87,7 +87,7 @@ _posixshmem_shm_unlink_impl(PyObject *module, PyObject *path) { int rv; int async_err = 0; - const char *name = PyUnicode_AsUTF8(path); + const char *name = PyUnicode_AsUTF8AndSize(path, NULL); if (name == NULL) { return NULL; } diff --git 
a/PC/python3dll.c b/PC/python3dll.c index fa6bf1f0282b0a..07aa84c91f9fc7 100755 --- a/PC/python3dll.c +++ b/PC/python3dll.c @@ -662,7 +662,6 @@ EXPORT_FUNC(PyUnicode_AsUCS4Copy) EXPORT_FUNC(PyUnicode_AsUnicodeEscapeString) EXPORT_FUNC(PyUnicode_AsUTF16String) EXPORT_FUNC(PyUnicode_AsUTF32String) -EXPORT_FUNC(PyUnicode_AsUTF8) EXPORT_FUNC(PyUnicode_AsUTF8AndSize) EXPORT_FUNC(PyUnicode_AsUTF8String) EXPORT_FUNC(PyUnicode_AsWideChar) From 8b4a011b8d95bcb711f058fdc28b68ddd729a1c6 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 7 Nov 2023 23:04:33 +0100 Subject: [PATCH 5/5] Revert "gh-111089: Use PyUnicode_AsUTF8() in sqlite3 (#111122)" This reverts commit 37e4e20eaa8f27ada926d49e5971fecf0477ad26. --- Modules/_sqlite/connection.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Modules/_sqlite/connection.c b/Modules/_sqlite/connection.c index ce46c1e69f3131..319ed0c29c7a9b 100644 --- a/Modules/_sqlite/connection.c +++ b/Modules/_sqlite/connection.c @@ -76,10 +76,15 @@ isolation_level_converter(PyObject *str_or_none, const char **result) *result = NULL; } else if (PyUnicode_Check(str_or_none)) { - const char *str = PyUnicode_AsUTF8(str_or_none); + Py_ssize_t sz; + const char *str = PyUnicode_AsUTF8AndSize(str_or_none, &sz); if (str == NULL) { return 0; } + if (strlen(str) != (size_t)sz) { + PyErr_SetString(PyExc_ValueError, "embedded null character"); + return 0; + } const char *level = get_isolation_level(str); if (level == NULL) {