[New-bugs-announce] [issue24566] Unsigned Integer Overflow in sre_lib.h

bee13oy report at bugs.python.org
Sun Jul 5 04:49:21 CEST 2015


New submission from bee13oy:

I found an Unsigned Integer Overflow in sre_lib.h.

Tested on Windows 7 x86 (English) with Python 3.4.3 and Python 3.5.0b2.

Crash:
------
(1a84.16b0): Access violation - code c0000005 (!!! second chance !!!)
eax=00000002 ebx=0038f40c ecx=00000002 edx=0526cbb8 esi=83e0116b edi=c3e011eb
eip=58bcfa53 esp=0038f384 ebp=0038f394 iopl=0         nv up ei ng nz na po cy
cs=0023  ss=002b  ds=002b  es=002b  fs=0053  gs=002b             efl=00010283
python35+0x1fa53:
58bcfa53 380e            cmp     byte ptr [esi],cl          ds:002b:83e0116b=??

code: 
------
58bcfa3d 8b4a04          mov     ecx,dword ptr [edx+4]
58bcfa40 0fb6c1          movzx   eax,cl
58bcfa43 3bc1            cmp     eax,ecx
58bcfa45 0f8593000000    jne     python35+0x1fade (58bcfade)
58bcfa4b 3bf7            cmp     esi,edi
58bcfa4d 0f838b000000    jae     python35+0x1fade (58bcfade)
58bcfa53 380e            cmp     byte ptr [esi],cl          ds:002b:83e0116b=??
58bcfa55 0f8583000000    jne     python35+0x1fade (58bcfade)

stack:
------
0:000> kb
ChildEBP RetAddr  Args to Child              
WARNING: Stack unwind information not available. Following frames may be wrong.
0038f394 58bcfedf 40000080 0038f40c 83e0116c python35+0x1fa53
0038f3c0 58bd0f58 00000000 06016508 0526cb60 python35+0x1fedf
0038f400 58bd5039 58e40c58 83e0116b 03e01158 python35+0x20f58
0038f480 58bd76b2 00000000 7fffffff 00000000 python35+0x25039
0038f4a4 58c925cf 0526cb60 0528a4d0 00000000 python35+0x276b2
0038f4c4 58cf3633 06016508 0528a4d0 00000000 python35!PyCFunction_Call+0x2f
0038f4f8 58cf0b05 05840f90 03e0ab90 00000001 python35!PyEval_GetFuncDesc+0x373
0038f570 58cf3791 03e0ab90 00000000 00000001 python35!PyEval_EvalFrameEx+0x22d5
0038f594 58cf3692 00000001 00000001 00000000 python35!PyEval_GetFuncDesc+0x4d1
0038f5c8 58cf0b05 03e08de0 0012e850 00000000 python35!PyEval_GetFuncDesc+0x3d2
0038f640 58cf25bb 0012e850 00000000 065feff0 python35!PyEval_EvalFrameEx+0x22d5
0038f68c 58d29302 03dcfaa8 00000000 00000000 python35!PyEval_EvalFrameEx+0x3d8b
0038f6c8 58d29195 03dcfaa8 03dcfaa8 0038f790 python35!PyRun_FileExFlags+0x1f2
0038f6f4 58d2820a 05994fc8 052525a8 00000101 python35!PyRun_FileExFlags+0x85
0038f738 58bfe9f7 05994fc8 052525a8 00000001 python35!PyRun_SimpleFileExFlags+0x20a
0038f764 58bff32b 0038f790 5987b648 5987cc94 python35!Py_hashtable_copy+0x5e17
0038f808 1c6f11df 00000003 05796f70 05210f50 python35!Py_Main+0x90b

source code:

LOCAL(Py_ssize_t)
SRE(search)(SRE_STATE* state, SRE_CODE* pattern)
{
    SRE_CHAR* ptr = (SRE_CHAR *)state->start;
    SRE_CHAR* end = (SRE_CHAR *)state->end;
    Py_ssize_t status = 0;
    Py_ssize_t prefix_len = 0;
    Py_ssize_t prefix_skip = 0;
    SRE_CODE* prefix = NULL;
    SRE_CODE* charset = NULL;
    SRE_CODE* overlap = NULL;
    int flags = 0;

    if (pattern[0] == SRE_OP_INFO) {
        /* optimization info block */
        /* <INFO> <1=skip> <2=flags> <3=min> <4=max> <5=prefix info>  */

        flags = pattern[2];

        if (pattern[3] > 1) {
            /* adjust end point (but make sure we leave at least one
               character in there, so literal search will work) */
            end -= pattern[3] - 1;
            if (end <= ptr)
                end = ptr;
        }
		...
	}
	
	...
	
	} else
		/* general case */
		while (ptr <= end) {
			TRACE(("|%p|%p|SEARCH\n", pattern, ptr));
			state->start = state->ptr = ptr++;
			status = SRE(match)(state, pattern, 0);
			if (status != 0)
				break;
    }
}

SRE(count)(SRE_STATE* state, SRE_CODE* pattern, Py_ssize_t maxcount)
{
    SRE_CODE chr;
    SRE_CHAR c;
    SRE_CHAR* ptr = (SRE_CHAR *)state->ptr;
    SRE_CHAR* end = (SRE_CHAR *)state->end;
    Py_ssize_t i;

    /* adjust end */
    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
        end = ptr + maxcount;
		
    ...
	
#if SIZEOF_SRE_CHAR < 4
        if ((SRE_CODE) c != chr)
            ; /* literal can't match: doesn't fit in char width */
        else
#endif
        while (ptr < end && *ptr == c) // crash here, ptr points to an unreadable memory.
            ptr++;
        break;
}

poc code:
---cut----
import re

pattern = "([\\2]{1073741952})"
regexp = re.compile(r''+pattern+'')
sgroup = regexp.search(pattern)

---cut---

1.) In SRE(search), pattern[3] is equal to 1073741952 (0x40000080). The program does not bound this value, so the unsigned subtraction `end -= pattern[3] - 1` wraps around and leaves the end pointer at an invalid, very large address (greater than ptr); the `end <= ptr` check therefore never triggers.
2.) The program then runs while (ptr <= end) { state->start = state->ptr = ptr++; ... }, but state->end still holds the original value.
3.) After a while, execution reaches SRE(count), which adjusts the end: end - ptr = 0x7fffffff, which is larger than 0x40000080, and by then ptr already points to an invalid address.

----------
components: Regular Expressions
messages: 246290
nosy: bee13oy, ezio.melotti, mrabarnett
priority: normal
severity: normal
status: open
title: Unsigned Integer Overflow in sre_lib.h
versions: Python 3.4, Python 3.5

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue24566>
_______________________________________


More information about the New-bugs-announce mailing list