Sorry if this is OT.
I've hit a repeatable segfault in pyexpat on RH Linux 9 with Python 2.3.3 (I'm trying
rss2email).
It seems that XML_Parse is returning an error, but when XML_GetCurrentLineNumber
is called, positionPtr is not valid.
I catch this in GDB and have looked at the stack (see below), but when I get back up
the stack into PyCFunction_Call I don't know what to do.
Ideally, I want to find out the Python source file and line number that is currently being
executed, then look at the Python source to figure out exactly which pyexpat call is
being made just before the call to get_parse_result.
It seems strange that Python code appears to be calling get_parse_result directly.
So, how can I figure out where in the Python source the function call is coming from
using gdb? I'm sure it involves "print" and some casts, but I couldn't find a how-to on
python.org.
--
Python 2.3.3 (#1, Dec 22 2003, 14:01:09)
[GCC 3.2.2 20030222 (Red Hat Linux 3.2.2-5)] on linux2
Starting program: /usr/local/bin/python2.3 rss2email.py feeds.dat run --no-send
[New Thread 1074948352 (LWP 2379)]
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 1074948352 (LWP 2379)]
normal_updatePosition (enc=0x407211c0,
ptr=0x40785000,
end=0x823ac09 "öteborg, Sweden, 7-9 June 2004. To help us serve the community in the best way possible, we need your input on what you think we should do in the way of tutorials.\"</i>]\n\n<!-- /newsinfo -->\n</p>\n\n\n",
pos=0x8238954) at xmltok_impl.c:1745
1745 switch (BYTE_TYPE(enc, ptr)) {
(gdb) list
1740 const char *ptr,
1741 const char *end,
1742 POSITION *pos)
1743 {
1744 while (ptr != end) {
1745 switch (BYTE_TYPE(enc, ptr)) {
1746 #define LEAD_CASE(n) \
1747 case BT_LEAD ## n: \
1748 ptr += n; \
1749 break;
(gdb)
(gdb) frame 1
#1 0x40701804 in XML_GetCurrentLineNumber (parser=0x82387c0)
at /usr/local/src/Python-2.3.3/Modules/expat/xmlparse.c:1605
1605 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
(gdb) list
1600
1601 int XMLCALL
1602 XML_GetCurrentLineNumber(XML_Parser parser)
1603 {
1604 if (eventPtr) {
1605 XmlUpdatePosition(encoding, positionPtr, eventPtr, &position);
1606 positionPtr = eventPtr;
1607 }
1608 return position.lineNumber + 1;
1609 }
(gdb) frame 2
#2 0x406ff800 in set_error (self=0x4067e8ec, code=XML_ERROR_INVALID_TOKEN)
at /usr/local/src/Python-2.3.3/Modules/pyexpat.c:124
124 int lineno = XML_GetErrorLineNumber(parser);
(gdb) list
119 set_error(xmlparseobject *self, enum XML_Error code)
120 {
121 PyObject *err;
122 char buffer[256];
123 XML_Parser parser = self->itself;
124 int lineno = XML_GetErrorLineNumber(parser);
125 int column = XML_GetErrorColumnNumber(parser);
126
127 /* There is no risk of overflowing this buffer, since
128 even for 64-bit integers, there is sufficient space. */
(gdb) print *parser
$2 = {m_userData = 0x2, m_handlerArg = 0x40720ac0,
m_buffer = 0x82387c0 "ìèg@ìèg@\b@v@TA\005\b$D\005\b´E\005\b\b@v@=Dw@\b@x@5\004\001",
m_mem = {malloc_fcn = 0x1, realloc_fcn = 0, free_fcn = 0},
m_bufferPtr = 0x0, m_bufferEnd = 0x1
,
m_bufferLim = 0x0, m_parseEndByteIndex = 8192, m_parseEndPtr = 0x0,
m_dataBuf = 0x0,
m_dataBufEnd = 0x81e66b0 "<\224f@¬\222f@\204\222f@ì\223f@ü\222f@\024\224f@,
m_characterDataHandler = 0x23, m_processingInstructionHandler = 0xffffffff,
m_commentHandler = 0, m_startCdataSectionHandler = 0x7273752f,
m_endCdataSectionHandler = 0x636f6c2f, m_defaultHandler = 0x6c2f6c61,
m_startDoctypeDeclHandler = 0x702f6269,
m_endDoctypeDeclHandler = 0x6f687479,
m_unparsedEntityDeclHandler = 0x332e326e,
m_notationDeclHandler = 0x6165772f,
m_startNamespaceDeclHandler = 0x6665726b,
m_endNamespaceDeclHandler = 0x79702e, m_notStandaloneHandler = 0,
m_externalEntityRefHandler = 0, m_externalEntityRefHandlerArg = 0x1,
m_skippedEntityHandler = 0x8117660 ,
m_unknownEncodingHandler = 0x2, m_elementDeclHandler = 0x2,
m_attlistDeclHandler = 0x2, m_entityDeclHandler = 0x43,
m_xmlDeclHandler = 0x4067c3a0, m_encoding = 0x40677fac, m_initEncoding = {
initEnc = {scanners = {0x4067d3bc, 0x40643e0c, 0x4012802c, 0x4012802c},
literalScanners = {0x4067e920, 0x4067f180}, sameName = 0x122,
nameMatchesAscii = 0x4067f1a0, nameLength = 0x1,
skipS = 0x810a7a0 , getAtts = 0x23,
charRefNumber = 0xffffffff, predefinedEntityName = 0,
updatePosition = 0x7273752f, isPublicId = 0x636f6c2f,
utf8Convert = 0x6c2f6c61, utf16Convert = 0x702f6269,
minBytesPerChar = 1869116537, isUtf8 = 110 'n', isUtf16 = 50 '2'},
encPtr = 0x6165772f}, m_internalEncoding = 0x6665726b,
m_protocolEncodingName = 0x79702e ,
m_ns = 0 '\0', m_ns_triplets = 0 '\0', m_unknownEncodingMem = 0x0,
m_unknownEncodingData = 0x1, m_unknownEncodingHandlerData = 0x8117660,
m_unknownEncodingRelease = 0x1, m_prologState = {handler = 0x4, level = 2,
role_none = 67, includeLevel = 1080537744, documentEntity = 1080310604,
inEntityValue = 1080546284}, m_processor = 0x40678e14,
m_errorCode = 1074954284, m_eventPtr = 0x4012802c "}\v",
m_eventEndPtr = 0x4067e9a0 "\001", m_positionPtr = 0x4067f220 "\002",
m_openInternalEntities = 0x125, m_defaultExpandInternalEntities = 48 '0',
m_tagLevel = 1080552288, m_declEntity = 0x810a7a0,
m_doctypeName = 0x23 ,
m_doctypeSysid = 0xffffffff ,
m_doctypePubid = 0x0,
m_declAttributeType = 0x7273752f ,
m_declNotationName = 0x636f6c2f ,
m_declNotationPublicId = 0x6c2f6c61 ,
m_declElementType = 0x702f6269, m_declAttributeId = 0x6f687479,
m_declAttributeIsCdata = 110 'n', m_declAttributeIsId = 50 '2',
m_dtd = 0x6165772f,
m_curBase = 0x6665726b ,
m_tagStack = 0x79702e, m_freeTagList = 0x0, m_inheritedBindings = 0x0,
m_freeBindingList = 0x4067ea20, m_attsSize = 135362144,
m_nSpecifiedAtts = 0, m_idAttIndex = 0, m_atts = 0x1, m_nsAtts = 0x42,
m_nsAttsVersion = 1080542056, m_nsAttsPower = 236 'ì', m_position = {
lineNumber = 1080528484, columnNumber = 1074954284}, m_tempPool = {
blocks = 0x4012802c, freeBlocks = 0x4012802c,
end = 0x4067ea20 "`ëg@ §\020\b#",
ptr = 0x4067d410 " \020g@@\215g@ýÿÿÿ\001",
start = 0x121 , mem = 0x4067f1c0},
m_temp2Pool = {blocks = 0x1, freeBlocks = 0x810a7a0,
end = 0x22 ,
ptr = 0x5e84331 , start = 0x0,
mem = 0x70747468},
m_groupConnector = 0x6f2f2f3a ,
m_groupSize = 1852402798, m_namespaceSeparator = 101 'e',
m_parentParser = 0x746f6266, m_isParamEntity = 46 '.',
m_useForeignDTD = 111 'o', m_paramEntityParsing = 1852795427}
(gdb)
(gdb) frame 3
#3 0x406fe324 in xmlparse_Parse (self=0x4067e8ec, args=0x4064456c)
at /usr/local/src/Python-2.3.3/Modules/pyexpat.c:888
888 return set_error(self, XML_GetErrorCode(self->itself));
(gdb) list
883 {
884 if (PyErr_Occurred()) {
885 return NULL;
886 }
887 if (rv == 0) {
888 return set_error(self, XML_GetErrorCode(self->itself));
889 }
890 if (flush_character_buffer(self) < 0) {
891 return NULL;
892 }
(gdb) frame 4
#4 0x080ed614 in PyCFunction_Call (func=0x4067f38c, arg=0x4064456c,
kw=0x40785000) at ../Python-2.3.3/Objects/methodobject.c:108
108 return (*meth)(self, arg);
(gdb) list
103 size = PyTuple_GET_SIZE(arg);
104 if (size == 1)
105 arg = PyTuple_GET_ITEM(arg, 0);
106 else if (size == 0)
107 arg = NULL;
108 return (*meth)(self, arg);
109 }
110 break;
111 default:
112 PyErr_BadInternalCall();
--
Brad Clements, bkc@murkworks.com (315)268-1000
http://www.murkworks.com (315)268-9812 Fax
http://www.wecanstopspam.org/ AOL-IM: BKClements