[Python-checkins] cpython: Update the embedded copy of the expat XML parser to 2.1.0. It brings

gregory.p.smith python-checkins at python.org
Sat Jul 14 23:12:51 CEST 2012


http://hg.python.org/cpython/rev/e4dc8be9a72f
changeset:   78100:e4dc8be9a72f
user:        Gregory P. Smith <greg at krypto.org>
date:        Sat Jul 14 14:12:35 2012 -0700
summary:
  Update the embedded copy of the expat XML parser to 2.1.0.  It brings
with it a vareity of bug fixes, both security and behavior.  See
http://www.libexpat.org/ for the list.

NOTE: I already backported the expat hash randomization fix in March.

Fixes issue #14340.

files:
  Modules/expat/COPYING          |    1 +
  Modules/expat/amigaconfig.h    |   64 ---
  Modules/expat/ascii.h          |    7 +
  Modules/expat/expat.h          |   29 +-
  Modules/expat/expat_external.h |   10 +-
  Modules/expat/internal.h       |    2 +-
  Modules/expat/watcomconfig.h   |   47 ++
  Modules/expat/xmlparse.c       |  433 ++++++++++++--------
  Modules/expat/xmlrole.c        |   12 +-
  Modules/expat/xmltok.c         |   22 +-
  Modules/expat/xmltok_impl.c    |    6 +-
  Modules/expat/xmltok_ns.c      |    9 +
  12 files changed, 379 insertions(+), 263 deletions(-)


diff --git a/Modules/expat/COPYING b/Modules/expat/COPYING
--- a/Modules/expat/COPYING
+++ b/Modules/expat/COPYING
@@ -1,5 +1,6 @@
 Copyright (c) 1998, 1999, 2000 Thai Open Source Software Center Ltd
                                and Clark Cooper
+Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006 Expat maintainers.
 
 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
diff --git a/Modules/expat/amigaconfig.h b/Modules/expat/amigaconfig.h
--- a/Modules/expat/amigaconfig.h
+++ b/Modules/expat/amigaconfig.h
@@ -10,66 +10,12 @@
 /* Define to 1 if you have the <check.h> header file. */
 #undef HAVE_CHECK_H
 
-/* Define to 1 if you have the <dlfcn.h> header file. */
-#undef HAVE_DLFCN_H
-
-/* Define to 1 if you have the <fcntl.h> header file. */
-#define HAVE_FCNTL_H 1
-
-/* Define to 1 if you have the `getpagesize' function. */
-#undef HAVE_GETPAGESIZE
-
-/* Define to 1 if you have the <inttypes.h> header file. */
-#define HAVE_INTTYPES_H 1
-
 /* Define to 1 if you have the `memmove' function. */
 #define HAVE_MEMMOVE 1
 
-/* Define to 1 if you have the <memory.h> header file. */
-#undef HAVE_MEMORY_H
-
-/* Define to 1 if you have a working `mmap' system call. */
-#undef HAVE_MMAP
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#define HAVE_STDINT_H 1
-
-/* Define to 1 if you have the <stdlib.h> header file. */
-#define HAVE_STDLIB_H 1
-
-/* Define to 1 if you have the <strings.h> header file. */
-#define HAVE_STRINGS_H 1
-
-/* Define to 1 if you have the <string.h> header file. */
-#define HAVE_STRING_H 1
-
-/* Define to 1 if you have the <sys/stat.h> header file. */
-#define HAVE_SYS_STAT_H 1
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#define HAVE_SYS_TYPES_H 1
-
 /* Define to 1 if you have the <unistd.h> header file. */
 #define HAVE_UNISTD_H 1
 
-/* Define to the address where bug reports for this package should be sent. */
-#define PACKAGE_BUGREPORT "expat-bugs at mail.libexpat.org"
-
-/* Define to the full name of this package. */
-#define PACKAGE_NAME "expat"
-
-/* Define to the full name and version of this package. */
-#define PACKAGE_STRING "expat 1.95.8"
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#define PACKAGE_VERSION "1.95.8"
-
-/* Define to 1 if you have the ANSI C header files. */
-#define STDC_HEADERS 1
-
 /* whether byteorder is bigendian */
 #define WORDS_BIGENDIAN
 
@@ -83,14 +29,4 @@
 /* Define to make XML Namespaces functionality available. */
 #define XML_NS
 
-/* Define to empty if `const' does not conform to ANSI C. */
-#undef const
-
-/* Define to `long' if <sys/types.h> does not define. */
-#undef off_t
-
-/* Define to `unsigned' if <sys/types.h> does not define. */
-#undef size_t
-
-
 #endif  /* AMIGACONFIG_H */
diff --git a/Modules/expat/ascii.h b/Modules/expat/ascii.h
--- a/Modules/expat/ascii.h
+++ b/Modules/expat/ascii.h
@@ -83,3 +83,10 @@
 #define ASCII_LSQB 0x5B
 #define ASCII_RSQB 0x5D
 #define ASCII_UNDERSCORE 0x5F
+#define ASCII_LPAREN 0x28
+#define ASCII_RPAREN 0x29
+#define ASCII_FF 0x0C
+#define ASCII_SLASH 0x2F
+#define ASCII_HASH 0x23
+#define ASCII_PIPE 0x7C
+#define ASCII_COMMA 0x2C
diff --git a/Modules/expat/expat.h b/Modules/expat/expat.h
--- a/Modules/expat/expat.h
+++ b/Modules/expat/expat.h
@@ -742,6 +742,29 @@
 XMLPARSEAPI(int)
 XML_GetIdAttributeIndex(XML_Parser parser);
 
+#ifdef XML_ATTR_INFO
+/* Source file byte offsets for the start and end of attribute names and values.
+   The value indices are exclusive of surrounding quotes; thus in a UTF-8 source
+   file an attribute value of "blah" will yield:
+   info->valueEnd - info->valueStart = 4 bytes.
+*/
+typedef struct {
+  XML_Index  nameStart;  /* Offset to beginning of the attribute name. */
+  XML_Index  nameEnd;    /* Offset after the attribute name's last byte. */
+  XML_Index  valueStart; /* Offset to beginning of the attribute value. */
+  XML_Index  valueEnd;   /* Offset after the attribute value's last byte. */
+} XML_AttrInfo;
+
+/* Returns an array of XML_AttrInfo structures for the attribute/value pairs
+   passed in last call to the XML_StartElementHandler that were specified
+   in the start-tag rather than defaulted. Each attribute/value pair counts
+   as 1; thus the number of entries in the array is
+   XML_GetSpecifiedAttributeCount(parser) / 2.
+*/
+XMLPARSEAPI(const XML_AttrInfo *)
+XML_GetAttributeInfo(XML_Parser parser);
+#endif
+
 /* Parses some input. Returns XML_STATUS_ERROR if a fatal error is
    detected.  The last call to XML_Parse must have isFinal true; len
    may be zero for this call (or any other).
@@ -994,7 +1017,9 @@
   XML_FEATURE_MIN_SIZE,
   XML_FEATURE_SIZEOF_XML_CHAR,
   XML_FEATURE_SIZEOF_XML_LCHAR,
-  XML_FEATURE_NS
+  XML_FEATURE_NS,
+  XML_FEATURE_LARGE_SIZE,
+  XML_FEATURE_ATTR_INFO
   /* Additional features must be added to the end of this enum. */
 };
 
@@ -1014,7 +1039,7 @@
    change to major or minor version.
 */
 #define XML_MAJOR_VERSION 2
-#define XML_MINOR_VERSION 0
+#define XML_MINOR_VERSION 1
 #define XML_MICRO_VERSION 0
 
 #ifdef __cplusplus
diff --git a/Modules/expat/expat_external.h b/Modules/expat/expat_external.h
--- a/Modules/expat/expat_external.h
+++ b/Modules/expat/expat_external.h
@@ -7,11 +7,7 @@
 
 /* External API definitions */
 
-/* Namespace external symbols to allow multiple libexpat version to
-   co-exist. */
-#include "pyexpatns.h"
-
-#if defined(_MSC_EXTENSIONS) && !defined(__CYGWIN__)
+#if defined(_MSC_EXTENSIONS) && !defined(__BEOS__) && !defined(__CYGWIN__)
 #define XML_USE_MSC_EXTENSIONS 1
 #endif
 
@@ -38,9 +34,9 @@
    system headers may assume the cdecl convention.
 */
 #ifndef XMLCALL
-#if defined(XML_USE_MSC_EXTENSIONS)
+#if defined(_MSC_VER)
 #define XMLCALL __cdecl
-#elif defined(__GNUC__) && defined(__i386)
+#elif defined(__GNUC__) && defined(__i386) && !defined(__INTEL_COMPILER)
 #define XMLCALL __attribute__((cdecl))
 #else
 /* For any platform which uses this definition and supports more than
diff --git a/Modules/expat/internal.h b/Modules/expat/internal.h
--- a/Modules/expat/internal.h
+++ b/Modules/expat/internal.h
@@ -20,7 +20,7 @@
          and therefore subject to change.
 */
 
-#if defined(__GNUC__) && defined(__i386__)
+#if defined(__GNUC__) && defined(__i386__) && !defined(__MINGW32__)
 /* We'll use this version by default only where we know it helps.
 
    regparm() generates warnings on Solaris boxes.   See SF bug #692878.
diff --git a/Modules/expat/watcomconfig.h b/Modules/expat/watcomconfig.h
new file mode 100644
--- /dev/null
+++ b/Modules/expat/watcomconfig.h
@@ -0,0 +1,47 @@
+/* expat_config.h for use with Open Watcom 1.5 and above.  */
+
+#ifndef WATCOMCONFIG_H
+#define WATCOMCONFIG_H
+
+#ifdef __NT__
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#undef WIN32_LEAN_AND_MEAN
+#endif
+
+/* 1234 = LIL_ENDIAN, 4321 = BIGENDIAN */
+#define BYTEORDER 1234
+
+/* Define to 1 if you have the `memmove' function. */
+#define HAVE_MEMMOVE 1
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#define HAVE_UNISTD_H 1
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "expat-bugs at mail.libexpat.org"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "expat"
+
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "expat 2.0.0"
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#define PACKAGE_VERSION "2.0.0"
+
+/* Define to specify how much context to retain around the current parse
+   point. */
+#define XML_CONTEXT_BYTES 1024
+
+/* Define to make parameter entity parsing functionality available. */
+#define XML_DTD 1
+
+/* Define to make XML Namespaces functionality available. */
+#define XML_NS 1
+
+#endif
+
diff --git a/Modules/expat/xmlparse.c b/Modules/expat/xmlparse.c
--- a/Modules/expat/xmlparse.c
+++ b/Modules/expat/xmlparse.c
@@ -2,24 +2,27 @@
    See the file COPYING for copying permission.
 */
 
-#define XML_BUILDING_EXPAT 1
-
-#ifdef COMPILED_FROM_DSP
-#include "winconfig.h"
-#elif defined(MACOS_CLASSIC)
-#include "macconfig.h"
-#elif defined(__amigaos4__)
-#include "amigaconfig.h"
-#elif defined(HAVE_EXPAT_CONFIG_H)
-#include <expat_config.h>
-#endif /* ndef COMPILED_FROM_DSP */
-
 #include <stddef.h>
 #include <string.h>                     /* memset(), memcpy() */
 #include <assert.h>
 #include <limits.h>                     /* UINT_MAX */
 #include <time.h>                       /* time() */
 
+#define XML_BUILDING_EXPAT 1
+
+#ifdef COMPILED_FROM_DSP
+#include "winconfig.h"
+#elif defined(MACOS_CLASSIC)
+#include "macconfig.h"
+#elif defined(__amigaos__)
+#include "amigaconfig.h"
+#elif defined(__WATCOMC__)
+#include "watcomconfig.h"
+#elif defined(HAVE_EXPAT_CONFIG_H)
+#include <expat_config.h>
+#endif /* ndef COMPILED_FROM_DSP */
+
+#include "ascii.h"
 #include "expat.h"
 
 #ifdef XML_UNICODE
@@ -28,7 +31,8 @@
 #define XmlGetInternalEncoding XmlGetUtf16InternalEncoding
 #define XmlGetInternalEncodingNS XmlGetUtf16InternalEncodingNS
 #define XmlEncode XmlUtf16Encode
-#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((unsigned long)s) & 1))
+/* Using pointer subtraction to convert to integer type. */
+#define MUST_CONVERT(enc, s) (!(enc)->isUtf16 || (((char *)(s) - (char *)NULL) & 1))
 typedef unsigned short ICHAR;
 #else
 #define XML_ENCODE_MAX XML_UTF8_ENCODE_MAX
@@ -325,15 +329,15 @@
 static enum XML_Error
 initializeEncoding(XML_Parser parser);
 static enum XML_Error
-doProlog(XML_Parser parser, const ENCODING *enc, const char *s, 
-         const char *end, int tok, const char *next, const char **nextPtr, 
+doProlog(XML_Parser parser, const ENCODING *enc, const char *s,
+         const char *end, int tok, const char *next, const char **nextPtr,
          XML_Bool haveMore);
 static enum XML_Error
-processInternalEntity(XML_Parser parser, ENTITY *entity, 
+processInternalEntity(XML_Parser parser, ENTITY *entity,
                       XML_Bool betweenDecl);
 static enum XML_Error
 doContent(XML_Parser parser, int startTagLevel, const ENCODING *enc,
-          const char *start, const char *end, const char **endPtr, 
+          const char *start, const char *end, const char **endPtr,
           XML_Bool haveMore);
 static enum XML_Error
 doCdataSection(XML_Parser parser, const ENCODING *, const char **startPtr,
@@ -351,7 +355,7 @@
 addBinding(XML_Parser parser, PREFIX *prefix, const ATTRIBUTE_ID *attId,
            const XML_Char *uri, BINDING **bindingsPtr);
 static int
-defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata, 
+defineAttribute(ELEMENT_TYPE *type, ATTRIBUTE_ID *, XML_Bool isCdata,
                 XML_Bool isId, const XML_Char *dfltValue, XML_Parser parser);
 static enum XML_Error
 storeAttributeValue(XML_Parser parser, const ENCODING *, XML_Bool isCdata,
@@ -436,6 +440,7 @@
              const XML_Memory_Handling_Suite *memsuite,
              const XML_Char *nameSep,
              DTD *dtd);
+
 static void
 parserInit(XML_Parser parser, const XML_Char *encodingName);
 
@@ -535,6 +540,9 @@
   NS_ATT *m_nsAtts;
   unsigned long m_nsAttsVersion;
   unsigned char m_nsAttsPower;
+#ifdef XML_ATTR_INFO
+  XML_AttrInfo *m_attInfo;
+#endif
   POSITION m_position;
   STRING_POOL m_tempPool;
   STRING_POOL m_temp2Pool;
@@ -643,6 +651,7 @@
 #define nsAtts (parser->m_nsAtts)
 #define nsAttsVersion (parser->m_nsAttsVersion)
 #define nsAttsPower (parser->m_nsAttsPower)
+#define attInfo (parser->m_attInfo)
 #define tempPool (parser->m_tempPool)
 #define temp2Pool (parser->m_temp2Pool)
 #define groupConnector (parser->m_groupConnector)
@@ -673,10 +682,12 @@
 }
 
 static const XML_Char implicitContext[] = {
-  'x', 'm', 'l', '=', 'h', 't', 't', 'p', ':', '/', '/',
-  'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/',
-  'X', 'M', 'L', '/', '1', '9', '9', '8', '/',
-  'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0'
+  ASCII_x, ASCII_m, ASCII_l, ASCII_EQUALS, ASCII_h, ASCII_t, ASCII_t, ASCII_p,
+  ASCII_COLON, ASCII_SLASH, ASCII_SLASH, ASCII_w, ASCII_w, ASCII_w,
+  ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD, ASCII_o, ASCII_r, ASCII_g,
+  ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L, ASCII_SLASH, ASCII_1, ASCII_9,
+  ASCII_9, ASCII_8, ASCII_SLASH, ASCII_n, ASCII_a, ASCII_m, ASCII_e,
+  ASCII_s, ASCII_p, ASCII_a, ASCII_c, ASCII_e, '\0'
 };
 
 static unsigned long
@@ -690,23 +701,22 @@
 static XML_Bool  /* only valid for root parser */
 startParsing(XML_Parser parser)
 {
-  /* hash functions must be initialized before setContext() is called */
-
-  if (hash_secret_salt == 0)
-    hash_secret_salt = generate_hash_secret_salt();
-  if (ns) {
-    /* implicit context only set for root parser, since child
-       parsers (i.e. external entity parsers) will inherit it
-    */
-    return setContext(parser, implicitContext);
-  }
-  return XML_TRUE;
+    /* hash functions must be initialized before setContext() is called */
+    if (hash_secret_salt == 0)
+      hash_secret_salt = generate_hash_secret_salt();
+    if (ns) {
+      /* implicit context only set for root parser, since child
+         parsers (i.e. external entity parsers) will inherit it
+      */
+      return setContext(parser, implicitContext);
+    }
+    return XML_TRUE;
 }
 
 XML_Parser XMLCALL
 XML_ParserCreate_MM(const XML_Char *encodingName,
-    const XML_Memory_Handling_Suite *memsuite,
-    const XML_Char *nameSep)
+                    const XML_Memory_Handling_Suite *memsuite,
+                    const XML_Char *nameSep)
 {
   return parserCreate(encodingName, memsuite, nameSep, NULL);
 }
@@ -753,9 +763,20 @@
     FREE(parser);
     return NULL;
   }
+#ifdef XML_ATTR_INFO
+  attInfo = (XML_AttrInfo*)MALLOC(attsSize * sizeof(XML_AttrInfo));
+  if (attInfo == NULL) {
+    FREE(atts);
+    FREE(parser);
+    return NULL;
+  }
+#endif
   dataBuf = (XML_Char *)MALLOC(INIT_DATA_BUF_SIZE * sizeof(XML_Char));
   if (dataBuf == NULL) {
     FREE(atts);
+#ifdef XML_ATTR_INFO
+    FREE(attInfo);
+#endif
     FREE(parser);
     return NULL;
   }
@@ -768,6 +789,9 @@
     if (_dtd == NULL) {
       FREE(dataBuf);
       FREE(atts);
+#ifdef XML_ATTR_INFO
+      FREE(attInfo);
+#endif
       FREE(parser);
       return NULL;
     }
@@ -783,7 +807,7 @@
   unknownEncodingHandler = NULL;
   unknownEncodingHandlerData = NULL;
 
-  namespaceSeparator = '!';
+  namespaceSeparator = ASCII_EXCL;
   ns = XML_FALSE;
   ns_triplets = XML_FALSE;
 
@@ -1154,6 +1178,9 @@
 #endif /* XML_DTD */
     dtdDestroy(_dtd, (XML_Bool)!parentParser, &parser->m_mem);
   FREE((void *)atts);
+#ifdef XML_ATTR_INFO
+  FREE((void *)attInfo);
+#endif
   FREE(groupConnector);
   FREE(buffer);
   FREE(dataBuf);
@@ -1234,6 +1261,14 @@
   return idAttIndex;
 }
 
+#ifdef XML_ATTR_INFO
+const XML_AttrInfo * XMLCALL
+XML_GetAttributeInfo(XML_Parser parser)
+{
+  return attInfo;
+}
+#endif
+
 void XMLCALL
 XML_SetElementHandler(XML_Parser parser,
                       XML_StartElementHandler start,
@@ -1499,7 +1534,7 @@
         XmlUpdatePosition(encoding, positionPtr, bufferPtr, &position);
         positionPtr = bufferPtr;
         return XML_STATUS_SUSPENDED;
-      case XML_INITIALIZED: 
+      case XML_INITIALIZED:
       case XML_PARSING:
         ps_parsing = XML_FINISHED;
         /* fall through */
@@ -1555,15 +1590,11 @@
                 : (char *)REALLOC(buffer, len * 2));
         if (temp == NULL) {
           errorCode = XML_ERROR_NO_MEMORY;
-          return XML_STATUS_ERROR;
-        }
-        buffer = temp;
-        if (!buffer) {
-          errorCode = XML_ERROR_NO_MEMORY;
           eventPtr = eventEndPtr = NULL;
           processor = errorProcessor;
           return XML_STATUS_ERROR;
         }
+        buffer = temp;
         bufferLim = buffer + len * 2;
       }
       memcpy(buffer, end, nLeftOver);
@@ -1629,7 +1660,7 @@
     case XML_SUSPENDED:
       result = XML_STATUS_SUSPENDED;
       break;
-    case XML_INITIALIZED: 
+    case XML_INITIALIZED:
     case XML_PARSING:
       if (isFinal) {
         ps_parsing = XML_FINISHED;
@@ -1719,6 +1750,8 @@
       bufferPtr = buffer = newBuf;
 #endif  /* not defined XML_CONTEXT_BYTES */
     }
+    eventPtr = eventEndPtr = NULL;
+    positionPtr = NULL;
   }
   return bufferEnd;
 }
@@ -1776,7 +1809,7 @@
     case XML_SUSPENDED:
       result = XML_STATUS_SUSPENDED;
       break;
-    case XML_INITIALIZED: 
+    case XML_INITIALIZED:
     case XML_PARSING:
       if (ps_finalBuffer) {
         ps_parsing = XML_FINISHED;
@@ -2001,6 +2034,12 @@
 #ifdef XML_NS
     {XML_FEATURE_NS,               XML_L("XML_NS"), 0},
 #endif
+#ifdef XML_LARGE_SIZE
+    {XML_FEATURE_LARGE_SIZE,       XML_L("XML_LARGE_SIZE"), 0},
+#endif
+#ifdef XML_ATTR_INFO
+    {XML_FEATURE_ATTR_INFO,        XML_L("XML_ATTR_INFO"), 0},
+#endif
     {XML_FEATURE_END,              NULL, 0}
   };
 
@@ -2063,7 +2102,7 @@
                  const char *end,
                  const char **endPtr)
 {
-  enum XML_Error result = doContent(parser, 0, encoding, start, end, 
+  enum XML_Error result = doContent(parser, 0, encoding, start, end,
                                     endPtr, (XML_Bool)!ps_finalBuffer);
   if (result == XML_ERROR_NONE) {
     if (!storeRawNames(parser))
@@ -2145,7 +2184,7 @@
       if (result != XML_ERROR_NONE)
         return result;
       switch (ps_parsing) {
-      case XML_SUSPENDED: 
+      case XML_SUSPENDED:
         *endPtr = next;
         return XML_ERROR_NONE;
       case XML_FINISHED:
@@ -2179,7 +2218,7 @@
                                const char *end,
                                const char **endPtr)
 {
-  enum XML_Error result = doContent(parser, 1, encoding, start, end, 
+  enum XML_Error result = doContent(parser, 1, encoding, start, end,
                                     endPtr, (XML_Bool)!ps_finalBuffer);
   if (result == XML_ERROR_NONE) {
     if (!storeRawNames(parser))
@@ -2198,7 +2237,7 @@
           XML_Bool haveMore)
 {
   /* save one level of indirection */
-  DTD * const dtd = _dtd;  
+  DTD * const dtd = _dtd;
 
   const char **eventPP;
   const char **eventEndPP;
@@ -2229,8 +2268,8 @@
       }
       else if (defaultHandler)
         reportDefault(parser, enc, s, end);
-      /* We are at the end of the final buffer, should we check for 
-         XML_SUSPENDED, XML_FINISHED? 
+      /* We are at the end of the final buffer, should we check for
+         XML_SUSPENDED, XML_FINISHED?
       */
       if (startTagLevel == 0)
         return XML_ERROR_NO_ELEMENTS;
@@ -2581,8 +2620,8 @@
       }
       else if (defaultHandler)
         reportDefault(parser, enc, s, end);
-      /* We are at the end of the final buffer, should we check for 
-         XML_SUSPENDED, XML_FINISHED? 
+      /* We are at the end of the final buffer, should we check for
+         XML_SUSPENDED, XML_FINISHED?
       */
       if (startTagLevel == 0) {
         *eventPP = end;
@@ -2595,26 +2634,29 @@
       *nextPtr = end;
       return XML_ERROR_NONE;
     case XML_TOK_DATA_CHARS:
-      if (characterDataHandler) {
-        if (MUST_CONVERT(enc, s)) {
-          for (;;) {
-            ICHAR *dataPtr = (ICHAR *)dataBuf;
-            XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
-            *eventEndPP = s;
-            characterDataHandler(handlerArg, dataBuf,
-                                 (int)(dataPtr - (ICHAR *)dataBuf));
-            if (s == next)
-              break;
-            *eventPP = s;
+      {
+        XML_CharacterDataHandler charDataHandler = characterDataHandler;
+        if (charDataHandler) {
+          if (MUST_CONVERT(enc, s)) {
+            for (;;) {
+              ICHAR *dataPtr = (ICHAR *)dataBuf;
+              XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+              *eventEndPP = s;
+              charDataHandler(handlerArg, dataBuf,
+                              (int)(dataPtr - (ICHAR *)dataBuf));
+              if (s == next)
+                break;
+              *eventPP = s;
+            }
           }
+          else
+            charDataHandler(handlerArg,
+                            (XML_Char *)s,
+                            (int)((XML_Char *)next - (XML_Char *)s));
         }
-        else
-          characterDataHandler(handlerArg,
-                               (XML_Char *)s,
-                               (int)((XML_Char *)next - (XML_Char *)s));
+        else if (defaultHandler)
+          reportDefault(parser, enc, s, next);
       }
-      else if (defaultHandler)
-        reportDefault(parser, enc, s, next);
       break;
     case XML_TOK_PI:
       if (!reportProcessingInstruction(parser, enc, s, next))
@@ -2631,7 +2673,7 @@
     }
     *eventPP = s = next;
     switch (ps_parsing) {
-    case XML_SUSPENDED: 
+    case XML_SUSPENDED:
       *nextPtr = next;
       return XML_ERROR_NONE;
     case XML_FINISHED:
@@ -2690,23 +2732,44 @@
   if (n + nDefaultAtts > attsSize) {
     int oldAttsSize = attsSize;
     ATTRIBUTE *temp;
+#ifdef XML_ATTR_INFO
+    XML_AttrInfo *temp2;
+#endif
     attsSize = n + nDefaultAtts + INIT_ATTS_SIZE;
     temp = (ATTRIBUTE *)REALLOC((void *)atts, attsSize * sizeof(ATTRIBUTE));
     if (temp == NULL)
       return XML_ERROR_NO_MEMORY;
     atts = temp;
+#ifdef XML_ATTR_INFO
+    temp2 = (XML_AttrInfo *)REALLOC((void *)attInfo, attsSize * sizeof(XML_AttrInfo));
+    if (temp2 == NULL)
+      return XML_ERROR_NO_MEMORY;
+    attInfo = temp2;
+#endif
     if (n > oldAttsSize)
       XmlGetAttributes(enc, attStr, n, atts);
   }
 
   appAtts = (const XML_Char **)atts;
   for (i = 0; i < n; i++) {
+    ATTRIBUTE *currAtt = &atts[i];
+#ifdef XML_ATTR_INFO
+    XML_AttrInfo *currAttInfo = &attInfo[i];
+#endif
     /* add the name and value to the attribute list */
-    ATTRIBUTE_ID *attId = getAttributeId(parser, enc, atts[i].name,
-                                         atts[i].name
-                                         + XmlNameLength(enc, atts[i].name));
+    ATTRIBUTE_ID *attId = getAttributeId(parser, enc, currAtt->name,
+                                         currAtt->name
+                                         + XmlNameLength(enc, currAtt->name));
     if (!attId)
       return XML_ERROR_NO_MEMORY;
+#ifdef XML_ATTR_INFO
+    currAttInfo->nameStart = parseEndByteIndex - (parseEndPtr - currAtt->name);
+    currAttInfo->nameEnd = currAttInfo->nameStart +
+                           XmlNameLength(enc, currAtt->name);
+    currAttInfo->valueStart = parseEndByteIndex -
+                            (parseEndPtr - currAtt->valuePtr);
+    currAttInfo->valueEnd = parseEndByteIndex - (parseEndPtr - currAtt->valueEnd);
+#endif
     /* Detect duplicate attributes by their QNames. This does not work when
        namespace processing is turned on and different prefixes for the same
        namespace are used. For this case we have a check further down.
@@ -2848,8 +2911,6 @@
         unsigned long uriHash = hash_secret_salt;
         ((XML_Char *)s)[-1] = 0;  /* clear flag */
         id = (ATTRIBUTE_ID *)lookup(parser, &dtd->attributeIds, s, 0);
-        if (!id)
-          return XML_ERROR_NO_MEMORY;
         b = id->prefix->binding;
         if (!b)
           return XML_ERROR_UNBOUND_PREFIX;
@@ -2861,7 +2922,7 @@
             return XML_ERROR_NO_MEMORY;
           uriHash = CHAR_HASH(uriHash, c);
         }
-        while (*s++ != XML_T(':'))
+        while (*s++ != XML_T(ASCII_COLON))
           ;
         do {  /* copies null terminator */
           const XML_Char c = *s;
@@ -2935,7 +2996,7 @@
     if (!binding)
       return XML_ERROR_UNBOUND_PREFIX;
     localPart = tagNamePtr->str;
-    while (*localPart++ != XML_T(':'))
+    while (*localPart++ != XML_T(ASCII_COLON))
       ;
   }
   else if (dtd->defaultPrefix.binding) {
@@ -2990,25 +3051,29 @@
            const XML_Char *uri, BINDING **bindingsPtr)
 {
   static const XML_Char xmlNamespace[] = {
-    'h', 't', 't', 'p', ':', '/', '/',
-    'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/',
-    'X', 'M', 'L', '/', '1', '9', '9', '8', '/',
-    'n', 'a', 'm', 'e', 's', 'p', 'a', 'c', 'e', '\0'
+    ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
+    ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
+    ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_X, ASCII_M, ASCII_L,
+    ASCII_SLASH, ASCII_1, ASCII_9, ASCII_9, ASCII_8, ASCII_SLASH,
+    ASCII_n, ASCII_a, ASCII_m, ASCII_e, ASCII_s, ASCII_p, ASCII_a, ASCII_c,
+    ASCII_e, '\0'
   };
-  static const int xmlLen = 
+  static const int xmlLen =
     (int)sizeof(xmlNamespace)/sizeof(XML_Char) - 1;
   static const XML_Char xmlnsNamespace[] = {
-    'h', 't', 't', 'p', ':', '/', '/',
-    'w', 'w', 'w', '.', 'w', '3', '.', 'o', 'r', 'g', '/',
-    '2', '0', '0', '0', '/', 'x', 'm', 'l', 'n', 's', '/', '\0'
+    ASCII_h, ASCII_t, ASCII_t, ASCII_p, ASCII_COLON, ASCII_SLASH, ASCII_SLASH,
+    ASCII_w, ASCII_w, ASCII_w, ASCII_PERIOD, ASCII_w, ASCII_3, ASCII_PERIOD,
+    ASCII_o, ASCII_r, ASCII_g, ASCII_SLASH, ASCII_2, ASCII_0, ASCII_0,
+    ASCII_0, ASCII_SLASH, ASCII_x, ASCII_m, ASCII_l, ASCII_n, ASCII_s,
+    ASCII_SLASH, '\0'
   };
-  static const int xmlnsLen = 
+  static const int xmlnsLen =
     (int)sizeof(xmlnsNamespace)/sizeof(XML_Char) - 1;
 
   XML_Bool mustBeXML = XML_FALSE;
   XML_Bool isXML = XML_TRUE;
   XML_Bool isXMLNS = XML_TRUE;
-  
+
   BINDING *b;
   int len;
 
@@ -3017,13 +3082,13 @@
     return XML_ERROR_UNDECLARING_PREFIX;
 
   if (prefix->name
-      && prefix->name[0] == XML_T('x')
-      && prefix->name[1] == XML_T('m')
-      && prefix->name[2] == XML_T('l')) {
+      && prefix->name[0] == XML_T(ASCII_x)
+      && prefix->name[1] == XML_T(ASCII_m)
+      && prefix->name[2] == XML_T(ASCII_l)) {
 
     /* Not allowed to bind xmlns */
-    if (prefix->name[3] == XML_T('n')
-        && prefix->name[4] == XML_T('s')
+    if (prefix->name[3] == XML_T(ASCII_n)
+        && prefix->name[4] == XML_T(ASCII_s)
         && prefix->name[5] == XML_T('\0'))
       return XML_ERROR_RESERVED_PREFIX_XMLNS;
 
@@ -3035,7 +3100,7 @@
     if (isXML && (len > xmlLen || uri[len] != xmlNamespace[len]))
       isXML = XML_FALSE;
 
-    if (!mustBeXML && isXMLNS 
+    if (!mustBeXML && isXMLNS
         && (len > xmlnsLen || uri[len] != xmlnsNamespace[len]))
       isXMLNS = XML_FALSE;
   }
@@ -3177,26 +3242,29 @@
         reportDefault(parser, enc, s, next);
       break;
     case XML_TOK_DATA_CHARS:
-      if (characterDataHandler) {
-        if (MUST_CONVERT(enc, s)) {
-          for (;;) {
-            ICHAR *dataPtr = (ICHAR *)dataBuf;
-            XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
-            *eventEndPP = next;
-            characterDataHandler(handlerArg, dataBuf,
-                                 (int)(dataPtr - (ICHAR *)dataBuf));
-            if (s == next)
-              break;
-            *eventPP = s;
+      {
+        XML_CharacterDataHandler charDataHandler = characterDataHandler;
+        if (charDataHandler) {
+          if (MUST_CONVERT(enc, s)) {
+            for (;;) {
+              ICHAR *dataPtr = (ICHAR *)dataBuf;
+              XmlConvert(enc, &s, next, &dataPtr, (ICHAR *)dataBufEnd);
+              *eventEndPP = next;
+              charDataHandler(handlerArg, dataBuf,
+                              (int)(dataPtr - (ICHAR *)dataBuf));
+              if (s == next)
+                break;
+              *eventPP = s;
+            }
           }
+          else
+            charDataHandler(handlerArg,
+                            (XML_Char *)s,
+                            (int)((XML_Char *)next - (XML_Char *)s));
         }
-        else
-          characterDataHandler(handlerArg,
-                               (XML_Char *)s,
-                               (int)((XML_Char *)next - (XML_Char *)s));
+        else if (defaultHandler)
+          reportDefault(parser, enc, s, next);
       }
-      else if (defaultHandler)
-        reportDefault(parser, enc, s, next);
       break;
     case XML_TOK_INVALID:
       *eventPP = next;
@@ -3243,7 +3311,7 @@
                        const char *end,
                        const char **endPtr)
 {
-  enum XML_Error result = doIgnoreSection(parser, encoding, &start, end, 
+  enum XML_Error result = doIgnoreSection(parser, encoding, &start, end,
                                           endPtr, (XML_Bool)!ps_finalBuffer);
   if (result != XML_ERROR_NONE)
     return result;
@@ -3525,7 +3593,7 @@
   const char *next = start;
   eventPtr = start;
 
-  for (;;) {  
+  for (;;) {
     tok = XmlPrologTok(encoding, start, end, &next);
     eventEndPtr = next;
     if (tok <= 0) {
@@ -3553,7 +3621,7 @@
       if (result != XML_ERROR_NONE)
         return result;
       switch (ps_parsing) {
-      case XML_SUSPENDED: 
+      case XML_SUSPENDED:
         *nextPtr = next;
         return XML_ERROR_NONE;
       case XML_FINISHED:
@@ -3618,7 +3686,7 @@
   }
 
   processor = prologProcessor;
-  return doProlog(parser, encoding, s, end, tok, next, 
+  return doProlog(parser, encoding, s, end, tok, next,
                   nextPtr, (XML_Bool)!ps_finalBuffer);
 }
 
@@ -3668,7 +3736,7 @@
 {
   const char *next = s;
   int tok = XmlPrologTok(encoding, s, end, &next);
-  return doProlog(parser, encoding, s, end, tok, next, 
+  return doProlog(parser, encoding, s, end, tok, next,
                   nextPtr, (XML_Bool)!ps_finalBuffer);
 }
 
@@ -3683,26 +3751,30 @@
          XML_Bool haveMore)
 {
 #ifdef XML_DTD
-  static const XML_Char externalSubsetName[] = { '#' , '\0' };
+  static const XML_Char externalSubsetName[] = { ASCII_HASH , '\0' };
 #endif /* XML_DTD */
-  static const XML_Char atypeCDATA[] = { 'C', 'D', 'A', 'T', 'A', '\0' };
-  static const XML_Char atypeID[] = { 'I', 'D', '\0' };
-  static const XML_Char atypeIDREF[] = { 'I', 'D', 'R', 'E', 'F', '\0' };
-  static const XML_Char atypeIDREFS[] = { 'I', 'D', 'R', 'E', 'F', 'S', '\0' };
-  static const XML_Char atypeENTITY[] = { 'E', 'N', 'T', 'I', 'T', 'Y', '\0' };
-  static const XML_Char atypeENTITIES[] =
-      { 'E', 'N', 'T', 'I', 'T', 'I', 'E', 'S', '\0' };
+  static const XML_Char atypeCDATA[] =
+      { ASCII_C, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
+  static const XML_Char atypeID[] = { ASCII_I, ASCII_D, '\0' };
+  static const XML_Char atypeIDREF[] =
+      { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
+  static const XML_Char atypeIDREFS[] =
+      { ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
+  static const XML_Char atypeENTITY[] =
+      { ASCII_E, ASCII_N, ASCII_T, ASCII_I, ASCII_T, ASCII_Y, '\0' };
+  static const XML_Char atypeENTITIES[] = { ASCII_E, ASCII_N,
+      ASCII_T, ASCII_I, ASCII_T, ASCII_I, ASCII_E, ASCII_S, '\0' };
   static const XML_Char atypeNMTOKEN[] = {
-      'N', 'M', 'T', 'O', 'K', 'E', 'N', '\0' };
-  static const XML_Char atypeNMTOKENS[] = {
-      'N', 'M', 'T', 'O', 'K', 'E', 'N', 'S', '\0' };
-  static const XML_Char notationPrefix[] = {
-      'N', 'O', 'T', 'A', 'T', 'I', 'O', 'N', '(', '\0' };
-  static const XML_Char enumValueSep[] = { '|', '\0' };
-  static const XML_Char enumValueStart[] = { '(', '\0' };
+      ASCII_N, ASCII_M, ASCII_T, ASCII_O, ASCII_K, ASCII_E, ASCII_N, '\0' };
+  static const XML_Char atypeNMTOKENS[] = { ASCII_N, ASCII_M, ASCII_T,
+      ASCII_O, ASCII_K, ASCII_E, ASCII_N, ASCII_S, '\0' };
+  static const XML_Char notationPrefix[] = { ASCII_N, ASCII_O, ASCII_T,
+      ASCII_A, ASCII_T, ASCII_I, ASCII_O, ASCII_N, ASCII_LPAREN, '\0' };
+  static const XML_Char enumValueSep[] = { ASCII_PIPE, '\0' };
+  static const XML_Char enumValueStart[] = { ASCII_LPAREN, '\0' };
 
   /* save one level of indirection */
-  DTD * const dtd = _dtd; 
+  DTD * const dtd = _dtd;
 
   const char **eventPP;
   const char **eventEndPP;
@@ -3818,15 +3890,17 @@
 #endif /* XML_DTD */
       dtd->hasParamEntityRefs = XML_TRUE;
       if (startDoctypeDeclHandler) {
+        XML_Char *pubId;
         if (!XmlIsPublicId(enc, s, next, eventPP))
           return XML_ERROR_PUBLICID;
-        doctypePubid = poolStoreString(&tempPool, enc,
-                                       s + enc->minBytesPerChar,
-                                       next - enc->minBytesPerChar);
-        if (!doctypePubid)
+        pubId = poolStoreString(&tempPool, enc,
+                                s + enc->minBytesPerChar,
+                                next - enc->minBytesPerChar);
+        if (!pubId)
           return XML_ERROR_NO_MEMORY;
-        normalizePublicId((XML_Char *)doctypePubid);
+        normalizePublicId(pubId);
         poolFinish(&tempPool);
+        doctypePubid = pubId;
         handleDefault = XML_FALSE;
         goto alreadyChecked;
       }
@@ -3881,8 +3955,8 @@
                                         entity->publicId))
             return XML_ERROR_EXTERNAL_ENTITY_HANDLING;
           if (dtd->paramEntityRead) {
-            if (!dtd->standalone && 
-                notStandaloneHandler && 
+            if (!dtd->standalone &&
+                notStandaloneHandler &&
                 !notStandaloneHandler(handlerArg))
               return XML_ERROR_NOT_STANDALONE;
           }
@@ -4010,11 +4084,11 @@
                              0, parser))
           return XML_ERROR_NO_MEMORY;
         if (attlistDeclHandler && declAttributeType) {
-          if (*declAttributeType == XML_T('(')
-              || (*declAttributeType == XML_T('N')
-                  && declAttributeType[1] == XML_T('O'))) {
+          if (*declAttributeType == XML_T(ASCII_LPAREN)
+              || (*declAttributeType == XML_T(ASCII_N)
+                  && declAttributeType[1] == XML_T(ASCII_O))) {
             /* Enumerated or Notation type */
-            if (!poolAppendChar(&tempPool, XML_T(')'))
+            if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
                 || !poolAppendChar(&tempPool, XML_T('\0')))
               return XML_ERROR_NO_MEMORY;
             declAttributeType = tempPool.start;
@@ -4047,11 +4121,11 @@
                              declAttributeIsCdata, XML_FALSE, attVal, parser))
           return XML_ERROR_NO_MEMORY;
         if (attlistDeclHandler && declAttributeType) {
-          if (*declAttributeType == XML_T('(')
-              || (*declAttributeType == XML_T('N')
-                  && declAttributeType[1] == XML_T('O'))) {
+          if (*declAttributeType == XML_T(ASCII_LPAREN)
+              || (*declAttributeType == XML_T(ASCII_N)
+                  && declAttributeType[1] == XML_T(ASCII_O))) {
             /* Enumerated or Notation type */
-            if (!poolAppendChar(&tempPool, XML_T(')'))
+            if (!poolAppendChar(&tempPool, XML_T(ASCII_RPAREN))
                 || !poolAppendChar(&tempPool, XML_T('\0')))
               return XML_ERROR_NO_MEMORY;
             declAttributeType = tempPool.start;
@@ -4321,7 +4395,7 @@
       switch (tok) {
       case XML_TOK_PARAM_ENTITY_REF:
         /* PE references in internal subset are
-           not allowed within declarations. */  
+           not allowed within declarations. */
         return XML_ERROR_PARAM_ENTITY_REF;
       case XML_TOK_XML_DECL:
         return XML_ERROR_MISPLACED_XML_PI;
@@ -4379,14 +4453,14 @@
       }
       break;
     case XML_ROLE_GROUP_SEQUENCE:
-      if (groupConnector[prologState.level] == '|')
+      if (groupConnector[prologState.level] == ASCII_PIPE)
         return XML_ERROR_SYNTAX;
-      groupConnector[prologState.level] = ',';
+      groupConnector[prologState.level] = ASCII_COMMA;
       if (dtd->in_eldecl && elementDeclHandler)
         handleDefault = XML_FALSE;
       break;
     case XML_ROLE_GROUP_CHOICE:
-      if (groupConnector[prologState.level] == ',')
+      if (groupConnector[prologState.level] == ASCII_COMMA)
         return XML_ERROR_SYNTAX;
       if (dtd->in_eldecl
           && !groupConnector[prologState.level]
@@ -4398,7 +4472,7 @@
         if (elementDeclHandler)
           handleDefault = XML_FALSE;
       }
-      groupConnector[prologState.level] = '|';
+      groupConnector[prologState.level] = ASCII_PIPE;
       break;
     case XML_ROLE_PARAM_ENTITY_REF:
 #ifdef XML_DTD
@@ -4442,7 +4516,7 @@
           return XML_ERROR_RECURSIVE_ENTITY_REF;
         if (entity->textPtr) {
           enum XML_Error result;
-          XML_Bool betweenDecl = 
+          XML_Bool betweenDecl =
             (role == XML_ROLE_PARAM_ENTITY_REF ? XML_TRUE : XML_FALSE);
           result = processInternalEntity(parser, entity, betweenDecl);
           if (result != XML_ERROR_NONE)
@@ -4637,7 +4711,7 @@
       reportDefault(parser, enc, s, next);
 
     switch (ps_parsing) {
-    case XML_SUSPENDED: 
+    case XML_SUSPENDED:
       *nextPtr = next;
       return XML_ERROR_NONE;
     case XML_FINISHED:
@@ -4707,7 +4781,7 @@
     }
     eventPtr = s = next;
     switch (ps_parsing) {
-    case XML_SUSPENDED: 
+    case XML_SUSPENDED:
       *nextPtr = next;
       return XML_ERROR_NONE;
     case XML_FINISHED:
@@ -4750,12 +4824,12 @@
 #ifdef XML_DTD
   if (entity->is_param) {
     int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
-    result = doProlog(parser, internalEncoding, textStart, textEnd, tok, 
+    result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
                       next, &next, XML_FALSE);
   }
-  else 
+  else
 #endif /* XML_DTD */
-    result = doContent(parser, tagLevel, internalEncoding, textStart, 
+    result = doContent(parser, tagLevel, internalEncoding, textStart,
                        textEnd, &next, XML_FALSE);
 
   if (result == XML_ERROR_NONE) {
@@ -4795,13 +4869,13 @@
 #ifdef XML_DTD
   if (entity->is_param) {
     int tok = XmlPrologTok(internalEncoding, textStart, textEnd, &next);
-    result = doProlog(parser, internalEncoding, textStart, textEnd, tok, 
+    result = doProlog(parser, internalEncoding, textStart, textEnd, tok,
                       next, &next, XML_FALSE);
   }
   else
 #endif /* XML_DTD */
-    result = doContent(parser, openEntity->startTagLevel, internalEncoding, 
-                       textStart, textEnd, &next, XML_FALSE);  
+    result = doContent(parser, openEntity->startTagLevel, internalEncoding,
+                       textStart, textEnd, &next, XML_FALSE);
 
   if (result != XML_ERROR_NONE)
     return result;
@@ -4822,7 +4896,7 @@
     int tok;
     processor = prologProcessor;
     tok = XmlPrologTok(encoding, s, end, &next);
-    return doProlog(parser, encoding, s, end, tok, next, nextPtr, 
+    return doProlog(parser, encoding, s, end, tok, next, nextPtr,
                     (XML_Bool)!ps_finalBuffer);
   }
   else
@@ -4831,8 +4905,8 @@
     processor = contentProcessor;
     /* see externalEntityContentProcessor vs contentProcessor */
     return doContent(parser, parentParser ? 1 : 0, encoding, s, end,
-                     nextPtr, (XML_Bool)!ps_finalBuffer); 
-  }  
+                     nextPtr, (XML_Bool)!ps_finalBuffer);
+  }
 }
 
 static enum XML_Error PTRCALL
@@ -4985,7 +5059,7 @@
         if (!entity->textPtr) {
           if (enc == encoding)
             eventPtr = ptr;
-              return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
+          return XML_ERROR_ATTRIBUTE_EXTERNAL_ENTITY_REF;
         }
         else {
           enum XML_Error result;
@@ -5328,7 +5402,7 @@
   DTD * const dtd = _dtd;  /* save one level of indirection */
   const XML_Char *name;
   for (name = elementType->name; *name; name++) {
-    if (*name == XML_T(':')) {
+    if (*name == XML_T(ASCII_COLON)) {
       PREFIX *prefix;
       const XML_Char *s;
       for (s = elementType->name; s != name; s++) {
@@ -5375,12 +5449,12 @@
     poolFinish(&dtd->pool);
     if (!ns)
       ;
-    else if (name[0] == XML_T('x')
-        && name[1] == XML_T('m')
-        && name[2] == XML_T('l')
-        && name[3] == XML_T('n')
-        && name[4] == XML_T('s')
-        && (name[5] == XML_T('\0') || name[5] == XML_T(':'))) {
+    else if (name[0] == XML_T(ASCII_x)
+        && name[1] == XML_T(ASCII_m)
+        && name[2] == XML_T(ASCII_l)
+        && name[3] == XML_T(ASCII_n)
+        && name[4] == XML_T(ASCII_s)
+        && (name[5] == XML_T('\0') || name[5] == XML_T(ASCII_COLON))) {
       if (name[5] == XML_T('\0'))
         id->prefix = &dtd->defaultPrefix;
       else
@@ -5391,7 +5465,7 @@
       int i;
       for (i = 0; name[i]; i++) {
         /* attributes without prefix are *not* in the default namespace */
-        if (name[i] == XML_T(':')) {
+        if (name[i] == XML_T(ASCII_COLON)) {
           int j;
           for (j = 0; j < i; j++) {
             if (!poolAppendChar(&dtd->pool, name[j]))
@@ -5401,8 +5475,6 @@
             return NULL;
           id->prefix = (PREFIX *)lookup(parser, &dtd->prefixes, poolStart(&dtd->pool),
                                         sizeof(PREFIX));
-          if (!id->prefix)
-            return NULL;
           if (id->prefix->name == poolStart(&dtd->pool))
             poolFinish(&dtd->pool);
           else
@@ -5415,7 +5487,7 @@
   return id;
 }
 
-#define CONTEXT_SEP XML_T('\f')
+#define CONTEXT_SEP XML_T(ASCII_FF)
 
 static const XML_Char *
 getContext(XML_Parser parser)
@@ -5427,7 +5499,7 @@
   if (dtd->defaultPrefix.binding) {
     int i;
     int len;
-    if (!poolAppendChar(&tempPool, XML_T('=')))
+    if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
       return NULL;
     len = dtd->defaultPrefix.binding->uriLen;
     if (namespaceSeparator)
@@ -5453,7 +5525,7 @@
     for (s = prefix->name; *s; s++)
       if (!poolAppendChar(&tempPool, *s))
         return NULL;
-    if (!poolAppendChar(&tempPool, XML_T('=')))
+    if (!poolAppendChar(&tempPool, XML_T(ASCII_EQUALS)))
       return NULL;
     len = prefix->binding->uriLen;
     if (namespaceSeparator)
@@ -5505,7 +5577,7 @@
       context = s;
       poolDiscard(&tempPool);
     }
-    else if (*s == XML_T('=')) {
+    else if (*s == XML_T(ASCII_EQUALS)) {
       PREFIX *prefix;
       if (poolLength(&tempPool) == 0)
         prefix = &dtd->defaultPrefix;
@@ -6162,12 +6234,13 @@
   }
   if (pool->blocks && pool->start == pool->blocks->s) {
     int blockSize = (int)(pool->end - pool->start)*2;
-    pool->blocks = (BLOCK *)
+    BLOCK *temp = (BLOCK *)
       pool->mem->realloc_fcn(pool->blocks,
                              (offsetof(BLOCK, s)
                               + blockSize * sizeof(XML_Char)));
-    if (pool->blocks == NULL)
+    if (temp == NULL)
       return XML_FALSE;
+    pool->blocks = temp;
     pool->blocks->size = blockSize;
     pool->ptr = pool->blocks->s + (pool->ptr - pool->start);
     pool->start = pool->blocks->s;
diff --git a/Modules/expat/xmlrole.c b/Modules/expat/xmlrole.c
--- a/Modules/expat/xmlrole.c
+++ b/Modules/expat/xmlrole.c
@@ -2,20 +2,22 @@
    See the file COPYING for copying permission.
 */
 
+#include <stddef.h>
+
 #ifdef COMPILED_FROM_DSP
 #include "winconfig.h"
 #elif defined(MACOS_CLASSIC)
 #include "macconfig.h"
-#elif defined(__amigaos4__)
+#elif defined(__amigaos__)
 #include "amigaconfig.h"
+#elif defined(__WATCOMC__)
+#include "watcomconfig.h"
 #else
 #ifdef HAVE_EXPAT_CONFIG_H
 #include <expat_config.h>
 #endif
 #endif /* ndef COMPILED_FROM_DSP */
 
-#include <stddef.h>
-
 #include "expat_external.h"
 #include "internal.h"
 #include "xmlrole.h"
@@ -53,12 +55,16 @@
     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, '\0' };
 static const char KW_IDREFS[] = {
     ASCII_I, ASCII_D, ASCII_R, ASCII_E, ASCII_F, ASCII_S, '\0' };
+#ifdef XML_DTD
 static const char KW_IGNORE[] = {
     ASCII_I, ASCII_G, ASCII_N, ASCII_O, ASCII_R, ASCII_E, '\0' };
+#endif
 static const char KW_IMPLIED[] = {
     ASCII_I, ASCII_M, ASCII_P, ASCII_L, ASCII_I, ASCII_E, ASCII_D, '\0' };
+#ifdef XML_DTD
 static const char KW_INCLUDE[] = {
     ASCII_I, ASCII_N, ASCII_C, ASCII_L, ASCII_U, ASCII_D, ASCII_E, '\0' };
+#endif
 static const char KW_NDATA[] = {
     ASCII_N, ASCII_D, ASCII_A, ASCII_T, ASCII_A, '\0' };
 static const char KW_NMTOKEN[] = {
diff --git a/Modules/expat/xmltok.c b/Modules/expat/xmltok.c
--- a/Modules/expat/xmltok.c
+++ b/Modules/expat/xmltok.c
@@ -2,20 +2,22 @@
    See the file COPYING for copying permission.
 */
 
+#include <stddef.h>
+
 #ifdef COMPILED_FROM_DSP
 #include "winconfig.h"
 #elif defined(MACOS_CLASSIC)
 #include "macconfig.h"
-#elif defined(__amigaos4__)
+#elif defined(__amigaos__)
 #include "amigaconfig.h"
+#elif defined(__WATCOMC__)
+#include "watcomconfig.h"
 #else
 #ifdef HAVE_EXPAT_CONFIG_H
 #include <expat_config.h>
 #endif
 #endif /* ndef COMPILED_FROM_DSP */
 
-#include <stddef.h>
-
 #include "expat_external.h"
 #include "internal.h"
 #include "xmltok.h"
@@ -295,7 +297,9 @@
 #endif
 
 #define PREFIX(ident) normal_ ## ident
+#define XML_TOK_IMPL_C
 #include "xmltok_impl.c"
+#undef XML_TOK_IMPL_C
 
 #undef MINBPC
 #undef BYTE_TYPE
@@ -692,7 +696,9 @@
 #define IS_NMSTRT_CHAR(enc, p, n) (0)
 #define IS_NMSTRT_CHAR_MINBPC(enc, p) LITTLE2_IS_NMSTRT_CHAR_MINBPC(enc, p)
 
+#define XML_TOK_IMPL_C
 #include "xmltok_impl.c"
+#undef XML_TOK_IMPL_C
 
 #undef MINBPC
 #undef BYTE_TYPE
@@ -831,7 +837,9 @@
 #define IS_NMSTRT_CHAR(enc, p, n) (0)
 #define IS_NMSTRT_CHAR_MINBPC(enc, p) BIG2_IS_NMSTRT_CHAR_MINBPC(enc, p)
 
+#define XML_TOK_IMPL_C
 #include "xmltok_impl.c"
+#undef XML_TOK_IMPL_C
 
 #undef MINBPC
 #undef BYTE_TYPE
@@ -1337,7 +1345,7 @@
 ENCODING *
 XmlInitUnknownEncoding(void *mem,
                        int *table,
-                       CONVERTER convert, 
+                       CONVERTER convert,
                        void *userData)
 {
   int i;
@@ -1610,7 +1618,9 @@
 
 #define NS(x) x
 #define ns(x) x
+#define XML_TOK_NS_C
 #include "xmltok_ns.c"
+#undef XML_TOK_NS_C
 #undef NS
 #undef ns
 
@@ -1619,7 +1629,9 @@
 #define NS(x) x ## NS
 #define ns(x) x ## _ns
 
+#define XML_TOK_NS_C
 #include "xmltok_ns.c"
+#undef XML_TOK_NS_C
 
 #undef NS
 #undef ns
@@ -1627,7 +1639,7 @@
 ENCODING *
 XmlInitUnknownEncodingNS(void *mem,
                          int *table,
-                         CONVERTER convert, 
+                         CONVERTER convert,
                          void *userData)
 {
   ENCODING *enc = XmlInitUnknownEncoding(mem, table, convert, userData);
diff --git a/Modules/expat/xmltok_impl.c b/Modules/expat/xmltok_impl.c
--- a/Modules/expat/xmltok_impl.c
+++ b/Modules/expat/xmltok_impl.c
@@ -2,6 +2,9 @@
    See the file COPYING for copying permission.
 */
 
+/* This file is included! */
+#ifdef XML_TOK_IMPL_C
+
 #ifndef IS_INVALID_CHAR
 #define IS_INVALID_CHAR(enc, ptr, n) (0)
 #endif
@@ -882,7 +885,7 @@
                     const char **nextTokPtr)
 {
   if (ptr == end)
-    return -XML_TOK_PERCENT;
+    return XML_TOK_PARTIAL;
   switch (BYTE_TYPE(enc, ptr)) {
   CHECK_NMSTRT_CASES(enc, ptr, end, nextTokPtr)
   case BT_S: case BT_LF: case BT_CR: case BT_PERCNT:
@@ -1777,3 +1780,4 @@
 #undef CHECK_NMSTRT_CASE
 #undef CHECK_NMSTRT_CASES
 
+#endif /* XML_TOK_IMPL_C */
diff --git a/Modules/expat/xmltok_ns.c b/Modules/expat/xmltok_ns.c
--- a/Modules/expat/xmltok_ns.c
+++ b/Modules/expat/xmltok_ns.c
@@ -1,3 +1,10 @@
+/* Copyright (c) 1998, 1999 Thai Open Source Software Center Ltd
+   See the file COPYING for copying permission.
+*/
+
+/* This file is included! */
+#ifdef XML_TOK_NS_C
+
 const ENCODING *
 NS(XmlGetUtf8InternalEncoding)(void)
 {
@@ -104,3 +111,5 @@
                         encoding,
                         standalone);
 }
+
+#endif /* XML_TOK_NS_C */

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list