[Expat-checkins] CVS: expat/tests runtests.c,1.4,1.5

Fred L. Drake fdrake@users.sourceforge.net
Fri Nov 16 12:20:22 2001


Update of /cvsroot/expat/expat/tests
In directory usw-pr-cvs1:/tmp/cvs-serv25991/tests

Modified Files:
	runtests.c 
Log Message:
Added a test to make sure that whitespace in ENTITIES, IDREFS, and NMTOKENS
attributes is properly collapsed according to Section 3.3.3 of the spec.
This is the first even slightly complicated test; boy are these painful in
C!  Had to add a test of a helper routine as well; that just uses assertions
since the test framework should not be dealing with tests of the tester,
just of Expat.

Added a helper to make the failure messages more useful when Expat produces
an unexpected error code; we now include the error message and location from
Expat.  This is mostly useful when developing a new test.


Index: runtests.c
===================================================================
RCS file: /cvsroot/expat/expat/tests/runtests.c,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -d -r1.4 -r1.5
*** runtests.c	2001/11/13 17:31:53	1.4
--- runtests.c	2001/11/16 20:19:39	1.5
***************
*** 1,4 ****
--- 1,6 ----
+ #include <assert.h>
  #include <check.h>
  #include <stdlib.h>
+ #include <stdio.h>
  
  #include "expat.h"
***************
*** 19,37 ****
  basic_teardown(void)
  {
!     if (parser != NULL) {
          XML_ParserFree(parser);
-     }
  }
  
  
  START_TEST(test_nul_byte)
  {
!     char *text = "<doc>\0</doc>";
  
      /* test that a NUL byte (in US-ASCII data) is an error */
!     if (XML_Parse(parser, text, 12, 1))
          fail("Parser did not report error on NUL-byte.");
!     fail_unless(XML_GetErrorCode(parser) == XML_ERROR_INVALID_TOKEN,
!                 "Got wrong error code for NUL-byte in US-ASCII encoding.");
  }
  END_TEST
--- 21,52 ----
  basic_teardown(void)
  {
!     if (parser != NULL)
          XML_ParserFree(parser);
  }
  
+ /* Generate a failure using the parser state to create an error message;
+  * this should be used when the parser reports an error we weren't
+  * expecting.
+  */
+ static void
+ xml_failure(void)
+ {
+     char buffer[256];
+     sprintf(buffer, "%s (line %d, offset %d)",
+             XML_ErrorString(XML_GetErrorCode(parser)),
+             XML_GetCurrentLineNumber(parser),
+             XML_GetCurrentColumnNumber(parser));
+     fail(buffer);
+ }
  
  START_TEST(test_nul_byte)
  {
!     char text[] = "<doc>\0</doc>";
  
      /* test that a NUL byte (in US-ASCII data) is an error */
!     if (XML_Parse(parser, text, sizeof(text) - 1, 1))
          fail("Parser did not report error on NUL-byte.");
!     if (XML_GetErrorCode(parser) != XML_ERROR_INVALID_TOKEN)
!         xml_failure();
  }
  END_TEST
***************
*** 45,50 ****
      if (XML_Parse(parser, text, strlen(text), 1))
          fail("Parser did not report error on NUL-byte.");
!     fail_unless(XML_GetErrorCode(parser) == XML_ERROR_BAD_CHAR_REF,
!                 "Got wrong error code for &#0;.");
  }
  END_TEST
--- 60,65 ----
      if (XML_Parse(parser, text, strlen(text), 1))
          fail("Parser did not report error on NUL-byte.");
!     if (XML_GetErrorCode(parser) != XML_ERROR_BAD_CHAR_REF)
!         xml_failure();
  }
  END_TEST
***************
*** 59,64 ****
  
      if (!XML_Parse(parser, text, strlen(text), 1)) {
!         fail_unless(XML_GetErrorCode(parser) == XML_ERROR_MISPLACED_XML_PI,
!                     "wrong error when XML declaration is misplaced");
      }
      else {
--- 74,79 ----
  
      if (!XML_Parse(parser, text, strlen(text), 1)) {
!         if (XML_GetErrorCode(parser) != XML_ERROR_MISPLACED_XML_PI)
!             xml_failure();
      }
      else {
***************
*** 74,78 ****
  
      if (!XML_Parse(parser, text, strlen(text), 1))
!         fail("false error reported for UTF-8 BOM");
  }
  END_TEST
--- 89,93 ----
  
      if (!XML_Parse(parser, text, strlen(text), 1))
!         xml_failure();
  }
  END_TEST
***************
*** 83,87 ****
  
      if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
!         fail("false error reported for UTF-16-BE BOM");
  }
  END_TEST
--- 98,102 ----
  
      if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
!         xml_failure();
  }
  END_TEST
***************
*** 92,99 ****
  
      if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
!         fail("false error reported for UTF-16-LE BOM");
  }
  END_TEST
  
  static Suite *
  make_basic_suite(void)
--- 107,224 ----
  
      if (!XML_Parse(parser, text, sizeof(text) - 1, 1))
!         xml_failure();
  }
  END_TEST
  
+ 
+ /* Helpers used by the following test, which checks any "attr", "ents",
+  * and "refs" attributes to make sure whitespace has been normalized.
+  */
+ 
+ /* Return true if whitespace has been normalized in a string, using
+  * the rules for attribute value normalization.  The 'is_cdata' flag
+  * is needed since CDATA attributes don't need to have multiple
+  * whitespace characters collapsed to a single space, while other
+  * attribute data types do.  (Section 3.3.3 of the recommendation.)
+  */
+ static int
+ is_whitespace_normalized(const XML_Char *s, int is_cdata)
+ {
+     int blanks = 0;
+     int at_start = 1;
+     while (*s) {
+         if (*s == ' ')
+             ++blanks;
+         else if (*s == '\t' || *s == '\n' || *s == '\r')
+             return 0;
+         else {
+             if (at_start) {
+                 at_start = 0;
+                 if (blanks && !is_cdata)
+                     /* illegal leading blanks */
+                     return 0;
+             }
+             else if (blanks > 1 && !is_cdata)
+                 return 0;
+             blanks = 0;
+         }
+         ++s;
+     }
+     if (blanks && !is_cdata)
+         return 0;
+     return 1;
+ }
+ 
+ /* Check the attribute whitespace checker: */
+ static void
+ testhelper_is_whitespace_normalized(void)
+ {
+     assert(is_whitespace_normalized("abc", 0));
+     assert(is_whitespace_normalized("abc", 1));
+     assert(is_whitespace_normalized("abc def ghi", 0));
+     assert(is_whitespace_normalized("abc def ghi", 1));
+     assert(!is_whitespace_normalized(" abc def ghi", 0));
+     assert(is_whitespace_normalized(" abc def ghi", 1));
+     assert(!is_whitespace_normalized("abc  def ghi", 0));
+     assert(is_whitespace_normalized("abc  def ghi", 1));
+     assert(!is_whitespace_normalized("abc def ghi ", 0));
+     assert(is_whitespace_normalized("abc def ghi ", 1));
+     assert(!is_whitespace_normalized(" ", 0));
+     assert(is_whitespace_normalized(" ", 1));
+     assert(!is_whitespace_normalized("\t", 0));
+     assert(!is_whitespace_normalized("\t", 1));
+     assert(!is_whitespace_normalized("\n", 0));
+     assert(!is_whitespace_normalized("\n", 1));
+     assert(!is_whitespace_normalized("\r", 0));
+     assert(!is_whitespace_normalized("\r", 1));
+     assert(!is_whitespace_normalized("abc\t def", 1));
+ }
+ 
+ static void
+ check_attr_contains_normalized_whitespace(void *userdata,
+                                           const XML_Char *name,
+                                           const XML_Char **atts)
+ {
+     int i;
+     for (i = 0; atts[i] != NULL; i += 2) {
+         const XML_Char *attrname = atts[i];
+         const XML_Char *value = atts[i + 1];
+         if (strcmp("attr", attrname) == 0
+             || strcmp("ents", attrname) == 0
+             || strcmp("refs", attrname) == 0) {
+             if (!is_whitespace_normalized(value, 0)) {
+                 char buffer[256];
+                 sprintf(buffer, "attribute value not normalized: %s='%s'",
+                         attrname, value);
+                 fail(buffer);
+             }
+         }
+     }
+ }
+ 
+ START_TEST(test_attr_whitespace_normalization)
+ {
+     char *text =
+         "<!DOCTYPE doc [\n"
+         "  <!ATTLIST doc\n"
+         "            attr NMTOKENS #REQUIRED\n"
+         "            ents ENTITIES #REQUIRED\n"
+         "            refs IDREFS   #REQUIRED>\n"
+         "]>\n"
+         "<doc attr='    a  b c\t\td\te\t' refs=' id-1   \t  id-2\t\t'  \n"
+         "     ents=' ent-1   \t\r\n"
+         "            ent-2  ' >\n"
+         "  <e id='id-1'/>\n"
+         "  <e id='id-2'/>\n"
+         "</doc>";
+ 
+     XML_SetStartElementHandler(parser,
+                                check_attr_contains_normalized_whitespace);
+     if (!XML_Parse(parser, text, strlen(text), 1))
+         xml_failure();
+ }
+ END_TEST
+ 
+ 
  static Suite *
  make_basic_suite(void)
***************
*** 101,104 ****
--- 226,230 ----
      Suite *s = suite_create("basic");
      TCase *tc_chars = tcase_create("character tests");
+     TCase *tc_attrs = tcase_create("attributes");
      TCase *tc_xmldecl = tcase_create("XML declaration");
  
***************
*** 111,114 ****
--- 237,244 ----
      tcase_add_test(tc_chars, test_bom_utf16_le);
  
+     suite_add_tcase(s, tc_attrs);
+     tcase_add_checked_fixture(tc_attrs, basic_setup, basic_teardown);
+     tcase_add_test(tc_attrs, test_attr_whitespace_normalization);
+ 
      suite_add_tcase(s, tc_xmldecl);
      tcase_add_checked_fixture(tc_xmldecl, basic_setup, basic_teardown);
***************
*** 127,130 ****
--- 257,263 ----
      Suite *s = make_basic_suite();
      SRunner *sr = srunner_create(s);
+ 
+     /* run the tests for internal helper functions */
+     testhelper_is_whitespace_normalized();
  
      for (i = 1; i < argc; ++i) {