[Python-checkins] bpo-29505: Fuzz json module, enforce size limit on int(x) fuzz (GH-13991)

Miss Islington (bot) webhook-mailer at python.org
Wed Jun 12 00:47:45 EDT 2019


https://github.com/python/cpython/commit/534136ac6790a701e24f364a9b7f1e34bf5f3ce7
commit: 534136ac6790a701e24f364a9b7f1e34bf5f3ce7
branch: 3.7
author: Miss Islington (bot) <31488909+miss-islington at users.noreply.github.com>
committer: GitHub <noreply at github.com>
date: 2019-06-11T21:47:42-07:00
summary:

bpo-29505: Fuzz json module, enforce size limit on int(x) fuzz (GH-13991)


* bpo-29505: Enable fuzz testing of the json module, enforce size limit on int(x) fuzz and json input size to avoid timeouts.

Contributed by Ammar Askar for Google.
(cherry picked from commit a6e190e94b47324f14e22a09200c68b722d55699)

Co-authored-by: Ammar Askar <aaskar at google.com>

files:
A Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict
A Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json
A Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json
A Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json
A Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json
A Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json
A Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json
M Modules/_xxtestfuzz/README.rst
M Modules/_xxtestfuzz/fuzz_tests.txt
M Modules/_xxtestfuzz/fuzzer.c

diff --git a/Modules/_xxtestfuzz/README.rst b/Modules/_xxtestfuzz/README.rst
index b48f3c89a42b..42bd02a03cbe 100644
--- a/Modules/_xxtestfuzz/README.rst
+++ b/Modules/_xxtestfuzz/README.rst
@@ -35,6 +35,16 @@ And invoke it from ``LLVMFuzzerTestOneInput``::
 ``LLVMFuzzerTestOneInput`` will run in oss-fuzz, with each test in
 ``fuzz_tests.txt`` run separately.
 
+Seed data (corpus) for the test can be provided in a subfolder called
+``<test_name>_corpus`` such as ``fuzz_json_loads_corpus``. A wide variety
+of good input samples allows the fuzzer to more easily explore a diverse
+set of paths and provides a better base to find buggy input from.
+
+Dictionaries of tokens (see oss-fuzz documentation for more details) can
+be placed in the ``dictionaries`` folder with the name of the test.
+For example, ``dictionaries/fuzz_json_loads.dict`` contains JSON tokens
+to guide the fuzzer.
+
 What makes a good fuzz test
 ---------------------------
 
diff --git a/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict b/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict
new file mode 100644
index 000000000000..ad64917ccc2c
--- /dev/null
+++ b/Modules/_xxtestfuzz/dictionaries/fuzz_json_loads.dict
@@ -0,0 +1,40 @@
+"0"
+",0"
+":0"
+"0:"
+"-1.2e+3"
+
+"true"
+"false"
+"null"
+
+"\"\""
+",\"\""
+":\"\""
+"\"\":"
+
+"{}"
+",{}"
+":{}"
+"{\"\":0}"
+"{{}}"
+
+"[]"
+",[]"
+":[]"
+"[0]"
+"[[]]"
+
+"''"
+"\\"
+"\\b"
+"\\f"
+"\\n"
+"\\r"
+"\\t"
+"\\u0000"
+"\\x00"
+"\\0"
+"\\uD800\\uDC00"
+"\\uDBFF\\uDFFF"
+
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json
new file mode 100644
index 000000000000..fe51488c7066
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_array.json
@@ -0,0 +1 @@
+[]
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json
new file mode 100644
index 000000000000..0967ef424bce
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/empty_object.json
@@ -0,0 +1 @@
+{}
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json
new file mode 100644
index 000000000000..70e268543692
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass1.json
@@ -0,0 +1,58 @@
+[
+    "JSON Test Pattern pass1",
+    {"object with 1 member":["array with 1 element"]},
+    {},
+    [],
+    -42,
+    true,
+    false,
+    null,
+    {
+        "integer": 1234567890,
+        "real": -9876.543210,
+        "e": 0.123456789e-12,
+        "E": 1.234567890E+34,
+        "":  23456789012E66,
+        "zero": 0,
+        "one": 1,
+        "space": " ",
+        "quote": "\"",
+        "backslash": "\\",
+        "controls": "\b\f\n\r\t",
+        "slash": "/ & \/",
+        "alpha": "abcdefghijklmnopqrstuvwyz",
+        "ALPHA": "ABCDEFGHIJKLMNOPQRSTUVWYZ",
+        "digit": "0123456789",
+        "0123456789": "digit",
+        "special": "`1~!@#$%^&*()_+-={':[,]}|;.</>?",
+        "hex": "\u0123\u4567\u89AB\uCDEF\uabcd\uef4A",
+        "true": true,
+        "false": false,
+        "null": null,
+        "array":[  ],
+        "object":{  },
+        "address": "50 St. James Street",
+        "url": "http://www.JSON.org/",
+        "comment": "// /* <!-- --",
+        "# -- --> */": " ",
+        " s p a c e d " :[1,2 , 3
+
+,
+
+4 , 5        ,          6           ,7        ],"compact":[1,2,3,4,5,6,7],
+        "jsontext": "{\"object with 1 member\":[\"array with 1 element\"]}",
+        "quotes": "" \u0022 %22 0x22 034 &#x22;",
+        "\/\\\"\uCAFE\uBABE\uAB98\uFCDE\ubcda\uef4A\b\f\n\r\t`1~!@#$%^&*()_+-=[]{}|;:',./<>?"
+: "A key can be any string"
+    },
+    0.5 ,98.6
+,
+99.44
+,
+
+1066,
+1e1,
+0.1e1,
+1e-1,
+1e00,2e+00,2e-00
+,"rosebud"]
\ No newline at end of file
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json
new file mode 100644
index 000000000000..d3c63c7ad845
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass2.json
@@ -0,0 +1 @@
+[[[[[[[[[[[[[[[[[[["Not too deep"]]]]]]]]]]]]]]]]]]]
\ No newline at end of file
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json
new file mode 100644
index 000000000000..4528d51f1ac6
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/pass3.json
@@ -0,0 +1,6 @@
+{
+    "JSON Test Pattern pass3": {
+        "The outermost value": "must be an object or array.",
+        "In this test": "It is an object."
+    }
+}
diff --git a/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json
new file mode 100644
index 000000000000..ce1e6ecaec72
--- /dev/null
+++ b/Modules/_xxtestfuzz/fuzz_json_loads_corpus/simple_array.json
@@ -0,0 +1 @@
+[1, 2, 3, "abcd", "xyz"]
diff --git a/Modules/_xxtestfuzz/fuzz_tests.txt b/Modules/_xxtestfuzz/fuzz_tests.txt
index 2e53bfdc7161..f0121291eaa0 100644
--- a/Modules/_xxtestfuzz/fuzz_tests.txt
+++ b/Modules/_xxtestfuzz/fuzz_tests.txt
@@ -1,3 +1,4 @@
 fuzz_builtin_float
 fuzz_builtin_int
 fuzz_builtin_unicode
+fuzz_json_loads
diff --git a/Modules/_xxtestfuzz/fuzzer.c b/Modules/_xxtestfuzz/fuzzer.c
index 54f816ebc93d..e862a99cfb34 100644
--- a/Modules/_xxtestfuzz/fuzzer.c
+++ b/Modules/_xxtestfuzz/fuzzer.c
@@ -28,8 +28,15 @@ static int fuzz_builtin_float(const char* data, size_t size) {
     return 0;
 }
 
+#define MAX_INT_TEST_SIZE 0x10000
+
 /* Fuzz PyLong_FromUnicodeObject as a proxy for int(str). */
 static int fuzz_builtin_int(const char* data, size_t size) {
+    /* Ignore test cases with very long ints to avoid timeouts;
+       int("9" * 1000000) is not a very interesting test case. */
+    if (size > MAX_INT_TEST_SIZE) {
+        return 0;
+    }
     /* Pick a random valid base. (When the fuzzed function takes extra
        parameters, it's somewhat normal to hash the input to generate those
        parameters. We want to exercise all code paths, so we do so here.) */
@@ -72,6 +79,42 @@ static int fuzz_builtin_unicode(const char* data, size_t size) {
     return 0;
 }
 
+#define MAX_JSON_TEST_SIZE 0x10000
+
+/* Cached reference to json.loads; initialized in LLVMFuzzerTestOneInput */
+PyObject* json_loads_method = NULL;
+/* Fuzz json.loads(x): pass the raw input as a bytes object; always returns 0 */
+static int fuzz_json_loads(const char* data, size_t size) {
+    /* Since Python supports arbitrarily large ints in JSON,
+       long inputs can lead to timeouts on boring inputs like
+       `json.loads("9" * 100000)`, so oversized inputs are skipped. */
+    if (size > MAX_JSON_TEST_SIZE) {
+        return 0;
+    }
+    PyObject* input_bytes = PyBytes_FromStringAndSize(data, size);
+    if (input_bytes == NULL) {
+        return 0;
+    }
+    PyObject* parsed = PyObject_CallFunctionObjArgs(json_loads_method, input_bytes, NULL);
+    /* Ignore ValueError, as the fuzzer will more than likely
+       generate some invalid JSON and values. */
+    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_ValueError)) {
+        PyErr_Clear();
+    }
+    /* Ignore RecursionError, as the fuzzer generates long sequences of
+       nested arrays such as `[[[...`. */
+    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_RecursionError)) {
+        PyErr_Clear();
+    }
+    /* Ignore UnicodeDecodeError; invalid byte sequences are common. */
+    if (parsed == NULL && PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) {
+        PyErr_Clear();
+    }
+    Py_DECREF(input_bytes);
+    Py_XDECREF(parsed);
+    return 0;
+}
+
 /* Run fuzzer and abort on failure. */
 static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char* , size_t)) {
     int rv = fuzzer((const char*) data, size);
@@ -88,7 +131,6 @@ static int _run_fuzz(const uint8_t *data, size_t size, int(*fuzzer)(const char*
 /* CPython generates a lot of leak warnings for whatever reason. */
 int __lsan_is_turned_off(void) { return 1; }
 
-wchar_t wide_program_name[NAME_MAX];
 
 int LLVMFuzzerInitialize(int *argc, char ***argv) {
     wchar_t* wide_program_name = Py_DecodeLocale(*argv[0], NULL);
@@ -110,6 +152,12 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
            initialize CPython ourselves on the first run. */
         Py_InitializeEx(0);
     }
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+    if (json_loads_method == NULL) {
+        PyObject* json_module = PyImport_ImportModule("json");
+        json_loads_method = PyObject_GetAttrString(json_module, "loads");
+    }
+#endif
 
     int rv = 0;
 
@@ -121,6 +169,9 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
 #endif
 #if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_builtin_unicode)
     rv |= _run_fuzz(data, size, fuzz_builtin_unicode);
+#endif
+#if !defined(_Py_FUZZ_ONE) || defined(_Py_FUZZ_fuzz_json_loads)
+    rv |= _run_fuzz(data, size, fuzz_json_loads);
 #endif
   return rv;
 }



More information about the Python-checkins mailing list