[pypy-commit] cffi cffi-1.0: Starting to reuse some code from creflect for parsing C types
arigo
noreply at buildbot.pypy.org
Wed Apr 8 16:53:48 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch: cffi-1.0
Changeset: r1682:bd1b2331850e
Date: 2015-04-08 16:54 +0200
http://bitbucket.org/cffi/cffi/changeset/bd1b2331850e/
Log: Starting to reuse some code from creflect for parsing C types
diff --git a/new/parse_c_type.c b/new/parse_c_type.c
new file mode 100644
--- /dev/null
+++ b/new/parse_c_type.c
@@ -0,0 +1,590 @@
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+#include "parse_c_type.h"
+
+
+/* Kinds of tokens stored in token_t.kind.
+ *
+ * Single-character punctuation tokens use the character itself as their
+ * value, which is what lets next_token() store a raw input character
+ * directly into the 'kind' field.  All other kinds are numbered from 256
+ * upwards (presumably so that they cannot collide with a character code).
+ */
+enum token_e {
+ TOK_STAR='*',
+ TOK_OPEN_PAREN='(',
+ TOK_CLOSE_PAREN=')',
+ TOK_OPEN_BRACKET='[',
+ TOK_CLOSE_BRACKET=']',
+ TOK_COMMA=',',
+
+ /* TOK_START is the initial kind set by parse_c_type() before the first
+    call to next_token(); TOK_ERROR is set by parse_error() */
+ TOK_START=256,
+ TOK_END,
+ TOK_ERROR,
+ TOK_IDENTIFIER,
+ TOK_INTEGER,
+ TOK_DOTDOTDOT,
+
+ /* keywords */
+ TOK__BOOL,
+ TOK_CHAR,
+ //TOK__COMPLEX,
+ TOK_CONST,
+ TOK_DOUBLE,
+ TOK_FLOAT,
+ //TOK__IMAGINARY,
+ TOK_INT,
+ TOK_LONG,
+ TOK_SHORT,
+ TOK_SIGNED,
+ TOK_STRUCT,
+ TOK_UNION,
+ TOK_UNSIGNED,
+ TOK_VOID,
+ TOK_VOLATILE,
+};
+
+/* Combined tokenizer and parser state, passed to every parsing helper. */
+typedef struct {
+    enum token_e kind;            /* kind of the current token */
+    const char *p;                /* first character of the current token */
+    const char **error_location;  /* if non-NULL, receives the error position */
+    const char **error_message;   /* if non-NULL, receives the error text */
+    size_t size;                  /* length in characters of the current token */
+    ctype_opcode_t *opcodes;      /* next free slot of the output buffer */
+    ctype_opcode_t *opcodes_end;  /* one past the last slot of the output buffer */
+} token_t;
+
+/* Return 1 if 'x' is a space, form feed, newline, carriage return,
+   horizontal tab or vertical tab; return 0 otherwise. */
+static int is_space(char x)
+{
+    switch (x) {
+    case ' ':
+    case '\f':
+    case '\n':
+    case '\r':
+    case '\t':
+    case '\v':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* Return 1 if 'x' may start an identifier (an ASCII letter or an
+   underscore); return 0 otherwise. */
+static int is_ident_first(char x)
+{
+    if (x == '_')
+        return 1;
+    if (x >= 'A' && x <= 'Z')
+        return 1;
+    return (x >= 'a' && x <= 'z');
+}
+
+/* Return 1 if 'x' is one of the decimal digits '0'..'9'; return 0
+   otherwise. */
+static int is_digit(char x)
+{
+    return !(x < '0' || x > '9');
+}
+
+/* Return 1 if 'x' may appear inside an identifier after its first
+   character (a letter, an underscore or a digit); return 0 otherwise. */
+static int is_ident_next(char x)
+{
+    if (is_ident_first(x))
+        return 1;
+    return is_digit(x) != 0;
+}
+
+/* Peek at the first non-whitespace character that follows the current
+   token, without consuming anything.  Returns 0 if 'tok' is already in
+   the error state. */
+static char get_following_char(token_t *tok)
+{
+    const char *cursor;
+
+    if (tok->kind == TOK_ERROR)
+        return 0;
+
+    for (cursor = tok->p + tok->size; is_space(*cursor); cursor++)
+        ;
+    return *cursor;
+}
+
+/* Advance 'tok' to the next token of the input.
+ *
+ * Scanning resumes just after the current token (tok->p + tok->size) and
+ * skips whitespace.  On return tok->kind, tok->p and tok->size describe the
+ * new token:
+ *   - an identifier or keyword (TOK_IDENTIFIER or one of the keyword kinds),
+ *   - a run of decimal digits (TOK_INTEGER),
+ *   - the three characters "..." (TOK_DOTDOTDOT),
+ *   - any other single character, whose code becomes the kind,
+ *   - the terminating NUL (TOK_END, size 0).
+ * Does nothing if 'tok' is already in the error state.
+ */
+static void next_token(token_t *tok)
+{
+    /* every keyword recognized by the tokenizer, with its length */
+    static const struct {
+        const char *text;
+        size_t length;
+        enum token_e kind;
+    } keywords[] = {
+        { "_Bool",    5, TOK__BOOL },
+        { "char",     4, TOK_CHAR },
+        { "const",    5, TOK_CONST },
+        { "double",   6, TOK_DOUBLE },
+        { "float",    5, TOK_FLOAT },
+        { "int",      3, TOK_INT },
+        { "long",     4, TOK_LONG },
+        { "short",    5, TOK_SHORT },
+        { "signed",   6, TOK_SIGNED },
+        { "struct",   6, TOK_STRUCT },
+        { "union",    5, TOK_UNION },
+        { "unsigned", 8, TOK_UNSIGNED },
+        { "void",     4, TOK_VOID },
+        { "volatile", 8, TOK_VOLATILE },
+    };
+    const char *cur;
+    size_t len, i;
+
+    if (tok->kind == TOK_ERROR)
+        return;
+
+    cur = tok->p + tok->size;
+    while (is_space(*cur))
+        cur++;
+    tok->p = cur;
+
+    if (is_ident_first(*cur)) {
+        len = 1;
+        while (is_ident_next(cur[len]))
+            len++;
+        tok->size = len;
+        tok->kind = TOK_IDENTIFIER;
+        /* promote the identifier to a keyword kind on an exact match */
+        for (i = 0; i < sizeof(keywords) / sizeof(keywords[0]); i++) {
+            if (len == keywords[i].length &&
+                    memcmp(cur, keywords[i].text, len) == 0) {
+                tok->kind = keywords[i].kind;
+                break;
+            }
+        }
+        return;
+    }
+
+    if (is_digit(*cur)) {
+        len = 1;
+        while (is_digit(cur[len]))
+            len++;
+        tok->size = len;
+        tok->kind = TOK_INTEGER;
+        return;
+    }
+
+    /* the '&&' chain stops at the first mismatch, so this never reads
+       past the terminating NUL */
+    if (cur[0] == '.' && cur[1] == '.' && cur[2] == '.') {
+        tok->size = 3;
+        tok->kind = TOK_DOTDOTDOT;
+        return;
+    }
+
+    if (*cur != '\0') {
+        /* any other character is a token of its own; its character code
+           is used as the token kind */
+        tok->size = 1;
+        tok->kind = *cur;
+        return;
+    }
+
+    tok->size = 0;
+    tok->kind = TOK_END;
+}
+
+static void parse_error(token_t *tok, const char *msg)
+{
+ if (tok->kind != TOK_ERROR) {
+ tok->kind = TOK_ERROR;
+ if (tok->error_location)
+ *tok->error_location = tok->p;
+ if (tok->error_message)
+ *tok->error_message = msg;
+ }
+}
+
+/* Reserve 'num' consecutive slots in the output buffer.
+   Returns a pointer to the first reserved slot, or NULL (after recording
+   the "type too lengthy" error) if fewer than 'num' slots remain. */
+static ctype_opcode_t *alloc_ds(token_t *tok, size_t num)
+{
+    ctype_opcode_t *start = tok->opcodes;
+    size_t available = tok->opcodes_end - start;
+
+    if (num <= available) {
+        tok->opcodes = start + num;
+        return start;
+    }
+    parse_error(tok, "type too lengthy");
+    return NULL;
+}
+
+/* DISABLED CODE (#if 0): parser for what follows the base type, i.e.
+ * pointer stars and qualifiers, an optional variable name, and then any
+ * number of '(...)' and '[...]' suffixes.
+ *
+ * It cannot be compiled as is: it uses tok->delay_slots,
+ * tok->all_delay_slots and _crx_qual_type, none of which are declared in
+ * the visible code, it treats the result of alloc_ds() as 'intptr_t *',
+ * and it expects a two-argument parse_complete().
+ * NOTE(review): presumably carried over from creflect and not yet
+ * adapted -- confirm before enabling.
+ */
+#if 0
+static void parse_complete(token_t *tok, _crx_qual_type *result);
+
+static void parse_sequel(token_t *tok, intptr_t ds_end)
+{
+ intptr_t *ds;
+ /* record each leading '*', 'const' and 'volatile' in its own slot */
+ while (tok->kind == TOK_STAR || tok->kind == TOK_CONST ||
+ tok->kind == TOK_VOLATILE) {
+ ds = alloc_ds(tok, 1);
+ if (ds == NULL)
+ return;
+ ds[0] = tok->kind;
+ next_token(tok);
+ }
+
+ int check_for_grouping = -1;
+ if (tok->kind == TOK_IDENTIFIER) {
+ next_token(tok); /* skip a potential variable name */
+ check_for_grouping = 1;
+ }
+
+ intptr_t *jump_slot = alloc_ds(tok, 1);
+ if (jump_slot == NULL)
+ return;
+ *jump_slot = ds_end;
+
+ /* loop over the '(...)' and '[...]' suffixes; check_for_grouping is 0
+    only on the first pass and only if no variable name was skipped */
+ next_right_part:
+ check_for_grouping++;
+
+ switch (tok->kind) {
+
+ case TOK_OPEN_PAREN:
+ next_token(tok);
+
+ if (check_for_grouping == 0 && (tok->kind == TOK_STAR ||
+ tok->kind == TOK_CONST ||
+ tok->kind == TOK_VOLATILE ||
+ tok->kind == TOK_OPEN_BRACKET)) {
+ /* just parentheses for grouping */
+ ds = tok->delay_slots;
+ parse_sequel(tok, *jump_slot);
+ *jump_slot = -(ds - tok->all_delay_slots);
+ }
+ else {
+ /* function type */
+ ds = alloc_ds(tok, 2);
+ if (ds == NULL)
+ return;
+ ds[0] = TOK_OPEN_PAREN;
+ ds[1] = 0;
+ if (tok->kind == TOK_VOID && get_following_char(tok) == ')') {
+ next_token(tok);
+ }
+ if (tok->kind != TOK_CLOSE_PAREN) {
+ while (1) {
+ if (tok->kind == TOK_DOTDOTDOT) {
+ ds[0] = TOK_DOTDOTDOT;
+ next_token(tok);
+ break;
+ }
+ intptr_t *ds_type = alloc_ds(tok, 2);
+ if (ds_type == NULL)
+ return;
+ assert(ds_type == ds + 2 + 2 * ds[1]);
+ assert(2 * sizeof(intptr_t) >= sizeof(_crx_qual_type));
+ parse_complete(tok, (_crx_qual_type *)ds_type);
+ ds[1]++;
+ if (tok->kind != TOK_COMMA)
+ break;
+ next_token(tok);
+ }
+ }
+ intptr_t *ds_next = alloc_ds(tok, 1);
+ if (ds_next == NULL)
+ return;
+ assert(ds_next == ds + 2 + 2 * ds[1]);
+ *ds_next = *jump_slot;
+ *jump_slot = -(ds - tok->all_delay_slots);
+ }
+
+ if (tok->kind != TOK_CLOSE_PAREN) {
+ parse_error(tok, "expected ')'");
+ return;
+ }
+ next_token(tok);
+ goto next_right_part;
+
+ case TOK_OPEN_BRACKET:
+ {
+ /* (uintptr_t)-1 stands for "no length given", i.e. '[]' */
+ uintptr_t length = (uintptr_t)-1;
+ next_token(tok);
+ if (tok->kind != TOK_CLOSE_BRACKET) {
+ if (tok->kind != TOK_INTEGER) {
+ parse_error(tok, "expected a positive integer constant");
+ return;
+ }
+
+ if (sizeof(uintptr_t) > sizeof(unsigned long))
+ length = strtoull(tok->p, NULL, 10);
+ else
+ length = strtoul(tok->p, NULL, 10);
+ if (length == (uintptr_t)-1) {
+ parse_error(tok, "number too large");
+ return;
+ }
+ next_token(tok);
+ }
+
+ if (tok->kind != TOK_CLOSE_BRACKET) {
+ parse_error(tok, "expected ']'");
+ return;
+ }
+ next_token(tok);
+
+ ds = alloc_ds(tok, 3);
+ if (ds == NULL)
+ return;
+ ds[0] = TOK_OPEN_BRACKET;
+ ds[1] = (intptr_t)length;
+ ds[2] = *jump_slot;
+ *jump_slot = -(ds - tok->all_delay_slots);
+ goto next_right_part;
+ }
+ default:
+ break;
+ }
+}
+#endif
+
+/* DISABLED CODE (#if 0): walks a chain of "delay slot" entries, starting at
+ * 'delay_slot', and applies each one to 'result' through the tok->cb
+ * callbacks (pointer, array and function type constructors) until a
+ * TOK_END entry is reached.
+ *
+ * It cannot be compiled as is: tok->delay_slots, tok->all_delay_slots,
+ * tok->cb, _crx_qual_type, _crx_type_t and the _CRX_* flags are not
+ * declared in the visible code.
+ * NOTE(review): presumably carried over from creflect and not yet
+ * adapted -- confirm before enabling.
+ */
+#if 0
+static void fetch_delay_slots(token_t *tok, _crx_qual_type *result,
+ intptr_t *delay_slot)
+{
+ if (tok->kind == TOK_ERROR)
+ return;
+ tok->delay_slots = delay_slot;
+ while (1) {
+ intptr_t tok_kind = *delay_slot++;
+ /* a non-positive entry is a jump: its negation is an index into
+    tok->all_delay_slots where the walk continues */
+ if (tok_kind <= 0) {
+ delay_slot = tok->all_delay_slots + (-tok_kind);
+ continue;
+ }
+ switch (tok_kind) {
+ case TOK_END:
+ return; /* done */
+ case TOK_STAR:
+ result->type = tok->cb->get_pointer_type(tok->cb, result->type,
+ result->qualifiers);
+ result->qualifiers = 0;
+ break;
+ case TOK_CONST:
+ result->qualifiers |= _CRX_CONST;
+ break;
+ case TOK_VOLATILE:
+ result->qualifiers |= _CRX_VOLATILE;
+ break;
+ case TOK_OPEN_BRACKET: /* array */
+ {
+ /* a length of (uintptr_t)-1 selects the incomplete array type */
+ uintptr_t length = (uintptr_t)*delay_slot++;
+ if (length != (uintptr_t)-1)
+ result->type = tok->cb->get_array_type(
+ tok->cb, result->type, length);
+ else
+ result->type = tok->cb->get_incomplete_array_type(
+ tok->cb, result->type);
+ /* result->qualifiers remains unmodified */
+ break;
+ }
+ case TOK_OPEN_PAREN: /* function */
+ case TOK_DOTDOTDOT: /* function ending with a '...' */
+ {
+ /* the entry is followed by the argument count and then by two
+    slots per argument, read here as an array of _crx_qual_type */
+ intptr_t nbargs = *delay_slot++;
+ _crx_type_t *t1;
+ _crx_qual_type *argtypes = (_crx_qual_type *)delay_slot;
+ delay_slot += 2 * nbargs;
+ if (tok_kind == TOK_DOTDOTDOT)
+ t1 = tok->cb->get_ellipsis_function_type(tok->cb,
+ result->type,
+ argtypes, nbargs);
+ else
+ t1 = tok->cb->get_function_type(tok->cb, result->type,
+ argtypes, nbargs, NULL);
+ result->type = t1;
+ result->qualifiers = 0; /* drop qualifiers on the return type */
+ break;
+ }
+ default:
+ assert(!"missing delay slot case");
+ }
+ }
+}
+#endif
+
+/* Append one opcode to the output buffer.
+   Returns 0 on success.  Returns -1 if the buffer is full; in that case
+   alloc_ds() has already recorded the "type too lengthy" error in 'tok'. */
+static int emit_opcode(token_t *tok, ctype_opcode_t opcode)
+{
+    ctype_opcode_t *slot = alloc_ds(tok, 1);
+    if (slot == NULL)
+        return -1;
+    *slot = opcode;
+    return 0;
+}
+
+/* Parse a type starting at the current token and append its opcodes to the
+ * output buffer.
+ *
+ * The accepted input is: any number of 'const'/'volatile' qualifiers, then
+ * any valid combination of 'short'/'long'/'signed'/'unsigned', then a base
+ * type.  The opcodes written are, in order: the CTOP_* code of the base
+ * type, CTOP_CONST if 'const' was seen, CTOP_VOLATILE if 'volatile' was
+ * seen, and finally CTOP_END.
+ *
+ * Identifiers (user-defined type names) and struct/union types are not
+ * supported yet: they currently call abort().
+ *
+ * On any error, including a full output buffer, the error is recorded with
+ * parse_error() (which sets tok->kind to TOK_ERROR) and the function
+ * returns early.
+ */
+static void parse_complete(token_t *tok)
+{
+    int const_qualifier = 0, volatile_qualifier = 0;
+
+ qualifiers:
+    switch (tok->kind) {
+    case TOK_CONST:
+        const_qualifier = 1;
+        next_token(tok);
+        goto qualifiers;
+    case TOK_VOLATILE:
+        volatile_qualifier = 1;
+        next_token(tok);
+        goto qualifiers;
+    default:
+        ;
+    }
+
+    int t1;
+    /* modifiers_length: -1 for 'short', +1 per 'long', and -2 once a
+       'char' has been seen together with a sign modifier */
+    int modifiers_length = 0;
+    /* modifiers_sign: +1 for 'signed', -1 for 'unsigned', 0 if neither */
+    int modifiers_sign = 0;
+ modifiers:
+    switch (tok->kind) {
+
+    case TOK_SHORT:
+        if (modifiers_length != 0) {
+            parse_error(tok, "'short' after another 'short' or 'long'");
+            return;
+        }
+        modifiers_length--;
+        next_token(tok);
+        goto modifiers;
+
+    case TOK_LONG:
+        if (modifiers_length < 0) {
+            parse_error(tok, "'long' after 'short'");
+            return;
+        }
+        if (modifiers_length >= 2) {
+            parse_error(tok, "'long long long' is too long");
+            return;
+        }
+        modifiers_length++;
+        next_token(tok);
+        goto modifiers;
+
+    case TOK_SIGNED:
+        if (modifiers_sign) {
+            parse_error(tok, "multiple 'signed' or 'unsigned'");
+            return;
+        }
+        modifiers_sign++;
+        next_token(tok);
+        goto modifiers;
+
+    case TOK_UNSIGNED:
+        if (modifiers_sign) {
+            parse_error(tok, "multiple 'signed' or 'unsigned'");
+            return;
+        }
+        modifiers_sign--;
+        next_token(tok);
+        goto modifiers;
+
+    default:
+        break;
+    }
+
+    if (modifiers_length || modifiers_sign) {
+
+        switch (tok->kind) {
+
+        case TOK_VOID:
+        case TOK__BOOL:
+        case TOK_FLOAT:
+        case TOK_STRUCT:
+        case TOK_UNION:
+            parse_error(tok, "invalid combination of types");
+            return;
+
+        case TOK_DOUBLE:
+            /* the only accepted combination is exactly 'long double' */
+            if (modifiers_sign != 0 || modifiers_length != 1) {
+                parse_error(tok, "invalid combination of types");
+                return;
+            }
+            next_token(tok);
+            t1 = CTOP_LONGDOUBLE;
+            break;
+
+        case TOK_CHAR:
+            if (modifiers_length != 0) {
+                parse_error(tok, "invalid combination of types");
+                return;
+            }
+            modifiers_length = -2;
+            /* fall-through */
+        case TOK_INT:
+            next_token(tok);
+            /* fall-through */
+        default:
+            /* modifiers alone (e.g. 'unsigned', 'long') mean an integer
+               type; the current token is left untouched in that case */
+            if (modifiers_sign >= 0)
+                switch (modifiers_length) {
+                case -2: t1 = CTOP_SCHAR; break;
+                case -1: t1 = CTOP_SHORT; break;
+                case 1: t1 = CTOP_LONG; break;
+                case 2: t1 = CTOP_LONGLONG; break;
+                default: t1 = CTOP_INT; break;
+                }
+            else
+                switch (modifiers_length) {
+                case -2: t1 = CTOP_UCHAR; break;
+                case -1: t1 = CTOP_USHORT; break;
+                case 1: t1 = CTOP_ULONG; break;
+                case 2: t1 = CTOP_ULONGLONG; break;
+                default: t1 = CTOP_UINT; break;
+                }
+        }
+    }
+    else {
+        switch (tok->kind) {
+        case TOK_INT:
+            t1 = CTOP_INT;
+            break;
+        case TOK_CHAR:
+            t1 = CTOP_CHAR;
+            break;
+        case TOK_VOID:
+            t1 = CTOP_VOID;
+            break;
+        case TOK__BOOL:
+            t1 = CTOP_BOOL;
+            break;
+        case TOK_FLOAT:
+            t1 = CTOP_FLOAT;
+            break;
+        case TOK_DOUBLE:
+            t1 = CTOP_DOUBLE;
+            break;
+        case TOK_IDENTIFIER:
+        {
+            /* not supported yet */
+            abort();
+#if 0
+            _crx_qual_type qt2;
+            char identifier[1024];
+            if (tok->size >= 1024) {
+                parse_error(tok, "identifier name too long");
+                return;
+            }
+            memcpy(identifier, tok->p, tok->size);
+            identifier[tok->size] = 0;
+            qt2 = tok->cb->get_user_type(tok->cb, identifier);
+            t1 = qt2.type;
+            result->qualifiers |= qt2.qualifiers;
+            break;
+#endif
+        }
+        case TOK_STRUCT:
+        case TOK_UNION:
+        {
+            /* not supported yet */
+            abort();
+#if 0
+            char identifier[1024];
+            int kind = tok->kind;
+            next_token(tok);
+            if (tok->kind != TOK_IDENTIFIER) {
+                parse_error(tok, "struct or union name expected");
+                return;
+            }
+            if (tok->size >= 1024) {
+                parse_error(tok, "struct or union name too long");
+                return;
+            }
+            memcpy(identifier, tok->p, tok->size);
+            identifier[tok->size] = 0;
+            if (kind == TOK_STRUCT)
+                t1 = tok->cb->get_struct_type(tok->cb, identifier);
+            else
+                t1 = tok->cb->get_union_type(tok->cb, identifier);
+            break;
+#endif
+        }
+        default:
+            parse_error(tok, "identifier expected");
+            return;
+        }
+        next_token(tok);
+    }
+
+    /* alloc_ds() returns NULL when the output buffer is full: never write
+       through its result without checking it first */
+    if (emit_opcode(tok, t1) < 0)
+        return;
+    if (const_qualifier && emit_opcode(tok, CTOP_CONST) < 0)
+        return;
+    if (volatile_qualifier && emit_opcode(tok, CTOP_VOLATILE) < 0)
+        return;
+
+    //parse_sequel(tok, CTOP_END);
+    /* a failure here is already recorded in 'tok' by alloc_ds() */
+    (void)emit_opcode(tok, CTOP_END);
+}
+
+
+/* Parse the C type written in the NUL-terminated string 'input' and store
+ * the resulting opcodes in 'output', which has room for 'output_size'
+ * entries.
+ *
+ * Returns 0 if the whole input was consumed as a single type, -1
+ * otherwise.  When -1 is returned, '*error_loc' and '*error_msg' receive
+ * the position and the description of the error; either pointer may be
+ * NULL if the caller is not interested.
+ */
+int parse_c_type(const char *input,
+                 ctype_opcode_t *output, size_t output_size,
+                 const char **error_loc, const char **error_msg)
+{
+    token_t token;
+
+    /* start with an empty token located at the beginning of the input */
+    token.p = input;
+    token.size = 0;
+    token.kind = TOK_START;
+    token.opcodes = output;
+    token.opcodes_end = output + output_size;
+    token.error_location = error_loc;
+    token.error_message = error_msg;
+
+    next_token(&token);
+    parse_complete(&token);
+
+    if (token.kind == TOK_END)
+        return 0;
+
+    /* leftover input, or an error already recorded by parse_complete()
+       (in which case this call leaves the first error in place) */
+    parse_error(&token, "unexpected symbol");
+    return -1;
+}
diff --git a/new/parse_c_type.h b/new/parse_c_type.h
new file mode 100644
--- /dev/null
+++ b/new/parse_c_type.h
@@ -0,0 +1,29 @@
+#ifndef PARSE_C_TYPE_H
+#define PARSE_C_TYPE_H
+
+#include <stddef.h>   /* size_t */
+
+
+/* One entry of the opcode sequence written by parse_c_type(). */
+typedef int ctype_opcode_t;
+
+/* CTOP_END terminates the sequence; CTOP_CONST and CTOP_VOLATILE are
+   written after the type opcode when the matching qualifier was parsed */
+#define CTOP_END 0
+#define CTOP_CONST 1
+#define CTOP_VOLATILE 2
+
+/* opcodes of the primitive types */
+#define CTOP_VOID 100
+#define CTOP_BOOL 101
+#define CTOP_CHAR 102
+#define CTOP_SCHAR 103
+#define CTOP_UCHAR 104
+#define CTOP_SHORT 105
+#define CTOP_USHORT 106
+#define CTOP_INT 107
+#define CTOP_UINT 108
+#define CTOP_LONG 109
+#define CTOP_ULONG 110
+#define CTOP_LONGLONG 111
+#define CTOP_ULONGLONG 112
+#define CTOP_FLOAT 113
+#define CTOP_DOUBLE 114
+#define CTOP_LONGDOUBLE 115
+
+
+/* Parse the C type in the NUL-terminated string 'input' and write its
+   opcodes to 'output', which has room for 'output_size' entries.
+   Returns 0 on success and -1 on error.  On error, '*error_loc' and
+   '*error_msg' are set if the corresponding pointer is not NULL. */
+int parse_c_type(const char *input,
+                 ctype_opcode_t *output, size_t output_size,
+                 const char **error_loc, const char **error_msg);
+
+#endif /* PARSE_C_TYPE_H */
diff --git a/new/test_parse_c_type.py b/new/test_parse_c_type.py
new file mode 100644
--- /dev/null
+++ b/new/test_parse_c_type.py
@@ -0,0 +1,53 @@
+import os
+import cffi
+
+ffi = cffi.FFI()
+ffi.cdef("""
+typedef int ctype_opcode_t;
+
+#define CTOP_END ...
+#define CTOP_CONST ...
+#define CTOP_VOLATILE ...
+
+#define CTOP_VOID ...
+#define CTOP_BOOL ...
+#define CTOP_CHAR ...
+#define CTOP_SCHAR ...
+#define CTOP_UCHAR ...
+#define CTOP_SHORT ...
+#define CTOP_USHORT ...
+#define CTOP_INT ...
+#define CTOP_UINT ...
+#define CTOP_LONG ...
+#define CTOP_ULONG ...
+#define CTOP_LONGLONG ...
+#define CTOP_ULONGLONG ...
+#define CTOP_FLOAT ...
+#define CTOP_DOUBLE ...
+#define CTOP_LONGDOUBLE ...
+
+int parse_c_type(const char *input,
+ ctype_opcode_t *output, size_t output_size,
+ const char **error_loc, const char **error_msg);
+""")
+
+lib = ffi.verify(open('parse_c_type.c').read(),
+ include_dirs=[os.getcwd()])
+
+
+def test_simple():
+ out = ffi.new("ctype_opcode_t[]", 100)
+ for simple_type, expected in [
+ ("int", lib.CTOP_INT),
+ ("signed int", lib.CTOP_INT),
+ (" long ", lib.CTOP_LONG),
+ ("long int", lib.CTOP_LONG),
+ ("unsigned short", lib.CTOP_USHORT),
+ ("long double", lib.CTOP_LONGDOUBLE),
+ ]:
+ for j in range(len(out)):
+ out[j] = -424242
+ res = lib.parse_c_type(simple_type, out, 100, ffi.NULL, ffi.NULL)
+ assert res == 0
+ assert out[0] == expected
+ assert out[1] == lib.CTOP_END
More information about the pypy-commit
mailing list