parsing c-code

Paul McGuire ptmcg at users.sourceforge.net
Thu Jan 15 18:02:26 CET 2004


"John Benson" <jsbenson at bensonsystems.com> wrote in message
news:mailman.368.1074113221.12720.python-list at python.org...
> I'm currently making my way through Thomas Christopher's Python Programming
> Patterns, which describes an LL(k) parser generator that might be of help. I
> know there are other Python parsing tools out there as well, per Chapter 15
> of the Python Cookbook.
>
> Message: 4
> Date: Wed, 14 Jan 2004 09:27:55 +0100
> From: "tm" <js.bach at web.de>
> Subject: Parsing c-code
> To: python-list at python.org
> Message-ID: <bu2uoj$8pc$1 at piggy.rz.tu-ilmenau.de>
>
> Hello,
>
> are there recommended modules for parsing c-code.
> I want to read in c-structs and display it in a tree graphic.
>
> --
> Torsten
>
If your c-structs aren't too complicated, try this (uses pyparsing, from
http://pyparsing.sourceforge.net):

import pprint

from pyparsing import (Optional, Word, Literal, Forward, alphas, nums, Group,
                       ZeroOrMore, oneOf, delimitedList, cStyleComment,
                       restOfLine)

# Module-level cache for the grammar; built lazily by getCStructBNF().
cstructBNF = None
def getCStructBNF():
    """Return the pyparsing grammar for a C ``struct`` declaration.

    The grammar is built on the first call and cached in the module-level
    ``cstructBNF``; later calls return the same object.

    Recognized form::

        struct [tag] { member* } [declarator] ;

    where each member is either a nested struct declaration or
    ``type declarator[, declarator...] ;``.  A type is an optional
    ``unsigned`` followed by one of ``int long short double char void``,
    or any identifier.  A declarator is an optional ``*``, a name, and an
    optional bitfield width (``: size``) or array size (``[ size ]``),
    where ``size`` is an integer or an identifier.

    C block comments and ``//`` line comments are ignored by the grammar.
    """
    global cstructBNF
    if cstructBNF is None:
        # Forward declaration so that members can contain nested structs.
        structDecl = Forward()
        ident = Word( alphas+"_", alphas+nums+"_$" )
        integer = Word( nums )
        semi = Literal(";").suppress()

        typeName = ident
        varName = ident
        # should really support an expression here, but keep simple for now
        arraySizeSpecifier = integer | ident
        typeSpec = Optional("unsigned") + oneOf("int long short double char void")
        bitfieldspec = ":" + arraySizeSpecifier
        # The "*" slot is always present in the results ("" when absent).
        varnamespec = Group( Optional("*", default="") + varName +
                             Optional( bitfieldspec |
                                       ( "[" + arraySizeSpecifier + "]" ) ) )
        memberDecl = Group( ( typeSpec | typeName ) +
                            Group( delimitedList( varnamespec ) ) +
                            semi ) | structDecl

        structDecl << Group( "struct" + Optional(ident) + "{" +
                             ZeroOrMore( memberDecl ) + "}" +
                             Optional(varnamespec) + semi )

        cstructBNF = structDecl

        cplusplusLineComment = Literal("//") + restOfLine

        cstructBNF.ignore( cStyleComment )  # never know where these will crop up!
        cstructBNF.ignore( cplusplusLineComment )  # or these either

    return cstructBNF


# Sample 1: a flat anonymous struct with scalar members, one fixed-size
# array member, and a trailing variable name.
testData1 = """
    struct {
        long a;
        short b;
        char c[32];
        } a;
"""

# Sample 2: nested anonymous structs, a comma-separated declarator list,
# pointer declarators (with "*" on either side of the space), an array
# sized by an identifier, bitfields, and both /* */ and // comments.
testData2 = """
    struct {
        long a;
        struct {
            int x;
            int y;
            } pt;  // this is an embedded struct
        struct {
            int x,y;
            } pt2;
        struct {
            int x;
            int y;
            }* coordPtr; /* this is just a pointer to a struct */
        short b;
        char c[32];
        char d[MAX_LENGTH /* + 1 to make room for terminating null */ ];
        char* name;
        char *name2;  /* no one can agree where the '*' should go */
        int bitfield:5;  /* this is rare, but not hard to add to parse
grammar */
        int bitfield2:BIT2LEN;
        void* otherData;
        } a;
"""

# Parse each sample and pretty-print the resulting nested token list,
# followed by a blank separator line.
for testdata in (testData1, testData2):
    pprint.pprint( getCStructBNF().parseString(testdata).asList() )
    # A bare `print` is a no-op expression under Python 3; print("") emits
    # the blank line under both Python 2 and Python 3.
    print("")








More information about the Python-list mailing list