parsing c-code
Paul McGuire
ptmcg at
Thu Jan 15 12:02:26 EST 2004
"John Benson" <jsbenson at> wrote in message
news:mailman.368.1074113221.12720.python-list at
> I'm currently making my way through Thomas Christopher's Python Programming
> Patterns, which describes an LL(k) parser generator that might be of help. I
> know there are other Python parsing tools out there as well, per a chapter
> of the Python Cookbook.
> Message: 4
> Date: Wed, 14 Jan 2004 09:27:55 +0100
> From: "tm" <js.bach at>
> Subject: Parsing c-code
> To: python-list at
> Message-ID: <bu2uoj$8pc$1 at>
> Hello,
> are there recommended modules for parsing c-code.
> I want to read in c-structs and display it in a tree graphic.
> --
> Torsten
If your c-structs aren't too complicated, try this (uses the pyparsing module):
import pprint

from pyparsing import (Optional, Word, Literal, Forward, alphas, nums, Group,
                       ZeroOrMore, oneOf, delimitedList, cStyleComment,
                       restOfLine)
# Module-level cache for the grammar; populated on the first call to
# getCStructBNF().
cstructBNF = None


def getCStructBNF():
    """Return a pyparsing grammar for simple C ``struct`` declarations.

    The grammar is constructed on the first call and stored in the
    module-level ``cstructBNF``; subsequent calls return that same object.

    The grammar accepts an optionally named ``struct { ... } [declarator];``
    whose members are either nested structs or declarations of the form
    ``type name[, name...];``.  A declarator may carry a leading ``*`` and
    either a bitfield width (``:N``) or a single array size (``[N]``), where
    ``N`` is an integer or an identifier.  C-style ``/* */`` comments and
    ``//`` line comments are ignored by the returned expression.
    """
    global cstructBNF
    if cstructBNF is None:
        # Forward-declared so that memberDecl can refer to nested structs.
        structDecl = Forward()

        ident = Word(alphas + "_", alphas + nums + "_$")
        integer = Word(nums)
        semi = Literal(";").suppress()
        typeName = ident
        varName = ident

        # Should really support an expression here, but keep simple for now.
        arraySizeSpecifier = integer | ident

        # NOTE(review): the archived text of this line was cut off after
        # "char"; the trailing "void" is an assumed restoration -- confirm
        # against the original post.
        typeSpec = Optional("unsigned") + oneOf(
            "int long short double char void"
        )

        bitfieldspec = ":" + arraySizeSpecifier
        varnamespec = Group(
            Optional("*", default="")
            + varName
            + Optional(bitfieldspec | ("[" + arraySizeSpecifier + "]"))
        )
        memberDecl = (
            Group(
                (typeSpec | typeName)
                + Group(delimitedList(varnamespec))
                + semi
            )
            | structDecl
        )
        structDecl << Group(
            "struct"
            + Optional(ident)
            + "{"
            + ZeroOrMore(memberDecl)
            + "}"
            + Optional(varnamespec)
            + semi
        )

        cstructBNF = structDecl
        cplusplusLineComment = Literal("//") + restOfLine
        # Never know where these will crop up!
        cstructBNF.ignore(cStyleComment)
        # ...or these either.
        cstructBNF.ignore(cplusplusLineComment)
    return cstructBNF
# Sample input 1: a flat struct with scalar and array members.
testData1 = """
struct {
long a;
short b;
char c[32];
} a;
"""

# Sample input 2: nested structs, pointer declarators, bitfields, array sizes
# given by identifiers, and both C and C++ style comments.
testData2 = """
struct {
long a;
struct {
int x;
int y;
} pt; // this is an embedded struct
struct {
int x,y;
} pt2;
struct {
int x;
int y;
}* coordPtr; /* this is just a pointer to a struct */
short b;
char c[32];
char d[MAX_LENGTH /* + 1 to make room for terminating null */ ];
char* name;
char *name2; /* no one can agree where the '*' should go */
int bitfield:5; /* this is rare, but not hard to add to parse
grammar */
int bitfield2:BIT2LEN;
void* otherData;
} a;
"""
# Parse each sample with the struct grammar and pretty-print the resulting
# nested token list.
for testdata in (testData1, testData2):
    pprint.pprint(getCStructBNF().parseString(testdata).asList())
More information about the Python-list
mailing list