[XML-SIG] Announcing PyXPath 1.2

Martin v. Loewis martin@loewis.home.cs.tu-berlin.de
Fri, 29 Dec 2000 16:57:36 +0100


I have now completed the first fully-functional version of a 4XPath
parser, so PyXPath *should* work as a drop-in replacement of the
bison/lex part of 4XPath; essentially, it offers a function
pyxpath.Compile that has the same meaning as xml.xpath.Compile. It
uses the Parsed* classes of 4XPath as-is, so no modification to these
classes is necessary.

The distribution is available from

http://www.informatik.hu-berlin.de/~loewis/xml/PyXPath-1.2.tgz

To introduce some abstraction from the specific classes, and from the
fact that 4XPath uses bison token numbers in many places, I have
defined an abstract interface to XPath, which is attached
below. Unlike a former W3C effort, this API is currently designed
towards "pluggable parsers", i.e. the implementation of the abstract
syntax tree is separated from the parser engine.

This interface currently does not at all attempt to support
evaluation; thus it is orthogonal to Scott Boag's draft, which only
supported evaluation but not creation of an XPath tree. I plan to
extend that API to also support evaluation; contributions are welcome.

Even though I managed to make the current 4XPath classes appear as
an implementation of that API, this conformance works so far only for
the ExprFactory interface. According to the API, each object should
have a number of attributes to allow navigation in the
expression. Since 4XPath does not expose any attributes, I decided to
come up with my own attribute names and types. I'd like to hear about
potential improvements to that API before making 4XPath fully
conforming.

The API is IDL-based, in the same sense as the DOM: there is a
(yet to be specified) mapping to Python, which roughly works as
follows:
- global constants are defined in the module xml.xpath.
- DOMString means Unicode objects, although normal strings should
  be accepted where possible.
- attributes are accessed as attributes; _get_ accessor functions
  are optional.

Any comments are welcome.

Regards,
Martin

// Abstract interface to XPath expression trees.
//
// The module declares:
//   - Expr, the common base of every node, plus one derived interface
//     per kind of node;
//   - ExprFactory, through which nodes are created;
//   - Parser, which turns a location-path string into an Expr.
//
// Nothing in this module evaluates an expression; it only describes how a
// tree is built and what attributes each node exposes for navigation.
module XPath{

  // String type used for every textual value in this API.
  typedef wstring DOMString;

  // Node-kind codes; presumably these are the values reported by
  // Expr::exprType (the names line up with the interfaces below).
  // NOTE(review): there is no code for AxisSpecifier even though it
  // derives from Expr -- confirm what its exprType should be.
  const unsigned short ABSOLUTE_LOCATION_PATH = 1;
  const unsigned short ABBREVIATED_ABSOLUTE_LOCATION_PATH = 2;
  const unsigned short RELATIVE_LOCATION_PATH = 3;
  const unsigned short ABBREVIATED_RELATIVE_LOCATION_PATH = 4;
  const unsigned short STEP_EXPR = 5; // not STEP: that would differ from the interface name Step only in case
  const unsigned short NODE_TEST = 6;
  const unsigned short NAME_TEST = 7;
  const unsigned short BINARY_EXPR = 8;
  const unsigned short UNARY_EXPR = 9;
  const unsigned short PATH_EXPR = 10;
  const unsigned short ABBREVIATED_PATH_EXPR = 11; // filter '//' path
  const unsigned short FILTER_EXPR = 12;
  const unsigned short VARIABLE_REFERENCE = 13;
  const unsigned short LITERAL_EXPR = 14;
  const unsigned short NUMBER_EXPR = 15;
  const unsigned short FUNCTION_CALL = 16;                              


  // Base interface of every expression node.
  interface Expr{
    // Kind of this node; presumably one of the node-kind codes above.
    readonly attribute unsigned short exprType;
  };

  // Forward declarations, so that ExprFactory can name the node interfaces
  // before they are defined further down.
  // NOTE(review): AbbreviatedAbsoluteLocationPath and UnionExpr are declared
  // here but never defined or referenced anywhere else in this module.
  interface AbsoluteLocationPath;
  interface AbbreviatedAbsoluteLocationPath;
  interface RelativeLocationPath;
  interface Step;
  interface AxisSpecifier;
  interface NodeTest;
  // Both list types are plain sequences of Expr.
  typedef sequence<Expr> PredicateList, ExprList;
  interface NameTest;
  interface BinaryExpr;
  interface UnaryExpr;
  interface UnionExpr;
  interface PathExpr;
  interface FilterExpr;
  interface VariableReference;
  interface Literal;
  interface Number;
  interface FunctionCall;

  // Factory with one creation operation per kind of node. A parser builds
  // its tree exclusively through these operations, which keeps the tree
  // implementation separate from the parser engine.
  interface ExprFactory{
    // Both absolute-path operations return AbsoluteLocationPath; the
    // abbreviated form corresponds to '//' relative (see the comment on
    // AbsoluteLocationPath below).
    AbsoluteLocationPath createAbsoluteLocationPath(in RelativeLocationPath p);
    AbsoluteLocationPath createAbbreviatedAbsoluteLocationPath(in RelativeLocationPath p);
    // Both relative-path operations combine an existing path (left) with
    // one further step (right).
    RelativeLocationPath createRelativeLocationPath(in RelativeLocationPath left,
						    in Step right);
    RelativeLocationPath createAbbreviatedRelativeLocationPath(in RelativeLocationPath left,
							       in Step right);

    // NOTE(review): test is typed NodeTest, but NameTest derives only from
    // Expr, so a name test does not fit this signature as declared --
    // confirm the intended typing.
    Step createStep(in AxisSpecifier axis, in NodeTest test, in PredicateList predicates);
    // . is represented as self::node(); .. as parent::node()
    Step createAbbreviatedStep(in boolean dotdot); // false for .; true for ..
    // An omitted axis name is created as CHILD; @ is created as _ATTRIBUTE

    // name is presumably one of the axis constants (ANCESTOR .. SELF).
    AxisSpecifier createAxisSpecifier(in unsigned short name);

    // type is presumably one of COMMENT, TEXT, PROCESSING_INSTRUCTION, NODE.
    // NOTE(review): there is no parameter for NodeTest::literal, so it is
    // unclear how the literal of a processing-instruction test is supplied.
    NodeTest createNodeTest(in unsigned short type);
    NameTest createNameTest(in DOMString prefix, in DOMString localName);

    // operator is presumably one of the BINOP_* constants.
    BinaryExpr createBinaryExpr(in unsigned short operator, in Expr left, in Expr right);

    // No operator argument: the only unary operator is the unary minus
    // (see UnaryExpr below).
    UnaryExpr createUnaryExpr(in Expr exp);

    PathExpr createPathExpr(in Expr filter, in Expr path);
    // filter '//' path
    PathExpr createAbbreviatedPathExpr(in Expr filter, in Expr path);

    FilterExpr createFilterExpr(in Expr filter, in Expr predicate);

    // the name must still contain the leading $
    VariableReference createVariableReference(in DOMString name);

    Literal createLiteral(in DOMString literal);
    // The number is passed as a string, whereas Number::value is a double.
    Number createNumber(in DOMString value);
    FunctionCall createFunctionCall(in DOMString name, in ExprList args);
  };

  // Entry point that turns the textual form of a location path into a tree.
  interface Parser{
    Expr parseLocationPath(in DOMString path); // returns absolute or relative path, or step
  };
  
  // Location path that starts at the root.
  interface AbsoluteLocationPath:Expr{
    /* '/' relative-opt, or '//' relative */
    readonly attribute Expr relative; // step or relative path
  };

  // Location path made of a leading part (left) and one further step (right).
  interface RelativeLocationPath:Expr{
    readonly attribute Expr left; // step or relative path
    readonly attribute Step right;
  };

  // Single location step: axis, node test and predicates.
  interface Step:Expr{
    readonly attribute AxisSpecifier axis;
    readonly attribute NodeTest test;
    readonly attribute PredicateList predicates;
  };

  // Axis codes; presumably the values of AxisSpecifier::name and of the
  // name argument of ExprFactory::createAxisSpecifier.
  const unsigned short ANCESTOR = 1;
  const unsigned short ANCESTOR_OR_SELF = 2;
  const unsigned short _ATTRIBUTE = 3; // attribute is a keyword
  const unsigned short CHILD = 4;
  const unsigned short DESCENDANT = 5;
  const unsigned short DESCENDANT_OR_SELF = 6;
  const unsigned short FOLLOWING = 7;
  const unsigned short FOLLOWING_SIBLING = 8;
  const unsigned short NAMESPACE = 9;
  const unsigned short PARENT = 10;
  const unsigned short PRECEDING = 11;
  const unsigned short PRECEDING_SIBLING = 12;
  const unsigned short SELF = 13;
  // Axis of a step, identified by a numeric code.
  interface AxisSpecifier:Expr{
    readonly attribute unsigned short name;
  };

  // Node-type codes; presumably the values of NodeTest::test and of the
  // type argument of ExprFactory::createNodeTest.
  const unsigned short COMMENT = 1;
  const unsigned short TEXT = 2;
  const unsigned short PROCESSING_INSTRUCTION = 3;
  const unsigned short NODE = 4;
  // Node-type test of a step.
  interface NodeTest:Expr{
    readonly attribute unsigned short test;
    readonly attribute DOMString literal; // only for PROCESSING_INSTRUCTION
  };

  // Name test, given as namespace prefix plus local name.
  interface NameTest:Expr{
    readonly attribute DOMString prefix; // may be null
    readonly attribute DOMString localName; // may be "*"
  };

  // Binary operator codes; presumably the values of BinaryExpr::operator.
  const unsigned short BINOP_OR = 1;
  const unsigned short BINOP_AND = 2;
  const unsigned short BINOP_EQ = 3;
  const unsigned short BINOP_NEQ = 4;
  const unsigned short BINOP_LT = 5;
  const unsigned short BINOP_GT = 6;
  const unsigned short BINOP_LE = 7;
  const unsigned short BINOP_GE = 8;
  const unsigned short BINOP_PLUS = 9;
  const unsigned short BINOP_MINUS = 10;
  const unsigned short BINOP_TIMES = 11;
  const unsigned short BINOP_DIV = 12;
  const unsigned short BINOP_MOD = 13;
  const unsigned short BINOP_UNION = 14;
  // Expression with an operator code and two operands.
  interface BinaryExpr:Expr{
    readonly attribute unsigned short operator;
    readonly attribute Expr left,right;
  };

  // can be only the unary minus
  interface UnaryExpr:Expr{
    readonly attribute Expr exp; // the operand
  };

  // Filter expression followed by a path; the abbreviated form is
  // filter '//' path.
  interface PathExpr:Expr{
    readonly attribute Expr filter;
    readonly attribute Expr path;
  };

  // Expression (filter) together with one predicate.
  interface FilterExpr:Expr{
    readonly attribute Expr filter;
    readonly attribute Expr predicate;
  };

  // Reference to a variable.
  // NOTE(review): createVariableReference requires the leading $ in its
  // argument; confirm whether name here keeps it as well.
  interface VariableReference:Expr{
    readonly attribute DOMString name;
  };

  // String literal.
  interface Literal:Expr{
    readonly attribute DOMString value;
  };

  // Numeric literal; exposed as a double although the factory receives
  // it as a string.
  interface Number:Expr{
    readonly attribute double value;
  };

  // Function call: function name plus argument expressions.
  interface FunctionCall:Expr{
    readonly attribute DOMString name;
    readonly attribute ExprList args;
  };

};