[getopt-sig] Yet another parser proposal

s.keim s.keim
Mon, 18 Feb 2002 10:25:12 +0100


#  quickcmd.py
#
# a quick and dirty implementation around Shane Hathaway idea:
# let's use python syntax for cmd line grammar specification.
#
# the cmd line must match a function prototype:
#      *options are converted into args with default value
#      *positional parameters are converted into args without default
#       value
#      *use default value type of keyword arguments for type checking
#      *you can also have options without default value, if you provide
#       a callable object as keyword argument default value
#      *a None default value specify a flag (an option without value)
#      *the *args protocol specifies an unlimited number of positional
#       parameters
#      *you can handle multiple values for an option by using a tuple as
#       default value
#      *if you don't care about the number of values you can use list
#       note that the user will have to provide at least one value
#       (maybe something to improve) and that if the number of values
#       provided is greater than the list length, additional values will
#       be left as strings.
#
# Well it wasn't easy to switch between function arguments and command
# line arguments in this explanation, but it's quite easy to use! isn't it?
#
from __future__ import generators
import sys, inspect, types
		
class unix_parser:
    """Split a Unix-style argument vector into positionals and options.

    Parsing is kept separate from grammar checking, which allows
    alternative parsers for other syntaxes (e.g. DOS syntax) to be used.

    Iterating over an instance consumes ``self.argv`` and yields

    * ``(None, token)`` for every leading positional argument, then
    * ``(key, values)`` exactly once per option, where ``values`` is the
      (possibly empty) list of strings attached to that option.

    Once the first option has been seen, every following non-option token
    is taken as a value of the option that precedes it.
    """

    def __init__(self, usage, argv):
        """Store the usage text and split argv into name and arguments.

        usage -- text written by error() after the synopsis
        argv  -- complete argument vector; argv[0] is the program name
        """
        self.argv0 = argv[0]
        self.argv = argv[1:]
        self.usage = usage
        self.synopsis = None

    def __iter__(self):
        """Generate the (key, value) pairs described in the class docstring."""
        # Must be bound even when there is no argument at all, otherwise the
        # second loop would read an unassigned name.
        token = None
        while self.argv:         # positionals
            token = self.argv.pop(0)
            # Slicing instead of indexing: an empty string is a legal token.
            if token[:1] == '-':
                break
            yield None, token
            token = None
        while token:             # options; token is the option being parsed
            if token[:2] == '--':
                # long form: --key or --key=value
                parts = token[2:].split('=')
            else:
                # short form: -k or -kvalue (a lone '-' gives the key '')
                parts = [token[1:2]]
                if token[2:]:
                    parts.append(token[2:])
            key = parts[0]
            value = parts[1:]
            token = None
            while self.argv:
                word = self.argv.pop(0)
                if word[:1] == '-':
                    # Start of the next option: hand it to the outer loop.
                    # The current option is yielded once, below.
                    token = word
                    break
                if word[:1] == '=':
                    # supports "--key =value" and "--key = value"
                    word = word[1:]
                if word != '':
                    value.append(word)
            yield key, value

    def error(self, msg=None):
        """Write msg (if any), the synopsis and the usage text to stderr,
        then terminate the program with exit status 1."""
        if msg:
            sys.stderr.write('argument error: ' + msg + '\n')
        sys.stderr.write('\n' + "SYNOPSIS : " + self.argv0 + ' '
                         + str(self.synopsis) + '\n')
        sys.stderr.write(self.usage)
        sys.stderr.flush()
        sys.exit(1)

    def set_synopsis(self, arg_name, varargs, options, flags):
        """Build the one-line synopsis and store it in self.synopsis.

        arg_name -- list of the positional argument names
        varargs  -- name of the unlimited positional arguments, or None
        options  -- dict mapping option name to its default value / type
        flags    -- dict whose keys are the names of the value-less options
        """
        def sign(k):
            # one dash for a single-letter key, two dashes otherwise
            return '-' * (1 + (len(k) > 1)) + k

        def _show(item):
            # a callable stands for "a value of that type is required"
            if callable(item):
                return '<' + item.__name__ + '>'
            return str(item)

        def _option(arg):
            key, value = arg
            s = '[' + sign(key)
            if isinstance(value, (tuple, list)):
                s += ' '
            else:
                # long options take "=value", short ones " value"
                if len(key) > 1:
                    s += '='
                else:
                    s += ' '
                value = (value,)
            s += ' '.join([_show(i) for i in value])
            if isinstance(value, list):
                # a list accepts an open-ended number of values
                s += '...'
            return s + ']'

        def _flag(key):
            return '[' + sign(key) + ']'

        h = ' '.join(arg_name) + ' '
        if varargs:
            h += '[' + varargs + '...]'
        if 'help' not in options:
            h += '[--help]'
        # List comprehensions rather than map() + map(): the latter only
        # works where map() returns a list.
        pieces = [_option(item) for item in options.items()]
        pieces += [_flag(key) for key in flags]
        h += ''.join(pieces)
        self.synopsis = h + '\n'


class OptionError(Exception):
    """Base class of the errors reported for a bad command line."""

    def check(cls, condition, *args):
        """Assert-like helper: do nothing when condition is true, otherwise
        raise an instance of the class it is called on, built from args."""
        if condition:
            return
        raise cls(*args)
    # explicit wrapping, the form the file was written with
    check = classmethod(check)

# for the type checking
class OptionTypeError(OptionError):
    """Raised when a command line value cannot be converted.

    name  -- name of the offending argument
    value -- the string that could not be converted
    type  -- the converter that rejected it (its __name__ is reported)
    """

    def __init__(self, name, value, _type):
        self.name, self.value, self.type = name, value, _type

    def __str__(self):
        pieces = ["bad value for ", self.name, ", ", self.value,
                  " should be (", self.type.__name__, ")"]
        return "".join(pieces)

def _isdefault(v):
     if callable(v):
         return None
     if type(v) in (type(()),type([])):
         for i in v:
              if callable(i):
                 return None
     return 1

def _checktype(name, values, def_vals):
     unpack = None
     remain = []
     if type(def_vals) ==  type(()):
         OptionError.check(len(values)==len(def_vals), "bad number of 
values for "+name)
     elif type(def_vals) ==  type([]):
         l = min(len(values),len(def_vals))
         values,remain  = values[:l], values[l:]
         def_vals = def_vals[:l]
     else:
         def_vals = (def_vals,)
         unpack = 1
     data = []
     for v,d in zip(values, def_vals):
         if (callable(d)):
             creator = d
         else:
             creator = type(d)
         if creator == types.InstanceType:
             creator = def_val.__class__
         try:
             data.append(creator(v))
         except ValueError:
             raise OptionTypeError(name, v, creator)
     if unpack:
         return data[0]
     else:
         return data+remain

def _grammar(func):
      """return a grammar from function signature:
         return a tupple:
         ([pos args], varargs, {options=value}, {flags=None})
      """
      args, varargs, varkw, defaults = inspect.getargspec(func)
      if varkw : raise TypeError, "grammar function can't use **"+varkw
      l = len(args)-len(defaults)
      args, opts = args[:l], zip(args[l:],defaults)
      flags = {}; options = {}
      for key,value in opts:
          if value:
              options[key] = value
          else:
              flags[key] = None
      return args, varargs, options, flags

class quickcmd:
    """Check a command line against the signature of a grammar function.

    Attributes:
      keywords  -- list of the positional arguments collected by feed()
      options   -- dict of the option values collected by feed()
      parser    -- parser instance created by the last feed() call
      arg_names -- (positional names, varargs name) of the last grammar
    """

    def __init__(self):
        self.keywords = []
        self.options = {}

    def feed(self, func, line=sys.argv, Parser=unix_parser):
        """Analyse the command line `line` with the grammar given by func.

        func   -- grammar function; its signature defines the accepted
                  arguments and its docstring is used as usage text
        line   -- argument vector, program name first
        Parser -- class called as Parser(usage, line) to tokenize the line

        func is finally called with the parsed values so that it can do its
        own semantic checks; an OptionError it raises is reported through
        the parser. Errors are reported with parser.error().
        """
        arg_names, varargs, def_options, def_flags = _grammar(func)
        #would it be beter to use module docstring instead?
        self.parser = parser = Parser(str(inspect.getdoc(func)), line)
        parser.set_synopsis(arg_names, varargs, def_options, def_flags)
        self.arg_names = arg_names, varargs

        options = {}
        positionnals = []
        for key, value in parser:
            if key is None:
                positionnals.append(value)
            elif value == []:
                # option given without any value
                if key in def_flags:
                    options[key] = 1
                elif key == 'help':
                    parser.error()
                elif key in def_options:
                    parser.error("value required for option : " + key)
                else:
                    parser.error("unknown option : " + key)
            elif key in def_options:
                try:
                    options[key] = _checktype(key, value, def_options[key])
                except OptionError:
                    # sys.exc_info() is valid on every Python version,
                    # unlike either spelling of "except ..., e / as e"
                    parser.error(str(sys.exc_info()[1]))
            elif key in def_flags:
                parser.error("option " + key + " can't have value")
            else:
                parser.error("unknown option : " + key)

        #allow change to self.options by type checking functions
        #with this you could for sample let the user specify a file
        #containing defaults
        def_options.update(self.options)
        def_options.update(options)
        self.options = def_options
        self.keywords.extend(positionnals)
        if len(self.keywords) != len(arg_names) and not varargs:
            parser.error("bad number of positionnals arguments "
                         "(should be exactely " + str(len(arg_names)) + ")")
        elif len(self.keywords) < len(arg_names):
            parser.error("not enough positionnals arguments "
                         "(should be at least " + str(len(arg_names)) + ")")
        try:
            # going around a strange feature of python:
            # the following seems to not work when you have unlimited
            # number of positionnals:
            # func(*positionnals, **options)
            l = len(arg_names)
            def_flags.update(def_options)
            # getargspec() is missing from current Python 3; use its
            # successor when available (the first field is the same).
            getspec = (getattr(inspect, 'getfullargspec', None)
                       or inspect.getargspec)
            names = getspec(func)[0][l:]
            p = (positionnals[:l] + [def_flags[n] for n in names]
                 + positionnals[l:])
            func(*p)
        except OptionError:
            parser.error(str(sys.exc_info()[1]))
        # Drop the entries that still hold a type specification, i.e. the
        # options without default value that were not supplied. Iterate
        # over a copy because entries are deleted on the way.
        for key, value in list(self.options.items()):
            if not _isdefault(value):
                del self.options[key]

    def checkargs(self, *_types):
        """Check and convert the positional arguments.

        _types -- one type specification per positional argument, in order
                  (same conventions as for option defaults)

        The converted values replace the matching leading entries of
        self.keywords. A bad value is reported through the parser.
        """
        keywords = self.keywords
        size = len(_types)
        named = self.arg_names[0]
        # Built as a new list: += would extend the list stored in
        # self.arg_names in place. Arguments beyond the named ones are
        # reported under the varargs name.
        arg_names = named + [self.arg_names[1]] * (size - len(named))
        result = []
        for name, value, _type in zip(arg_names, keywords, _types):
            try:
                result.append(_checktype(name, [value], _type))
            except OptionTypeError:
                self.parser.error(str(sys.exc_info()[1]))
        self.keywords = result + keywords[size:]

    def error(self, msg):
        """Report msg through the parser of the last feed() call."""
        self.parser.error(msg)



#---------------------------------------------------------
# the following should go on your own module


# this is the function used to define cmd line grammar
def tester(spam, egs, xy=(1, 1), ran='yes', splof=None, dest=[str],
           v=int, *files):
    """a description of your script

    spam    a positional argument (string)
    egs     another positional argument (float)
    files   specify unlimited positional arguments
    xy      an option with two integer values
    ran     an option with a string value
    splof   an option without value (a flag)
    v       a short option of type int but without default value
    dest    an option with unlimited number of strings values
    """
    # The docstring above is the usage text shown to the user, and the
    # defaults are type specifications read by the parser: [str] and int
    # describe the expected values, they are not ordinary defaults.

    #this is a good place for semantical checking:
    OptionError.check(ran in ['yes', 'no'],
                      " ran must be set to 'yes' or 'no'")
    # v still equals the int type itself when -v was not given
    OptionError.check((not splof) or v != int,
                      "v must be set if splof activated")  #meaningful message ;)

# yes it's all
# hmm...wait, you still have to call the parser ;-)

cmd = quickcmd()
cmd.feed(tester, ['tester', 'f1', '89.5', '--xy', '45', '17'])
# further arguments that can be appended to the list above:
#   '--ran=no', '--dest', '/tmp/chx', '/usr/slog', '--splof', '-v10'

#I was unable to automate type checking-conversion for positional args
cmd.checkargs(str, float)
#maybe would it be better to have this values returned by the grammar
#function?

# it's really all :) , now you can use cmd.options and cmd.keywords
# single-argument print(...) gives the same output on Python 2 and 3
print('-' * 50)
print(cmd.keywords)
print(cmd.options)