This page contains parsers contributed by pyparsing users who have tackled an interesting parsing problem.



Table of Contents


SVG 1.1 Parser (partial)


This parser chops up SVG attributes into useful structures. You can then use those structures to build drawing commands -- e.g., with pycairo.

The code is not complete, but works well right now. I will re-visit this page as I get more done. Please hack this code and improve it!

\d

#!/usr/bin/env python
##    SVG_parser Copyright (C) 2010 Donn.C.Ingle
##    Contact: donn.ingle@gmail.com - I hope this email lasts.
##
##    This file is part of SVG_parser.
##    SVG_parser is free software: you can redistribute it and/or modify
##    it under the terms of the GNU General Public License as published by
##    the Free Software Foundation, either version 3 of the License, or
##    (at your option) any later version.
##
##    SVG_parser is distributed in the hope that it will be useful,
##    but WITHOUT ANY WARRANTY; without even the implied warranty of
##    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##    GNU General Public License for more details.
##
##    You should have received a copy of the GNU General Public License
##    along with SVG_parser.  If not, see <http://www.gnu.org/licenses/>.
 
"""
Oh brevity, thy namespace is pyparsing.
"""
 
"""
About:
 
Reference:
Scalable Vector Graphics (SVG) 1.1 Specification
http://www.w3.org/TR/SVG/index.html
"""
 
import pyparsing as PP 
 
## Quick handy helper - return the value stored under key, or None
def keyval_or_none(key, dct):
  """
  Look up key in dct.
  Returns: dct[key] when key is in dct, otherwise None.
  """
  if key in dct:
    return dct[key]
  return None
 
 
## Some basics
 
## Quoth Paul:
# "just get all the punctuation out of the way"
## One suppressed token per punctuation character: each one is matched
## by the grammar but left out of the parse results.
dot, comma, open_bracket, close_bracket, semi, colon = (
    PP.Suppress(mark) for mark in ".,();:"
)
 
## Quoth Paul:
## "I'm finding that complex items like real numbers just work better
## using a Regex than Combine'ing Words, Optionals, etc."
## The pattern follows the SVG number grammar: an optional sign, then
## either digits with an optional fraction ("12", "12.", "12.5") or a bare
## fraction (".5"), then an optional exponent ("1e3", "2.5E-4").
floater = PP.Regex(r"[+-]?(\d+(\.\d*)?|\.\d+)([Ee][+-]?\d+)?")
## Hand the matched text on as a Python float instead of a string.
floater.setParseAction(lambda toks:float(toks[0]))
 
 
 
## --------------------------------------------------------
##                  The <path> 'd' attribute
## --------------------------------------------------------
## http://www.w3.org/TR/SVG/paths.html
 
## A couple is an X,Y coord: two floats with a (suppressed) comma between them.
couple = PP.Group( floater + comma + floater ) #Grouped so they'll be in a list together.
## A triple of couples (like you get in C (bezier) commands), grouped again
## so that the three couples come back together as one list.
triplecouple = PP.Group(couple+couple+couple)
 
 
class Command(PP.CaselessLiteral):
  """
  I need the case of d commands to be preserved.
  Uppercase means absolute coords, lower means relative.

  Matches like a CaselessLiteral, but returns the text exactly as it
  appears in the input instead of the literal's own spelling.
  """
  def parseImpl( self, instring, loc, doActions=True ):
    """
    Try to match this command letter at loc.
    Returns: (location just past the match, the matched text as-is).
    Raises: PP.ParseException when the text at loc is not this command.
    """
    test = instring[ loc:loc+self.matchLen ]
    ## self.match holds the uppercased literal, so compare in uppercase...
    if test.upper() == self.match:
      return loc+self.matchLen, test #...but return test, as-is.
    ## Build a fresh exception for every failure. The old code patched
    ## loc/pstr onto the single cached self.myException object, which is
    ## shared between all failures of this element and is an internal
    ## attribute that newer pyparsing releases no longer appear to provide.
    raise PP.ParseException(instring, loc, self.errmsg, self)
 
## We use the Command class to ensure M or m gets through.
## BTW: All this PP.Group() stuff makes nice lists of what you group -- I rely on this later.
M_command = Command("M") + PP.OneOrMore(couple) # M commands with > 1 couple are line_to commands...
C_command = Command("C") + PP.OneOrMore(triplecouple) # We can have many triples after a C
L_command = Command("L") + PP.OneOrMore(PP.Group(couple)) # We can have many couples after an L (each one wrapped in an extra list)
## closepath can be written "Z" or "z". A bare "Z" string only matches the
## uppercase form, which made a lowercase "z" silently end the parse, so it
## goes through Command like the other letters.
Z_command = Command("Z")

## Any one path command...
d_commands = M_command | C_command | L_command | Z_command
## ...and a whole 'd' attribute: one or more commands, each in its own list.
phrase_d = PP.OneOrMore(PP.Group(d_commands))
 
 
## --------------------------------------------------------
##               The 'transform' attribute
## --------------------------------------------------------
## http://www.w3.org/TR/SVG/coords.html#TransformAttribute
 
## Whoo-boy! Do I love this one:
## a delimitedList of floats, grouped so that all the arguments of one
## transform come back together as a single list.
floatargs = PP.Group(PP.delimitedList(floater))
 
## Per-transform data and rules, keyed by the transform's name:
##   'ok'          : given the number of arguments, says whether it is valid.
##   'missing_arg' : (only where present) given the argument list, builds
##                   a two-item list from its first item.
attrib_rules = {
    'matrix'   : {'ok': lambda count: count == 6},
    'translate': {'ok': lambda count: count in (1, 2), 'missing_arg': lambda args: [args[0], 0]},
    'rotate'   : {'ok': lambda count: count in (1, 3)},
    'scale'    : {'ok': lambda count: count in (1, 2), 'missing_arg': lambda args: [args[0], args[0]]},
    'skewX'    : {'ok': lambda count: count == 1},
    'skewY'    : {'ok': lambda count: count == 1}
}
 
## Validate all floatargs:
def validate_args(s,l,tokens):
  """
  Parse action for floatargs: check a transform's argument list.
  s      : the whole string being parsed.
  l      : the location in s where the argument list starts.
  tokens : the match; tokens[0] is the list of float arguments.
  Returns: [new_args] when the transform has a 'missing_arg' rule and only
  one argument was given; otherwise None, which leaves the tokens as they are.
  Raises: PP.ParseException when the transform name cannot be found or the
  number of arguments is wrong.
  """
  ## The grammar has just matched <name> "(" in front of the args, so the
  ## text before l ends with the name. Peel off the bracket and any
  ## whitespace around it instead of assuming "(" sits exactly at l-1.
  head = s[:l].rstrip()
  if head.endswith("("):
    head = head[:-1].rstrip()
  ns = next((name for name in attrib_rules if head.endswith(name)), None)
  if ns is None:
    raise PP.ParseException(s,l,"unknown transform")
  rules = attrib_rules[ ns ]
  toks = tokens[0] # it comes wrapped in another list [ [...] ]
  ## First verify the number of args
  if not rules['ok'](len(toks)):
    ## Single-argument print(...) behaves the same on Python 2 and 3.
    print("  ** {0}{1} is malformed. This is me ignoring it.".format(ns,toks))
    ## This excep causes PP to skip over the malformed whatsits.
    raise PP.ParseException(s,l,"wrong number of args")
  ## Now, check for optional args (missing) and run their rule.
  ## Only when the argument really is missing: running the rule on a full
  ## argument list would throw away the second value that was supplied.
  if 'missing_arg' in rules and len(toks) == 1:
    return [ rules['missing_arg'](toks) ]
 
## "(" args ")" -- the brackets are suppressed, only the args are kept
oac = open_bracket + floatargs + close_bracket

## Make some pyparsing-voodoo in a list: one  name + "(" args ")"
## expression per known transform.
mcs = [ key + oac for key in attrib_rules ]

## Sweet :) Use that list as alternatives. MatchFirst is what chaining the
## items together with | builds, without needing reduce (which is not a
## builtin on Python 3).
matrix_commands = PP.MatchFirst(mcs)

## Setup func to validate the args
floatargs.setParseAction( validate_args )

## We have a winner!
phrase_transform = PP.OneOrMore(PP.Group(matrix_commands))
 
 
## --------------------------------------------------------
##                   Style grammar
## --------------------------------------------------------
## Still a work in progress.
 
## An optional "px" unit suffix, matched but dropped from the results
px = PP.Suppress("px")
## A colour is "#" plus hex digits (the "#" is dropped) or the keyword "none".
## Keyword, not Word: Word("none") accepts any run of the letters n, o and e
## ("neon", "one", ...) rather than just the word itself.
hexNums = PP.Suppress("#") + PP.Word(PP.hexnums) | PP.Keyword("none")

## Each command is: property name, ":", value, and an optional trailing ";"
FILL_command = "fill" + colon + hexNums + PP.Optional(semi)
FILLOPACITY_command = "fill-opacity" + colon + floater + PP.Optional(semi)
STROKECOLOR_command = "stroke" + colon + hexNums + PP.Optional(semi)
STROKEWIDTH_command = "stroke-width" + colon + floater + PP.Optional(px) + PP.Optional(semi)
STROKEOPACITY_command = "stroke-opacity" + colon + floater + PP.Optional(semi)

## Any one of the style properties known so far
phrase_style = FILL_command | FILLOPACITY_command | STROKECOLOR_command | STROKEWIDTH_command | STROKEOPACITY_command
 
 
def parse_transform( t ):
  """
  Parse the "transform" attribute of an element.
  t is the attribute's value as a string (or None).
  Returns: a list of dicts like {'cmd': name, 'args': [floats]}, one per
  transform found, or None when t is None or nothing was found.
  """
  if t is None:
    return None

  commands = []
  ## Every searchString hit is itself a list of grouped transforms,
  ## hence the two levels of looping.
  for match in phrase_transform.searchString( t ):
    for group in match:
      name_and_args = group.asList()
      commands.append( {'cmd': name_and_args[0], 'args': name_and_args[1]} )

  ## An empty list is reported as None.
  return commands or None
 
 
 
## PP hands back plain lists, so these helpers put names on the values
## for parse_d.
def _xy_dict(pair):
  """Turn [x, y] into {'x': x, 'y': y}."""
  return {'x': pair[0], 'y': pair[1]}

def _bezier_dict(triple):
  """Turn [[x1, y1], [x2, y2], [x3, y3]] into one dict with those six keys."""
  first = triple[0]
  second = triple[1]
  third = triple[2]
  return {
      'x1': first[0], 'y1': first[1],
      'x2': second[0], 'y2': second[1],
      'x3': third[0], 'y3': third[1],
  }

## Command letter (uppercase) -> converter from list to coords dict
l2d = {
    'M': _xy_dict,
    'L': _xy_dict,
    'C': _bezier_dict,
    }
 
def parse_d( d ):
  """
  Parse the "d" attribute of an element.
  d is the attribute's value as a string.
  Returns: None when d is empty, otherwise a list of dicts holding
  'cmd'    : the command letter
  'coords' : { 'x':..., 'y':... etc. } -- left out for commands without
             coordinates, e.g. Z
  """
  if not d:
    return None

  commands = []
  for parsed in phrase_d.parseString( d ):
    items = parsed.asList() # plain list: command letter, then its operands
    letter, operands = items[0], items[1:]
    kind = letter.upper()

    if kind == "M":
      ## M/m : the first couple is the move_to itself...
      commands.append( {'cmd': letter, 'coords': l2d['M'](operands[0])} )
      ## ...and any further couples are line_to's of the same case.
      line_letter = "l" if letter != "M" else "L"
      commands.extend(
          {'cmd': line_letter, 'coords': l2d['L'](point)}
          for point in operands[1:] )
    elif kind == "C":
      ## C/c : one command per bezier triple-couple
      commands.extend(
          {'cmd': letter, 'coords': l2d['C'](triple)}
          for triple in operands )
    elif kind == "L":
      ## L/l : one command per couple; each couple arrives wrapped in a list
      commands.extend(
          {'cmd': letter, 'coords': l2d['L'](wrapped[0])}
          for wrapped in operands )
    else:
      ## Anything else is taken to be Z: no coords, letter uppercased
      commands.append( {"cmd": kind} )

  return commands
 
 
 
def _point_text(c):
  """Write an x/y coords dict as "x,y"."""
  return "{0},{1}".format(c['x'], c['y'])

def _bezier_text(c):
  """Write a bezier coords dict as "x1,y1 x2,y2 x3,y3"."""
  return "{0},{1} {2},{3} {4},{5}".format(
      c['x1'], c['y1'],
      c['x2'], c['y2'],
      c['x3'], c['y3'])

## Command letter (uppercase) -> converter from coords dict to text
d2l = {
    "M": _point_text,
    "L": _point_text,
    "C": _bezier_text,
  }
def unparse_to_d( lst ):
  """
  Given a list of command-dicts (as returned by parse_d()), return
  a 'd' string fit for an svg path tag attribute.
  Every command is written as its letter, a space, its coordinates
  (when it has any) and another space.
  """
  pieces = []
  for entry in lst:
    letter = entry['cmd']
    pieces.append( letter + " " )
    coords = entry['coords'] if 'coords' in entry else None
    if coords:
      ## The formatters are keyed by the uppercase letter.
      pieces.append( d2l[letter.upper()](coords) )
    pieces.append( " " )
  return "".join(pieces)
 
 
def parse_style( style ):
  """
  Given a style attribute string, parse into a dict.
  This is still under progress.
  Returns: a dict mapping each recognised property name to its parsed
  value; an empty dict when style is empty or None.
  Use like this:
    d = parse_style(some_style_string)
    rgb = d['fill']
  """
  if not style:
    return {}
  tokens = phrase_style.searchString( style )
  ## Every match is a [name, value] pair, which dict() takes as-is.
  ## (The dict used to be built and then dropped without being returned.)
  return dict(tokens.asList())
 
 
if __name__ == "__main__":
  ## Tests: parse a sample path and a sample transform string and
  ## pretty-print what comes back.

  ## A d string. Newlines for clarity.
  d="""m 88.893427,8.5760927 2,2
  c 
  0,41.5629413 
  -34.597945,75.2563633 
  -77.276672,75.2563633 
 
  -42.678728,0 
  -77.276673,-33.693422 
  -77.276673,-75.2563633 
 
  0,-41.5629417 
  34.597945,-75.2563627 
  77.276673,-75.2563627 
 
  42.678727,0 
  77.276672,33.693421 
  77.276672,75.2563627   
  L
  10,10
  20,20
  L
  7,8
  C
  1,2
  3,4
  5,6
  Z"""

  import pprint

  ## Single-argument print(...) behaves the same on Python 2 and 3.
  print("PATH TEST:")
  pprint.pprint( parse_d(d) )

  ## Test a transform string (some entries are malformed on purpose):
  s="""matrix(1,2,3,4,5,6) matrix(22222222222222) 
  translate(20,60) translate(55) 
  scale(3,4) scale(22) 
  rotate(5,5,6) rotate(99) rotate(1,2) 
  skewX(10) skewY(20)"""

  print("TRANSFORM TEST:")
  pprint.pprint( parse_transform(s) )

Running this script produces the following output:
PATH TEST:
[{'cmd': 'm', 'coords': {'x': 88.893427000000003, 'y': 8.5760927000000002}},
 {'cmd': 'l', 'coords': {'x': 2.0, 'y': 2.0}},
 {'cmd': 'c',
  'coords': {'x1': 0.0,
             'x2': -34.597945000000003,
             'x3': -77.276672000000005,
             'y1': 41.562941299999999,
             'y2': 75.256363300000004,
             'y3': 75.256363300000004}},
 {'cmd': 'c',
  'coords': {'x1': -42.678728,
             'x2': -77.276673000000002,
             'x3': -77.276673000000002,
             'y1': 0.0,
             'y2': -33.693421999999998,
             'y3': -75.256363300000004}},
 {'cmd': 'c',
  'coords': {'x1': 0.0,
             'x2': 34.597945000000003,
             'x3': 77.276673000000002,
             'y1': -41.562941700000003,
             'y2': -75.256362699999997,
             'y3': -75.256362699999997}},
 {'cmd': 'c',
  'coords': {'x1': 42.678727000000002,
             'x2': 77.276672000000005,
             'x3': 77.276672000000005,
             'y1': 0.0,
             'y2': 33.693421000000001,
             'y3': 75.256362699999997}},
 {'cmd': 'L', 'coords': {'x': 10.0, 'y': 10.0}},
 {'cmd': 'L', 'coords': {'x': 20.0, 'y': 20.0}},
 {'cmd': 'L', 'coords': {'x': 7.0, 'y': 8.0}},
 {'cmd': 'C',
  'coords': {'x1': 1.0,
             'x2': 3.0,
             'x3': 5.0,
             'y1': 2.0,
             'y2': 4.0,
             'y3': 6.0}},
 {'cmd': 'Z'}]
TRANSFORM TEST:
  ** matrix[22222222222222.0] is malformed. This is me ignoring it.
  ** matrix[22222222222222.0] is malformed. This is me ignoring it.
  ** rotate[1.0, 2.0] is malformed. This is me ignoring it.
  ** rotate[1.0, 2.0] is malformed. This is me ignoring it.
[{'args': [1.0, 2.0, 3.0, 4.0, 5.0, 6.0], 'cmd': 'matrix'},
 {'args': [20.0, 0], 'cmd': 'translate'},
 {'args': [55.0, 0], 'cmd': 'translate'},
 {'args': [3.0, 3.0], 'cmd': 'scale'},
 {'args': [22.0, 22.0], 'cmd': 'scale'},
 {'args': [5.0, 5.0, 6.0], 'cmd': 'rotate'},
 {'args': [99.0], 'cmd': 'rotate'},
 {'args': [10.0], 'cmd': 'skewX'},
 {'args': [20.0], 'cmd': 'skewY'}]