This page holds parsers and grammars that are still in transitional stages of development, before they are posted to the Examples page.


Time expression parser


This parser reads natural-language descriptions of time, such as "10 minutes from now" or "3 days ago", and computes the corresponding datetime object, which is stored in the parse results under the name calculatedTime.

from datetime import datetime, timedelta
from pyparsing import *
import calendar
 
# string conversion parse actions
def convertToTimedelta(toks):
    """Parse action: convert a parsed quantity/unit/direction into a timedelta.

    Reads the ``timeunit`` results name (required) plus the optional ``qty``
    and ``dir`` results names from *toks*, and stores the resulting offset
    back into *toks* under the ``timeOffset`` results name.

    A missing ``qty`` means one unit; a missing ``dir`` leaves the offset
    positive.  Raises KeyError if the unit is not one of
    week/day/hour/minute/second (singular or with a trailing "s").
    """
    # normalize e.g. "Minutes" or "minute" to the singular lowercase table key
    unit = toks.timeunit.lower().rstrip("s")
    td = {
        'week'   : timedelta(weeks=1),
        'day'    : timedelta(days=1),
        'hour'   : timedelta(hours=1),
        'minute' : timedelta(minutes=1),
        'second' : timedelta(seconds=1),
        }[unit]
    # Test for presence rather than truthiness: a parsed quantity of 0 is
    # falsy, and must still scale the offset down to zero instead of being
    # silently treated as "one unit".
    if "qty" in toks:
        td *= int(toks.qty)
    # dir is +1 or -1 when present; an absent results name reads as ""
    if toks.dir:
        td *= toks.dir
    toks["timeOffset"] = td
 
def convertToDay(toks):
    """Parse action: resolve a day reference to a datetime stored as ``absTime``.

    If *toks* carries a ``wkdayRef`` results name, its ``day`` (a weekday
    name) and ``dir`` (compared against 0) select the matching weekday at or
    after today (dir > 0) or at or before today (otherwise), at midnight.
    Otherwise the ``name`` results name is looked up as one of
    now/today/yesterday/tomorrow; an unknown name raises KeyError.
    """
    current = datetime.now()
    midnight_today = datetime(current.year, current.month, current.day)

    if "wkdayRef" not in toks:
        named_days = {
            "now"       : current,
            "today"     : midnight_today,
            "yesterday" : midnight_today + timedelta(-1),
            "tomorrow"  : midnight_today + timedelta(+1),
            }
        toks["absTime"] = named_days[toks.name.lower()]
        return

    ref = toks.wkdayRef
    lowered_names = [d.lower() for d in calendar.day_name]
    target_idx = lowered_names.index(ref.day.lower())
    today_idx = current.weekday()
    if ref.dir > 0:
        # days forward to the named weekday (0 when it is today)
        shift = (target_idx + 7 - today_idx) % 7
    else:
        # days back to the named weekday (0 when it is today)
        shift = -((today_idx + 7 - target_idx) % 7)
    toks["absTime"] = midnight_today + timedelta(shift)
 
def convertToAbsTime(toks):
    """Parse action: combine a day reference and a time of day into ``absTime``.

    The day comes from ``toks.dayRef.absTime`` (truncated to midnight) when a
    ``dayRef`` results name is present, otherwise from today's date.  The
    time of day comes from the ``timeOfDay`` results name:

    * a structured time (the ``timeparts`` results name is present): either
      ``miltime`` as an (HH, MM) pair, or ``HH`` with optional ``MM``/``SS``
      and an ``ampm`` marker;
    * otherwise one of the words "now", "noon" or "midnight" (any other
      value raises KeyError).

    With no ``timeOfDay`` at all, the current wall-clock time is used.
    The result is stored into *toks* under ``absTime``.
    """
    now = datetime.now()
    if "dayRef" in toks:
        day = toks.dayRef.absTime
        day = datetime(day.year, day.month, day.day)
    else:
        day = datetime(now.year, now.month, now.day)

    # time elapsed since today's midnight, used for "now" and as the default
    sinceMidnight = timedelta(0, (now.hour*60+now.minute)*60+now.second, now.microsecond)

    if "timeOfDay" in toks:
        # Distinguish a structured time from a plain word by the presence of
        # the timeparts results name instead of isinstance(..., basestring),
        # which raises NameError on Python 3.
        if "timeparts" in toks:
            hhmmss = toks.timeparts
            if hhmmss.miltime:
                hh,mm = hhmmss.miltime
                ss = 0
            else:
                # 12-hour clock: 12am -> 0, 12pm -> 12
                hh,mm,ss = (hhmmss.HH % 12), hhmmss.MM, hhmmss.SS
                # absent optional fields read as "", so default them to 0
                if not mm: mm = 0
                if not ss: ss = 0
                if toks.timeOfDay.ampm == 'pm':
                    hh += 12
            timeOfDay = timedelta(0, (hh*60+mm)*60+ss, 0)
        else:
            timeOfDay = {
                "now"      : sinceMidnight,
                "noon"     : timedelta(hours=12),
                "midnight" : timedelta(),
                }[toks.timeOfDay]
    else:
        timeOfDay = sinceMidnight
    toks["absTime"] = day + timeOfDay
 
def calculateTime(toks):
    """Parse action: store the final datetime under ``calculatedTime``.

    Starts from the ``absTime`` results name when it is set (otherwise from
    the current time) and adds the ``timeOffset`` results name when it is
    set and non-zero.
    """
    base = toks.absTime or datetime.now()
    offset = toks.timeOffset
    toks["calculatedTime"] = (base + offset) if offset else base
 
# grammar definitions
CL = CaselessLiteral
today, tomorrow, yesterday, noon, midnight, now = map( CL,
    "today tomorrow yesterday noon midnight now".split())
# a unit name with an optional trailing "s", returned as a single token
plural = lambda s : Combine(CL(s) + Optional(CL("s")))
week, day, hour, minute, second = map( plural,
    "week day hour minute second".split())
am = CL("am")
pm = CL("pm")
COLON = Suppress(':')

# are these actually operators?
# each direction word is replaced at parse time by a +1 / -1 multiplier
in_ = CL("in").setParseAction(replaceWith(1))
from_ = CL("from").setParseAction(replaceWith(1))
before = CL("before").setParseAction(replaceWith(-1))
after = CL("after").setParseAction(replaceWith(1))
ago = CL("ago").setParseAction(replaceWith(-1))
next_ = CL("next").setParseAction(replaceWith(1))
last_ = CL("last").setParseAction(replaceWith(-1))

# quantities: a number, "[a] couple [of]" (2), or "a" (1)
couple = (Optional(CL("a")) + CL("couple") + Optional(CL("of"))).setParseAction(replaceWith(2))
a_qty = CL("a").setParseAction(replaceWith(1))
integer = Word(nums).setParseAction(lambda t:int(t[0]))
# exactly four digits, split into [first two digits, last two digits] as ints
int4 = Group(Word(nums,exact=4).setParseAction(lambda t: [int(t[0][:2]),int(t[0][2:])] ))
qty = integer | couple | a_qty
dayName = oneOf( list(calendar.day_name) )

# day-granularity expressions
dayOffset = (qty("qty") + (week | day)("timeunit"))
dayFwdBack = (from_ + now.suppress() | ago)("dir")
# direction defaults to 1 when neither "next" nor "last" is given
weekdayRef = (Optional(next_ | last_,1)("dir") + dayName("day"))
dayRef = Optional( (dayOffset + (before | after | from_)("dir") ).setParseAction(convertToTimedelta) ) + \
            ((yesterday | today | tomorrow)("name")|
             weekdayRef("wkdayRef")).setParseAction(convertToDay)
# The "in ..." form accepts weeks as well as days, matching dayOffset, so that
# "in 2 weeks" parses just like "2 weeks from now".
todayRef = (dayOffset + dayFwdBack).setParseAction(convertToTimedelta) | \
            (in_("dir") + qty("qty") + (week | day)("timeunit")).setParseAction(convertToTimedelta)

dayTimeSpec = dayRef | todayRef
dayTimeSpec.setParseAction(calculateTime)

hourMinuteOrSecond = (hour | minute | second)

# a clock time: either 4-digit "miltime", or HH[:MM[:SS]] followed by am/pm
timespec = Group(int4("miltime") |
                 integer("HH") + 
                 Optional(COLON + integer("MM")) + 
                 Optional(COLON + integer("SS")) + (am | pm)("ampm")
                 )
absTimeSpec = ((noon | midnight | now | timespec("timeparts"))("timeOfDay") + 
                Optional(dayRef)("dayRef"))
absTimeSpec.setParseAction(convertToAbsTime,calculateTime)

# hour/minute/second offsets relative to an absolute time or to now
relTimeSpec = qty("qty") + hourMinuteOrSecond("timeunit") + \
                (from_ | before | after)("dir") + \
                absTimeSpec("absTime") | \
              qty("qty") + hourMinuteOrSecond("timeunit") + ago("dir") | \
              in_ + qty("qty") + hourMinuteOrSecond("timeunit")
relTimeSpec.setParseAction(convertToTimedelta,calculateTime)

# top-level expression; alternatives are tried in this order
nlTimeExpression = (absTimeSpec | dayTimeSpec | relTimeSpec)
 
# test grammar
# One sample expression per line; each is parsed and its computed time printed.
tests = """\
today
tomorrow
yesterday
in a couple of days
a couple of days from now
a couple of days from today
in a day
3 days ago
3 days from now
a day ago
now
10 minutes ago
10 minutes from now
in 10 minutes
in a minute
in a couple of minutes
20 seconds ago
in 30 seconds
20 seconds before noon
20 seconds before noon tomorrow
noon
midnight
noon tomorrow
6am tomorrow
0800 yesterday
12:15 AM today
3pm 2 days from today
a week from today
a week from now
3 weeks ago
noon next Sunday
noon Sunday
noon last Sunday""".splitlines()

# Single-argument print(...) calls produce the same output whether print is
# a statement (Python 2) or a function (Python 3).
for t in tests:
    print("%s (relative to %s)" % (t, datetime.now()))
    res = nlTimeExpression.parseString(t)
    if "calculatedTime" in res:
        print(res.calculatedTime)
    else:
        print("???")
    print("")
 

This prints:
today (relative to 2009-09-30 22:30:51.747000)
2009-09-30 00:00:00
 
tomorrow (relative to 2009-09-30 22:30:51.763000)
2009-10-01 00:00:00
 
yesterday (relative to 2009-09-30 22:30:51.763000)
2009-09-29 00:00:00
 
in a couple of days (relative to 2009-09-30 22:30:51.763000)
2009-10-02 22:30:51.763000
 
a couple of days from now (relative to 2009-09-30 22:30:51.763000)
2009-10-02 22:30:51.763000
 
a couple of days from today (relative to 2009-09-30 22:30:51.763000)
2009-10-02 00:00:00
 
in a day (relative to 2009-09-30 22:30:51.763000)
2009-10-01 22:30:51.763000
 
3 days ago (relative to 2009-09-30 22:30:51.763000)
2009-09-27 22:30:51.763000
 
3 days from now (relative to 2009-09-30 22:30:51.763000)
2009-10-03 22:30:51.763000
 
a day ago (relative to 2009-09-30 22:30:51.763000)
2009-09-29 22:30:51.763000
 
now (relative to 2009-09-30 22:30:51.763000)
2009-09-30 22:30:51.763000
 
10 minutes ago (relative to 2009-09-30 22:30:51.763000)
2009-09-30 22:20:51.778000
 
10 minutes from now (relative to 2009-09-30 22:30:51.778000)
2009-09-30 22:40:51.778000
 
in 10 minutes (relative to 2009-09-30 22:30:51.778000)
2009-09-30 22:40:51.778000
 
in a minute (relative to 2009-09-30 22:30:51.778000)
2009-09-30 22:31:51.778000
 
in a couple of minutes (relative to 2009-09-30 22:30:51.778000)
2009-09-30 22:32:51.778000
 
20 seconds ago (relative to 2009-09-30 22:30:51.778000)
2009-09-30 22:30:31.778000
 
in 30 seconds (relative to 2009-09-30 22:30:51.778000)
2009-09-30 22:31:21.778000
 
20 seconds before noon (relative to 2009-09-30 22:30:51.778000)
2009-09-30 11:59:40
 
20 seconds before noon tomorrow (relative to 2009-09-30 22:30:51.778000)
2009-10-01 11:59:40
 
noon (relative to 2009-09-30 22:30:51.794000)
2009-09-30 12:00:00
 
midnight (relative to 2009-09-30 22:30:51.794000)
2009-09-30 00:00:00
 
noon tomorrow (relative to 2009-09-30 22:30:51.794000)
2009-10-01 12:00:00
 
6am tomorrow (relative to 2009-09-30 22:30:51.794000)
2009-10-01 06:00:00
 
0800 yesterday (relative to 2009-09-30 22:30:51.794000)
2009-09-29 08:00:00
 
12:15 AM today (relative to 2009-09-30 22:30:51.794000)
2009-09-30 00:15:00
 
3pm 2 days from today (relative to 2009-09-30 22:30:51.794000)
2009-10-02 15:00:00
 
a week from today (relative to 2009-09-30 22:30:51.794000)
2009-10-07 00:00:00
 
a week from now (relative to 2009-09-30 22:30:51.794000)
2009-10-07 22:30:51.794000
 
3 weeks ago (relative to 2009-09-30 22:30:51.794000)
2009-09-09 22:30:51.794000
 
noon next Sunday (relative to 2009-09-30 22:30:51.794000)
2009-10-04 12:00:00
 
noon Sunday (relative to 2009-09-30 22:30:51.794000)
2009-10-04 12:00:00
 
noon last Sunday (relative to 2009-09-30 22:30:51.794000)
2009-09-27 12:00:00


C struct parser


C structures are hellish to parse, given the possibilities for nesting, pointers, user-defined types, and bitfields. Here is a parser I wrote a while ago; if someone would like to pick it up and enhance it, please do so!

from pyparsing import Optional, Word, Literal, Forward, alphas, nums, \
    Group, ZeroOrMore, oneOf, delimitedList, cStyleComment, restOfLine
import pprint
 
cstructBNF = None
def getCStructBNF():
    """Return the pyparsing grammar for a C ``struct``/``union`` declaration.

    The grammar is built on the first call and cached in the module-level
    ``cstructBNF``; later calls return the same object.  C-style and
    ``//`` line comments are ignored anywhere in the input.

    Each member parses to a group of type tokens, pointer markers ("" when
    absent) and a nested group of declared names; nested struct/union
    declarations parse recursively.
    """
    global cstructBNF
    if cstructBNF is None:
        # Keyword is not part of this script's top-level pyparsing import list
        from pyparsing import Keyword

        structDecl = Forward()
        ident = Word( alphas+"_", alphas+nums+"_$" )
        integer = Word( nums )
        semi = Literal(";").suppress()
        lbrace = Literal("{").suppress()
        rbrace = Literal("}").suppress()

        typeName = ident
        varName = ident
        # <- should really support an expression here, but keep simple for now
        arraySizeSpecifier = integer | ident  
        # Match built-in type names as whole words.  A plain literal match
        # would accept "int" as a prefix of a user-defined type such as
        # int32_t or char_t, and the member (and enclosing struct) would
        # then fail to parse.
        builtinType = ( Keyword("int") | Keyword("long") | Keyword("short") |
                        Keyword("double") | Keyword("char") | Keyword("void") )
        typeSpec = Optional(Keyword("unsigned")) + builtinType + \
                       Optional(Word("*"), default="") 
        bitfieldspec = ":" + arraySizeSpecifier
        varnamespec = ( varName + Optional( bitfieldspec | ( "[" + arraySizeSpecifier + "]" )))
        # pointer stars may follow the type ("long* a") or precede the name ("long *a")
        memberDecl = Group( ( typeSpec | typeName ) + Optional(Word("*"), default="") + 
                            Group( delimitedList( varnamespec ) )  
                            + semi ) | structDecl

        structDecl << Group( oneOf("struct union") + Optional(ident) + \
                        Group( lbrace + ZeroOrMore( memberDecl ) + rbrace) + \
                        Optional(Word("*"), default="") + Optional(varnamespec) + semi )

        cstructBNF = structDecl

        cplusplusLineComment = Literal("//") + restOfLine

        cstructBNF.ignore( cStyleComment )  # never know where these will crop up!
        cstructBNF.ignore( cplusplusLineComment )  # or these either

    return cstructBNF
 
 
# Minimal fixture: an anonymous struct with plain members and one array member.
testData1 = """
    struct {
        long a;
        short b;
        char c[32];
        } a;
"""
 
# Larger fixture: a union mixing pointer declarations (star on either side),
# nested structs, a pointer-to-struct member, arrays sized by a number or an
# identifier, bitfields, and both comment styles.
testData2 = """
    union {
        long a;
        long* a;
        long** a;
        long*** a;
        long**** a;
        long *a;
        long **a;
        struct {
            int x;
            int y;
            } pt;  // this is an embedded struct
        struct {
            int x,y;
            struct {
               char* a;
               char* b;
               } inner;
            } pt2;
        struct {
            int x;
            int y;
            }* coordPtr; /* this is just a pointer to a struct */
        short b;
        char c[32];
        char d[MAX_LENGTH /* + 1 to make room for terminating null */ ];
        char* name;
        char *name2;  /* no one can agree where the '*' should go */
        int bitfield:5;  /* this is rare, but not hard to add to parse grammar */
        int bitfield2:BIT2LEN;
        void* otherData;
        } a;
"""
 
# Parse each fixture and pretty-print the nested token list.
bnf = getCStructBNF()
for testdata in (testData1, testData2):
    pprint.pprint( bnf.parseString(testdata).asList() )
    # print("") emits the blank separator line on both Python 2 and 3;
    # a bare `print` is a no-op expression on Python 3.
    print("")

This prints:
[['struct',
  [['long', '', '', ['a']],
   ['short', '', '', ['b']],
   ['char', '', '', ['c', '[', '32', ']']]],
  '',
  'a']]
 
[['union',
  [['long', '', '', ['a']],
   ['long', '*', '', ['a']],
   ['long', '**', '', ['a']],
   ['long', '***', '', ['a']],
   ['long', '****', '', ['a']],
   ['long', '*', '', ['a']],
   ['long', '**', '', ['a']],
   ['struct', [['int', '', '', ['x']], ['int', '', '', ['y']]], '', 'pt'],
   ['struct',
    [['int', '', '', ['x', 'y']],
     ['struct',
      [['char', '*', '', ['a']], ['char', '*', '', ['b']]],
      '',
      'inner']],
    '',
    'pt2'],
   ['struct',
    [['int', '', '', ['x']], ['int', '', '', ['y']]],
    '*',
    'coordPtr'],
   ['short', '', '', ['b']],
   ['char', '', '', ['c', '[', '32', ']']],
   ['char', '', '', ['d', '[', 'MAX_LENGTH', ']']],
   ['char', '*', '', ['name']],
   ['char', '*', '', ['name2']],
   ['int', '', '', ['bitfield', ':', '5']],
   ['int', '', '', ['bitfield2', ':', 'BIT2LEN']],
   ['void', '*', '', ['otherData']]],
  '',
  'a']]