embedded/mx/DateTime/ARPA.py
author sylvain.thenault@logilab.fr
Thu, 26 Mar 2009 18:59:56 +0100
branchtls-sprint
changeset 1148 55a8238f8f7c
parent 0 b97547f5f1fa
permissions -rw-r--r--
keep notion of relation tags, though with simpler implementation and usage

""" This module provides a set of constructors and routines to convert
    between DateTime[Delta] instances and ARPA representations of date
    and time. The format is specified by RFC822 + RFC1123.

    Note: Timezones are only interpreted by ParseDateTimeGMT(). All
    other constructors silently ignore the time zone information.

    Copyright (c) 1998-2000, Marc-Andre Lemburg; mailto:mal@lemburg.com
    Copyright (c) 2000-2007, eGenix.com Software GmbH; mailto:info@egenix.com
    See the documentation for further information on copyrights,
    or contact the author. All Rights Reserved.

"""
import DateTime,Timezone
import re,string

# Grammar: RFC822 + RFC1123 + deprecated RFC850
#
# The fragments below are assembled into the two compiled patterns
# arpadateRE (date only) and arpadatetimeRE (date followed by time).
# Fragments containing regex escapes are written as raw strings so
# that '\d' reaches the re module unchanged.

# Literal weekday / month names; any trailing lowercase letters
# (e.g. "Monday", "January") are accepted and ignored.
_litday = '(?P<litday>Mon|Tue|Wed|Thu|Fri|Sat|Sun)[a-z]*'
_litmonth = ('(?P<litmonth>Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)'
             '[a-z]*')

# Date: day, then either a space-separated literal month or a
# dash-separated numeric month, then a 2- or 4-digit year.
# NOTE(review): the dashed form only accepts a numeric month, so a
# string like "06-Nov-94" does not match -- confirm this is intended.
_date = (r'(?:(?P<day>\d?\d)(?: +' + _litmonth +
         r' +|-(?P<month>\d?\d)-)(?P<year>(?:\d\d)?\d\d))')

# Time zone sub-pattern, taken from the Timezone module.
_zone = Timezone.zone

# Time: HH:MM with optional :SS and an optional time zone.
_time = (r'(?:(?P<hour>\d\d):(?P<minute>\d\d)'
         r'(?::(?P<second>\d\d))?(?: +' + _zone + ')?)')
#       Timezone information is made optional because some mail apps
#       forget to add it (most of these seem to be spamming engines, btw).
#       It defaults to UTC.

_arpadate = '(?:' + _litday + ',? )? *' + _date
_arpadatetime = '(?:' + _litday + ',? )? *' + _date + ' +' + _time

#       We are not strict about the extra characters: some applications
#       add extra information to the date header field. Additional spaces
#       between the fields and extra characters in the literal day
#       and month fields are also silently ignored.

arpadateRE = re.compile(_arpadate)
arpadatetimeRE = re.compile(_arpadatetime)

# Translation tables
#
# litdaytable / litmonthtable map lowercased three-letter names to
# numbers; _days / _months map numbers back to names for formatting.
# _months has a None placeholder at index 0 so it can be indexed by
# month numbers starting at 1.
litdaytable = {'mon':0, 'tue':1, 'wed':2, 'thu':3, 'fri':4, 'sat':5, 'sun':6 }
litmonthtable = {'jan':1, 'feb':2, 'mar':3, 'apr':4, 'may':5, 'jun':6,
                 'jul':7, 'aug':8, 'sep':9, 'oct':10, 'nov':11, 'dec':12 }
_days = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
_months = [None, 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec' ]

def ParseDate(arpastring,parse_arpadate=arpadateRE.match,
              strip=string.strip,atoi=string.atoi,atof=string.atof,
              lower=string.lower):

    """ParseDate(arpastring)

       Parse the date part of an ARPA date string and return it as a
       DateTime instance. Anything following the date (such as a time
       part) is not looked at; the result is built from year, month
       and day only.

       Raises ValueError if the string does not match the ARPA date
       grammar.

       The keyword parameters after arpastring bind helper functions
       as defaults; callers normally leave them alone.

    """
    match = parse_arpadate(strip(arpastring))
    if not match:
        raise ValueError('wrong format')

    # The literal weekday is captured by the pattern but not used.
    _, day_text, month_name, month_text, year_text = match.groups()

    # Two-digit years are expanded via DateTime.add_century().
    year = atoi(year_text)
    if len(year_text) == 2:
        year = DateTime.add_century(year)

    # The month is given either numerically or as a literal name.
    if not month_name:
        month = atoi(month_text)
    else:
        month_key = lower(month_name)
        try:
            month = litmonthtable[month_key]
        except KeyError:
            raise ValueError('wrong month format')

    return DateTime.DateTime(year, month, atoi(day_text))

def ParseDateTime(arpastring,parse_arpadatetime=arpadatetimeRE.match,
                  strip=string.strip,atoi=string.atoi,atof=string.atof,
                  lower=string.lower):

    """ParseDateTime(arpastring)

       Parse an ARPA date/time string and return a DateTime instance
       holding the date and time exactly as written. Any time zone
       in the string is matched but not applied, i.e. the value is
       taken as local time.

       Raises ValueError if the string does not match the ARPA
       date/time grammar.

       The keyword parameters after arpastring bind helper functions
       as defaults; callers normally leave them alone.
    """
    match = parse_arpadatetime(strip(arpastring))
    if not match:
        raise ValueError('wrong format or unknown time zone')

    # The literal weekday and the time zone are captured but unused.
    (_, day_text, month_name, month_text, year_text,
     hour_text, minute_text, second_text, _) = match.groups()

    # Two-digit years are expanded via DateTime.add_century().
    year = atoi(year_text)
    if len(year_text) == 2:
        year = DateTime.add_century(year)

    # The month is given either numerically or as a literal name.
    if not month_name:
        month = atoi(month_text)
    else:
        month_key = lower(month_name)
        try:
            month = litmonthtable[month_key]
        except KeyError:
            raise ValueError('wrong month format')

    day = atoi(day_text)
    hour = atoi(hour_text)
    minute = atoi(minute_text)

    # The seconds field is optional in the grammar.
    if second_text is not None:
        second = atof(second_text)
    else:
        second = 0.0

    return DateTime.DateTime(year, month, day, hour, minute, second)

def ParseDateTimeGMT(arpastring,parse_arpadatetime=arpadatetimeRE.match,
                     strip=string.strip,atoi=string.atoi,atof=string.atof,
                     lower=string.lower):

    """ParseDateTimeGMT(arpastring)

       Parse an ARPA date/time string and return a DateTime instance
       converted to UTC: the offset that Timezone.utc_offset() gives
       for the string's time zone is subtracted from the parsed value.

       Raises ValueError if the string does not match the ARPA
       date/time grammar.

       The keyword parameters after arpastring bind helper functions
       as defaults; callers normally leave them alone.
    """
    match = parse_arpadatetime(strip(arpastring))
    if not match:
        raise ValueError('wrong format or unknown time zone')

    # The literal weekday is captured by the pattern but not used.
    (_, day_text, month_name, month_text, year_text,
     hour_text, minute_text, second_text, zone) = match.groups()

    # Two-digit years are expanded via DateTime.add_century().
    year = atoi(year_text)
    if len(year_text) == 2:
        year = DateTime.add_century(year)

    # The month is given either numerically or as a literal name.
    if not month_name:
        month = atoi(month_text)
    else:
        month_key = lower(month_name)
        try:
            month = litmonthtable[month_key]
        except KeyError:
            raise ValueError('wrong month format')

    day = atoi(day_text)
    hour = atoi(hour_text)
    minute = atoi(minute_text)

    # The seconds field is optional in the grammar.
    if second_text is not None:
        second = atof(second_text)
    else:
        second = 0.0

    # zone is None when the string carried no time zone.
    offset = Timezone.utc_offset(zone)
    as_written = DateTime.DateTime(year, month, day, hour, minute, second)
    return as_written - offset

# Alias: ParseDateTimeUTC is the very same function object as
# ParseDateTimeGMT, made available under the UTC name.
ParseDateTimeUTC = ParseDateTimeGMT

def str(datetime,tz=None):

    """str(datetime,tz=datetime.gmtoffset())

    Returns the datetime instance as ARPA date string. tz can be given
    as DateTimeDelta instance providing the time zone difference from
    datetime's zone to UTC. It defaults to datetime.gmtoffset().

    The offset is written as one sign character followed by two-digit
    hours and two-digit minutes, e.g. +0100 or -0530. """

    if tz is None:
        tz = datetime.gmtoffset()

    # A negative offset may report a negative hour and/or a negative
    # minute component. Formatting these directly would give strings
    # such as '-05-30' or '+00-30', so the sign is emitted once and
    # the magnitudes are formatted separately.
    # NOTE(review): relies on tz.hour / tz.minute carrying the sign of
    # the offset -- confirm against the DateTimeDelta documentation.
    offset_hours = tz.hour
    offset_minutes = tz.minute
    if offset_hours < 0 or offset_minutes < 0:
        sign = '-'
    else:
        sign = '+'

    return '%s, %02i %s %04i %02i:%02i:%02i %s%02i%02i' % (
        _days[datetime.day_of_week], datetime.day,
        _months[datetime.month], datetime.year,
        datetime.hour, datetime.minute, datetime.second,
        sign, abs(offset_hours), abs(offset_minutes))

def strGMT(datetime):

    """ strGMT(datetime)

    Formats the datetime instance as ARPA date string with the
    literal zone name GMT appended. The value is not converted; it
    is taken to be in GMT already. """

    weekday = _days[datetime.day_of_week]
    monthname = _months[datetime.month]
    fields = (weekday, datetime.day, monthname, datetime.year,
              datetime.hour, datetime.minute, datetime.second)
    return '%s, %02i %s %04i %02i:%02i:%02i GMT' % fields

def strUTC(datetime):

    """ strUTC(datetime)

    Formats the datetime instance as ARPA date string with the
    literal zone name UTC appended. The value is not converted; it
    is taken to be in UTC already. """

    weekday = _days[datetime.day_of_week]
    monthname = _months[datetime.month]
    fields = (weekday, datetime.day, monthname, datetime.year,
              datetime.hour, datetime.minute, datetime.second)
    return '%s, %02i %s %04i %02i:%02i:%02i UTC' % fields

def _test():
    import sys, os, rfc822
    file = os.path.join(os.environ['HOME'], 'nsmail/Inbox')
    f = open(file, 'r')
    while 1:
        m = rfc822.Message(f)
        if not m:
            break
        print 'From:', m.getaddr('from')
        print 'To:', m.getaddrlist('to')
        print 'Subject:', m.getheader('subject')
        raw = m.getheader('date')
        try:
            date = ParseDateTimeUTC(raw)
            print 'Date:',strUTC(date)
        except ValueError,why:
            print 'PROBLEMS:',repr(raw),'-->',why
            raw_input('...hit return to continue')
        print
        # Netscape mail file
        while 1:
            line = f.readline()
            if line[:6] == 'From -':
                break

if __name__ == '__main__':
    _test()