# Copyright (C) 2001-2004 Python Software Foundation # Author: Barry Warsaw, Thomas Wouters, Anthony Baxter # Contact: email-sig@python.org """A parser of RFC 2822 and MIME email messages.""" import re from cStringIO import StringIO from email.FeedParser import FeedParser from email.Message import Message NLCRE = re.compile('\r\n|\r|\n') class Parser: def __init__(self, _class=Message, strict=False): """Parser of RFC 2822 and MIME email messages. Creates an in-memory object tree representing the email message, which can then be manipulated and turned over to a Generator to return the textual representation of the message. The string must be formatted as a block of RFC 2822 headers and header continuation lines, optionally preceeded by a `Unix-from' header. The header block is terminated either by the end of the string or by a blank line. _class is the class to instantiate for new message objects when they must be created. This class must have a constructor that can take zero arguments. Default is Message.Message. Optional strict tells the parser to be strictly RFC compliant or to be more forgiving in parsing of ill-formatted MIME documents. When non-strict mode is used, the parser will try to make up for missing or erroneous boundaries and other peculiarities seen in the wild. Default is non-strict parsing. """ self._class = _class def parse(self, fp, headersonly=False): """Create a message structure from the data in a file. Reads all the data from the file and returns the root of the message structure. Optional headersonly is a flag specifying whether to stop parsing after reading the headers or not. The default is False, meaning it parses the entire contents of the file. """ feedparser = FeedParser(self._class) if headersonly: feedparser._set_headersonly() while True: data = fp.read(8192) if not data: break feedparser.feed(data) return feedparser.close() def parsestr(self, text, headersonly=False): """Create a message structure from a string. Returns the root of the message structure. Optional headersonly is a flag specifying whether to stop parsing after reading the headers or not. The default is False, meaning it parses the entire contents of the file. """ return self.parse(StringIO(text), headersonly=headersonly) class HeaderParser(Parser): def parse(self, fp, headersonly=True): return Parser.parse(self, fp, True) def parsestr(self, text, headersonly=True): return Parser.parsestr(self, text, True)