# ex: ts=4 tw=0 sw=4 expandtab
"""Render a mail file (an mbox, or a single RFC 822 message) as HTML.

css classes:
    body: the body of the email
    quote: quoted text
    sig: duh
    level1header: top level headers
    level2header: you get the idea
    resentheader: for resent-*
    level1content: ...
    level2content: ...
    resentcontent: ...
"""
# NOTE(review): this list used to name the quoted-text class "quoted", but
# body_chunker tags quoted text as 'quote' and that is what ends up in the
# markup.  The list above documents what is emitted; confirm the stylesheet
# agrees.

import email
import mailbox
import re

try:  # Python 2
    import StringIO
except ImportError:  # Python 3: io.StringIO covers the only usage below
    import io as StringIO

CSSFILE = "../mailformat.css"

# Each entry maps a group of header names to the pair of CSS classes
# [class for the header name, class for the header value].
headermap = [
    [['To', 'From', 'Cc', 'Bcc', 'Date', 'Subject', 'Newsgroups'],
     ['level1header', 'level1content']],
    [['User-Agent', 'Organization', 'List-Id', 'Reply-To', 'In-Reply-To',
      'Message-Id', 'References'],
     ['level2header', 'level2content']],
    [['Resent-From', 'Resent-Bcc', 'Resent-Cc'],
     ['resentheader', 'resentcontent']],
]

# Signature delimiter: "-- " alone on a line (a trailing CR is tolerated so
# CRLF mail is recognised too).
_SIG_RE = re.compile(r"^-- \r?$")

# URLs are linkified *after* HTML escaping, so besides whitespace and raw
# brackets/quotes the match must also stop at the escaped forms of < > ".
_URL_RE = re.compile(r'(https?://(?:(?!&(?:lt|gt|quot);)[^\s<>"])+)')

_PAGE_HEAD = ('<html>\n<head>\n<title>%s</title>\n'
              '<link rel="stylesheet" type="text/css" href="%s" />\n'
              '</head>\n<body>\n')
_PAGE_FOOT = "</body>\n</html>\n"


def _message_from_raw(raw):
    """Parse raw message text (bytes or str) into an email Message."""
    # On Python 2 ``bytes is str`` and message_from_string is the right call;
    # only genuine Python 3 bytes need message_from_bytes.
    if isinstance(raw, bytes) and not isinstance(raw, str):
        return email.message_from_bytes(raw)
    return email.message_from_string(raw)


class chunk(object):
    """A run of consecutive body lines sharing one classification.

    chunk -- the text of the run
    type  -- 'body', 'quote' or 'sig'; used as the CSS class when rendered
    """

    def __init__(self, chunk, type):
        self.chunk = chunk
        self.type = type

    def __repr__(self):
        return "chunk(%r, %r)" % (self.chunk, self.type)


class body_chunker(object):
    """Split a plain-text mail body into body / quote / sig chunks.

    The result is available as ``self.chunks`` (a list of ``chunk``).

    Classification, checked per line in this order:
      * lines starting with '>' or '|' are quoted text;
      * the "-- " delimiter line and every later non-quoted line is signature;
      * everything else is body text.

    Because the quote test comes first, quoted-looking lines that follow the
    signature delimiter are still collected as a quote chunk, which is
    emitted before the single trailing sig chunk.
    """

    def __init__(self, mailbody):
        self.body = mailbody
        self.chunks = []
        self.inquote = False
        self.insig = False
        self._build_chunks()

    def _build_chunks(self):
        quote = []
        body = []
        sig = []
        for line in StringIO.StringIO(self.body).readlines():
            if line.startswith((">", "|")):
                quote.append(line)
                if not self.inquote:
                    # Entering a quote: flush pending body text, but never
                    # emit an empty body chunk (e.g. mail that opens with a
                    # quote).
                    if body:
                        self.chunks.append(chunk("".join(body), 'body'))
                        body = []
                    self.inquote = True
            elif self.insig or _SIG_RE.search(line):
                self.insig = True
                sig.append(line)
            else:
                if self.inquote:
                    # we've reached the end of a quote
                    self.chunks.append(chunk("".join(quote), 'quote'))
                    quote = []
                    self.inquote = False
                body.append(line)
        if quote:
            self.chunks.append(chunk("".join(quote), 'quote'))
        if body:
            self.chunks.append(chunk("".join(body), 'body'))
        if self.insig:
            self.chunks.append(chunk("".join(sig), 'sig'))


class mailformatter(object):
    """Format the mail stored in ``filename`` as an HTML page.

    Call ``parse()``, then read the result with ``dump()`` and its content
    type with ``get_ct()``.
    """

    def __init__(self, filename):
        self.filename = filename
        self.data = ""
        self.cssfile = CSSFILE
        # Set by parse(); initialised so get_ct() is safe to call before it.
        self.ct = "text/plain"

    def setcss(self, cssfile):
        """Use ``cssfile`` as the stylesheet URL in the generated page."""
        self.cssfile = cssfile

    def _do_headers(self, msg):
        """loop through each headergroup => color mapping, and return the html"""
        rows = []
        for names, classes in headermap:
            header_cls, content_cls = classes
            for name in names:
                if name in msg:
                    # "%s" % value also copes with email.header.Header
                    # objects, which Python 3 returns for undecodable headers.
                    value = self._htmlescape("%s" % msg[name])
                    rows.append(
                        '<span class="%s">%s:</span> '
                        '<span class="%s">%s</span><br />\n'
                        % (header_cls, name, content_cls, value))
        rows.append("\n")
        return "".join(rows)

    def _do_body(self, msg):
        """Return the HTML for every text/plain part of ``msg`` (recursive)."""
        if msg.is_multipart():
            return "".join(self._do_body(part) for part in msg.get_payload())
        # only return text/plain messages, the True argument to get_payload
        # will decode it if it's quoted-printable or base64
        if msg.get_content_type() != "text/plain":
            return ""
        payload = msg.get_payload(None, True)
        if payload is None:
            return ""
        if isinstance(payload, bytes) and not isinstance(payload, str):
            # Python 3 hands back bytes; decode with the declared charset and
            # fall back to ASCII when it is missing or unknown.
            charset = msg.get_content_charset() or "ascii"
            try:
                payload = payload.decode(charset, "replace")
            except LookupError:
                payload = payload.decode("ascii", "replace")
        return self._sub_all(payload)

    def _do_urls(self, chunk, cls):
        """Wrap each URL in already-escaped ``chunk`` in a link of class ``cls``."""
        return _URL_RE.sub('<a class="%s" href="\\1">\\1</a>' % cls, chunk)

    def _sub_all(self, chunk):
        """Chunk a plain-text body and return it as escaped, linkified HTML."""
        self.b = body_chunker(chunk)
        parts = []
        for part in self.b.chunks:
            part.chunk = self._do_urls(self._htmlescape(part.chunk), part.type)
            parts.append('<pre class="%s">%s</pre>' % (part.type, part.chunk))
        return "".join(parts)

    def _stringfactory(self, fp):
        """Return the full contents of ``fp``.

        Kept for backward compatibility; parse() no longer needs it.
        """
        return fp.read()

    def _htmlescape(self, foo):
        """Escape ``foo`` for use in HTML text and double-quoted attributes."""
        # '&' must be replaced first or the other entities get double-escaped.
        return (foo.replace("&", "&amp;")
                   .replace("<", "&lt;")
                   .replace(">", "&gt;")
                   .replace('"', "&quot;"))

    def _load_messages(self):
        """Return the messages in self.filename as a list of Message objects.

        A file whose first line starts with "From " is read as an mbox;
        anything else is treated as one single message.  Raises IOError /
        OSError / mailbox.Error when the file cannot be read.
        """
        with open(self.filename, 'rb') as f:
            first = f.readline()
            if not first.startswith(b"From "):
                return [_message_from_raw(first + f.read())]
        # create=False: never create a mailbox as a side effect of viewing.
        box = mailbox.mbox(self.filename, create=False)
        try:
            return list(box)
        finally:
            box.close()

    def _append_message(self, m):
        """Append the HTML for one parsed message to self.data."""
        self.data += self._do_headers(m)
        self.data += self._do_body(m)
        self.data += "<hr />\n"

    def parse(self):
        """Read self.filename and append the rendered HTML page to self.data.

        On success the content type becomes "text/html".  If the file cannot
        be opened, the content type becomes "text/plain" and the data is
        replaced by an error message instead of raising.
        """
        try:
            messages = self._load_messages()
        except (IOError, OSError, mailbox.Error):
            self.ct = "text/plain"
            self.data = "Error opening file"
            return
        self.ct = "text/html"
        self.data += _PAGE_HEAD % (
            self._htmlescape(self.filename.split('/')[-1]),
            self._htmlescape(self.cssfile))
        for m in messages:
            self._append_message(m)
        self.data += _PAGE_FOOT

    def _do_single_msg(self, file):
        """Parse raw message text ``file`` (str or bytes) and render it."""
        self._append_message(_message_from_raw(file))

    def dump(self):
        """Return everything rendered so far."""
        return self.data

    def get_ct(self):
        """Return the content type of the data returned by dump()."""
        return self.ct