265 lines
No EOL
7.3 KiB
Python
265 lines
No EOL
7.3 KiB
Python
# Copyright (c) 2013, Web Notes Technologies Pvt. Ltd. and Contributors
|
|
# MIT License. See license.txt
|
|
|
|
from __future__ import unicode_literals
|
|
import time
|
|
import poplib
|
|
import webnotes
|
|
from webnotes.utils import extract_email_id, convert_utc_to_user_timezone, now, cint
|
|
from webnotes.utils.scheduler import log
|
|
|
|
class EmailSizeExceededError(webnotes.ValidationError):
    """Raised when a single message is not smaller than the per-email size limit."""
|
|
class EmailTimeoutError(webnotes.ValidationError):
    """Raised when the time spent reading from the POP3 server exceeds the configured timeout."""
|
|
class TotalSizeExceededError(webnotes.ValidationError):
    """Raised when the cumulative size of the messages pulled in one run exceeds the total limit."""
|
|
|
|
class IncomingMail:
    """
    Single incoming email object. Extracts text / html and attachments from the email.

    After construction the instance exposes:
        mail            - the parsed `email` message object
        text_content    - concatenated text/plain parts
        html_content    - concatenated text/html parts
        attachments     - list of dicts with 'content-type', 'filename', 'content'
        content, content_type - body chosen by set_content_and_type()
        subject         - decoded Subject header
        from_email, from_real_name - sender address and display name
        date            - 'YYYY-MM-DD HH:MM:SS' string
    """

    def __init__(self, content):
        """Parse the raw message string `content` and populate all attributes."""
        import email
        import email.utils
        import datetime

        self.mail = email.message_from_string(content)

        self.text_content = ''
        self.html_content = ''
        self.attachments = []
        self.parse()
        self.set_content_and_type()
        self.set_subject()

        self.from_email = extract_email_id(self.mail["From"])
        self.from_real_name = email.utils.parseaddr(self.mail["From"])[0]

        # parsedate_tz() returns None for a header it cannot parse; feeding
        # that to mktime_tz() would raise TypeError, so treat an unparsable
        # Date header the same as a missing one.
        date_header = self.mail["Date"]
        parsed_date = email.utils.parsedate_tz(date_header) if date_header else None

        if parsed_date:
            utc = email.utils.mktime_tz(parsed_date)
            utc_dt = datetime.datetime.utcfromtimestamp(utc)
            self.date = convert_utc_to_user_timezone(utc_dt).strftime('%Y-%m-%d %H:%M:%S')
        else:
            self.date = now()

    def parse(self):
        """Walk every MIME part of the message and process it."""
        for part in self.mail.walk():
            self.process_part(part)

    def set_subject(self):
        """Decode the Subject header into `self.subject`.

        Only the first chunk returned by decode_header() is used. When the
        header does not declare a charset, utf-8 is assumed.
        """
        import email.header

        decoded = email.header.decode_header(self.mail.get("Subject", "No Subject"))
        raw_subject, declared_charset = decoded[0]
        self.subject = (raw_subject or "").decode(declared_charset or "utf-8")

    def set_content_and_type(self):
        """Pick the body to expose as `self.content` / `self.content_type`.

        Plain text wins over html; if the mail has neither, the
        '[Blank Email]' placeholder is kept.
        """
        self.content, self.content_type = '[Blank Email]', 'text/plain'
        if self.text_content:
            self.content, self.content_type = self.text_content, 'text/plain'
        elif self.html_content:
            # `elif` (not `else`) so that a mail without any body keeps the
            # placeholder instead of becoming an empty text/html body
            self.content, self.content_type = self.html_content, 'text/html'

    def process_part(self, part):
        """Collect text, html and attachment data from a single MIME part."""
        content_type = part.get_content_type()
        charset = part.get_content_charset()
        if not charset:
            charset = self.get_charset(part)

        if content_type == 'text/plain':
            self.text_content += self.get_payload(part, charset)

        if content_type == 'text/html':
            self.html_content += self.get_payload(part, charset)

        if part.get_filename():
            self.get_attachment(part, charset)

    def get_text_content(self):
        """Return the plain text body, or the html body if there is no plain text."""
        return self.text_content or self.html_content

    def get_charset(self, part):
        """Return the charset declared by `part`, else the one guessed by chardet."""
        charset = part.get_content_charset()
        if not charset:
            import chardet
            charset = chardet.detect(str(part))['encoding']

        return charset

    def get_payload(self, part, charset):
        """Return the decoded payload of `part` as unicode.

        Undecodable bytes are ignored. If `charset` is not a codec known to
        Python (LookupError), the payload is returned as-is, undecoded.
        """
        try:
            return unicode(part.get_payload(decode=True), str(charset), "ignore")
        except LookupError:
            return part.get_payload()

    def get_attachment(self, part, charset):
        """Append `part` to `self.attachments`. `charset` is accepted but not used."""
        self.attachments.append({
            'content-type': part.get_content_type(),
            'filename': part.get_filename(),
            'content': part.get_payload(decode=True),
        })

    def save_attachments_in_doc(self, doc):
        """Save every collected attachment against `doc` (by doctype and name).

        Attachments that are too large or already attached are skipped.
        """
        from webnotes.utils.file_manager import save_file, MaxFileSizeReachedError

        for attachment in self.attachments:
            try:
                save_file(attachment['filename'], attachment['content'],
                    doc.doctype, doc.name)
            except MaxFileSizeReachedError:
                # WARNING: bypass max file size exception
                pass
            except webnotes.DuplicateEntryError:
                # same file attached twice??
                pass

    def get_thread_id(self):
        """Return the first token following a '[' in the subject, or None.

        The token is a run of word characters, '/' and '-', e.g. 'SUP-00042'
        for the subject 'Re: Help [SUP-00042]'.
        """
        import re

        match = re.search(r'(?<=\[)[\w/-]+', self.subject)
        return match.group(0) if match else None
|
|
|
|
class POP3Mailbox:
    """
    Pulls messages from a POP3 account and hands each one to process_message().

    Subclasses are expected to override setup(), check_mails() and
    process_message(). Constructing an instance immediately runs
    get_messages().
    """

    # maximum number of messages pulled from the server in one run
    MAX_MESSAGES_PER_RUN = 20

    # messages numbered above this are deleted unread after a clean run,
    # to avoid having too many messages entering the system
    PURGE_ABOVE = 100

    def __init__(self, args=None):
        self.setup(args)
        self.get_messages()

    def setup(self, args=None):
        """Store connection settings. Meant to be overridden."""
        self.settings = args or webnotes._dict()

    def check_mails(self):
        """Return True if the mailbox should be polled. Meant to be overridden."""
        return True

    def process_message(self, mail):
        """Handle one IncomingMail. Meant to be overridden."""
        pass

    def connect(self):
        """Open the POP3 (or POP3 over SSL) connection and log in as `self.pop`."""
        if cint(self.settings.use_ssl):
            self.pop = Timed_POP3_SSL(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))
        else:
            self.pop = Timed_POP3(self.settings.host, timeout=webnotes.conf.get("pop_timeout"))

        self.pop.user(self.settings.username)
        self.pop.pass_(self.settings.password)

    def get_messages(self):
        """Pull up to MAX_MESSAGES_PER_RUN messages and process each of them.

        Stops early when the total size limit or the timeout is hit. After a
        run without errors, messages numbered above PURGE_ABOVE are deleted
        from the server without being read. The connection is always closed
        with quit() once it has been opened.
        """
        if not self.check_mails():
            return # nothing to do

        webnotes.conn.commit()
        self.connect()

        try:
            # track if errors arose
            self.errors = False
            pop_list = self.pop.list()[1]
            total_messages = len(pop_list)

            # size limits
            self.total_size = 0
            self.max_email_size = cint(webnotes.local.conf.get("max_email_size"))
            self.max_total_size = 5 * self.max_email_size

            timed_out = False
            batch = pop_list[:self.MAX_MESSAGES_PER_RUN]
            for msg_num, pop_meta in enumerate(batch, 1):
                try:
                    self.retrieve_message(pop_meta, msg_num)
                except TotalSizeExceededError:
                    break
                except EmailTimeoutError:
                    timed_out = True
                    break

            # Once the timeout has been exceeded every further server command
            # raises EmailTimeoutError again, so purging is skipped in that
            # case instead of letting the error escape from here.
            if total_messages > self.PURGE_ABOVE and not self.errors and not timed_out:
                for msg_num in range(self.PURGE_ABOVE + 1, total_messages + 1):
                    self.pop.dele(msg_num)
        finally:
            # no matter the exception, pop should quit if connected
            self.pop.quit()

    def retrieve_message(self, pop_meta, msg_num):
        """Fetch message `msg_num`, process it in its own transaction and delete it.

        `pop_meta` is the message's entry from the POP3 LIST response.
        Failures are logged, flagged in `self.errors`, rolled back, and the
        message is still deleted from the server.

        Raises TotalSizeExceededError / EmailTimeoutError untouched (and
        without deleting the message) so the caller can stop the run.
        """
        incoming_mail = None
        try:
            self.validate_pop(pop_meta)
            msg = self.pop.retr(msg_num)

            incoming_mail = IncomingMail(b'\n'.join(msg[1]))
            webnotes.conn.begin()
            self.process_message(incoming_mail)
            webnotes.conn.commit()

        except (TotalSizeExceededError, EmailTimeoutError):
            # propagate this error to break the loop
            raise

        except Exception:
            # `Exception` rather than a bare except, so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            # log performs rollback and logs error in scheduler log
            log("receive.get_messages", self.make_error_msg(msg_num, incoming_mail))
            self.errors = True
            webnotes.conn.rollback()

        # processed or failed, the message is removed from the server
        self.pop.dele(msg_num)

    def validate_pop(self, pop_meta):
        """Throttle based on email size.

        Does nothing when no max email size is configured. Otherwise raises
        EmailSizeExceededError if this message is not smaller than the limit,
        or TotalSizeExceededError once the running total exceeds
        `self.max_total_size`.
        """
        if not self.max_email_size:
            return

        _msg_num, size = pop_meta.split()
        size = cint(size)

        if size >= self.max_email_size:
            raise EmailSizeExceededError

        self.total_size += size
        if self.total_size > self.max_total_size:
            raise TotalSizeExceededError

    def make_error_msg(self, msg_num, incoming_mail):
        """Build the text logged when message `msg_num` could not be processed.

        If the message was never parsed (`incoming_mail` is falsy), only its
        headers plus a few body lines are fetched to describe it.
        """
        error_msg = "Error in retrieving email."
        if not incoming_mail:
            try:
                # retrieve headers
                incoming_mail = IncomingMail(b'\n'.join(self.pop.top(msg_num, 5)[1]))
            except Exception:
                # best effort: fall back to the generic message
                pass

        if incoming_mail:
            error_msg += "\nDate: {date}\nFrom: {from_email}\nSubject: {subject}\n".format(
                date=incoming_mail.date, from_email=incoming_mail.from_email, subject=incoming_mail.subject)

        return error_msg
|
|
|
|
class TimerMixin(object):
    """
    Mixin that totals the time spent in `_getline` and enforces a time budget.

    The concrete class must set `_super` to the class whose `__init__`,
    `_getline` and `quit` are being wrapped. A falsy `timeout` disables
    the check.
    """

    def __init__(self, *args, **kwargs):
        # `timeout` is consumed here and is not forwarded to the wrapped class
        self.timeout = kwargs.pop('timeout', 0.0)
        self.elapsed_time = 0.0
        self._super.__init__(self, *args, **kwargs)

    def _getline(self, *args, **kwargs):
        """Delegate to the wrapped `_getline`, adding its duration to `elapsed_time`.

        Raises EmailTimeoutError once the accumulated time exceeds `timeout`.
        """
        began = time.time()
        line = self._super._getline(self, *args, **kwargs)
        self.elapsed_time += time.time() - began

        over_budget = self.timeout and self.elapsed_time > self.timeout
        if over_budget:
            raise EmailTimeoutError

        return line

    def quit(self, *args, **kwargs):
        """Reset the accumulated time, then delegate to the wrapped `quit`."""
        self.elapsed_time = 0.0
        return self._super.quit(self, *args, **kwargs)
|
|
|
|
class Timed_POP3(TimerMixin, poplib.POP3):
    """poplib.POP3 client combined with TimerMixin."""

    # class that TimerMixin delegates __init__, _getline and quit to
    _super = poplib.POP3
|
|
|
|
class Timed_POP3_SSL(TimerMixin, poplib.POP3_SSL):
    """poplib.POP3_SSL client combined with TimerMixin."""

    # class that TimerMixin delegates __init__, _getline and quit to
    _super = poplib.POP3_SSL