Help on Code logic to remove duplicate mails from webapp mail box
eman banerjee
emanbanerjee at gmail.com
Wed Oct 30 03:34:14 EDT 2019
On Wednesday, 30 October 2019 12:40:06 UTC+5:30, eman banerjee wrote:
> Hi
>
> I am working on a project where we connect to a webapp mailbox, extract the subject, sender, body, etc. from each mail, save them in a DataFrame, and insert them into a SQL DB.
>
> My next challenge is to remove any duplicate mails from mailbox.
> Could you kindly help me.
> For example, each new mail entering the mailbox could first be checked; if it is a duplicate email, it would not be inserted into the mailbox.
>
>
> Thanks
code is below
class EmailAnalysis:
'''Method help to communicate with outlook'''
def emailExchangeCredentials(self):
try:
cipher = Fernet(b'***************')
account_details = b'*****************'
user_details = b'****************'
sec_details = b'*****************'
account_details_decrypt = cipher.decrypt(account_details).decode('utf-8')
user_details_decrypt = cipher.decrypt(user_details).decode('utf-8')
sec_details_decrypt = cipher.decrypt(sec_details).decode('utf-8')
credentials = Credentials(user_details_decrypt, sec_details_decrypt)
account = Account(account_details_decrypt, credentials=credentials, autodiscover=True)
return account
except:
account = 'Failure'
return account
def createDBConn(self):
cipher = Fernet(b'*********************')
ip = b'*******************'
port = ****
service_name = b'*************'
attuid = b'**********************'
password = b'********************'
ip_decrypt = cipher.decrypt(ip).decode('utf-8')
service_name_decrypt = cipher.decrypt(service_name).decode('utf-8')
attuid_decrypt = cipher.decrypt(attuid).decode('utf-8')
password_decrypt = cipher.decrypt(password).decode('utf-8')
dsn_tns = ora.makedsn(ip_decrypt, port,service_name_decrypt)
conn = ora.connect(attuid_decrypt,password_decrypt,dsn_tns)
return conn
def extractEmail(self, account, Email_Data, conn):
self.Email_Data = Email_Data
SUBJECT = []
SENDER = []
JOB_NAME = []
EMAIL_DATE = []
EMAIL_BODY = []
REMEDIATION_ID = []
PRIORITY = []
i = 0
to_folder = account.inbox / 'Read-Mail'
cursor = conn.cursor()
#print("Type.......................:",type(account.inbox.all()))
for item in account.inbox.all().order_by('-datetime_received')[:20]:
if len(item.sender.email_address.split('*****.com'))>1:
if len(item.subject.split('GREEN'))<2:
SUBJECT.append(item.subject)
SENDER.append(item.sender.email_address)
find_Job = item.sender.email_address.split('@')
job_name = find_Job[0]
JOB_NAME.append(str(job_name))
#print("Date time received: ",item.datetime_received.date())
EMAIL_DATE.append(item.datetime_received.date())
date = item.datetime_received.date()
date_ = date.strftime('%Y-%m-%d')
print("Date:",date_)
soup = BeautifulSoup(item.body)
email_body_unfor = soup.get_text()
#print("Emain Body Ori:- ",email_body_unfor)
email_body_unfor_arr = email_body_unfor.split('-->')
if len(email_body_unfor_arr) > 1:
email_body_unfor_foot = email_body_unfor_arr[1]
#print("Emain Body Ori2:- ",email_body_unfor_foot)
email_body_unfor_sp_arr = email_body_unfor_foot.split("This message is for the designated recipient only")
if len(email_body_unfor_sp_arr) > 1:
email_body_unfor_sp = email_body_unfor_sp_arr[0]
#print("Emain Body Ori3:- ",email_body_unfor_sp)
email_body_1 = email_body_unfor_sp.strip()
email_body_2 = email_body_1.rstrip()
email_body = email_body_2.lstrip()
#print("Email Body: ",email_body)
EMAIL_BODY.append(email_body)
else:
email_body_1 = email_body_unfor.strip()
email_body_2 = email_body_1.rstrip()
email_body = email_body_2.lstrip()
EMAIL_BODY.append(email_body)
i = i+1
i = 1000+i
Issue_ID = i
PROCESS_NAME = str(find_Job[0])
MESSAGE_1 = ''
MESSAGE_2 = ''
MESSAGE = email_body
sql = "INSERT INTO ***DBname** (SENDER,SUBJECT,EMAIL_BODY,EMAIL_DATE) VALUES ("+"'"+str(item.sender.email_address)+"',"+"'"+str(item.subject)+"',"+"'"+str(email_body)+"',"+"'"+str(date_)+"')"
print('SQL :- ',sql)
result = cursor.execute(sql.encode('utf-8'))
conn.commit()
#print('to folder:-',to_folder)
item.move(to_folder)
else:
item.move(to_folder)
else:
item.move(to_folder)
cursor.close()
conn.close()
Email_Data = pd.DataFrame(list(zip(SUBJECT, SENDER,EMAIL_DATE, EMAIL_BODY)),columns =['SUBJECT', 'SENDER', 'EMAIL_DATE', 'EMAIL_BODY'])
return Email_Data
def emailAnalysisController(self,Task_Details):
try:
#print('inside controller')
self.Task_Details = Task_Details
acc = self.emailExchangeCredentials(self)
if acc == 'Failure':
acc = self.emailExchangeCredentials(self)
Email_Data = pd.DataFrame(columns=['EMAIL_DATE','SENDER','EMAIL_BODY','SUBJECT'])
Email_Data['EMAIL_DATE'] = pd.to_datetime(Email_Data['EMAIL_DATE'])
conn = self.createDBConn(self)
Email_Data = self.extractEmail(self, acc,Email_Data, conn)
exection_status = 'Success'
return exection_status
except:
exection_status = 'Failure'
return exection_status
More information about the Python-list
mailing list