diff --git a/README.md b/README.md index 2c28b90..333c1ec 100644 --- a/README.md +++ b/README.md @@ -187,7 +187,13 @@ mail.text_not_managed: all not managed text (check the warning logs to find cont mail.to mail.to_domains mail.timezone: returns the timezone, offset from UTC -mail_partial: returns only the mains parts of emails +mail.mail_partial: returns only the mains parts of emails +``` + +It's possible to write the attachments on disk with the method: + +``` +mail.write_attachments(base_path) ``` ## Usage from command-line diff --git a/mailparser/mailparser.py b/mailparser/mailparser.py index 62da6bc..412f128 100644 --- a/mailparser/mailparser.py +++ b/mailparser/mailparser.py @@ -42,6 +42,7 @@ ported_open, ported_string, receiveds_parsing, + write_attachments, ) from .exceptions import MailParserEnvironmentError @@ -348,10 +349,14 @@ def parse(self): # walk all mail parts for i, p in enumerate(parts): if not p.is_multipart(): - filename = decode_header_part(p.get_filename()) charset = p.get_content_charset('utf-8') charset_raw = p.get_content_charset() log.debug("Charset {!r} part {!r}".format(charset, i)) + content_id = ported_string(p.get('content-id')) + log.debug("content-id {!r} part {!r}".format( + content_id, i)) + filename = decode_header_part( + p.get_filename("{}".format(content_id))) # this is an attachment if filename: @@ -365,9 +370,6 @@ def parse(self): p.get('content-transfer-encoding', '')).lower() log.debug("Transfer encoding {!r} part {!r}".format( transfer_encoding, i)) - content_id = ported_string(p.get('content-id')) - log.debug("content-id {!r} part {!r}".format( - content_id, i)) content_disposition = ported_string( p.get('content-disposition')) log.debug("content-disposition {!r} part {!r}".format( @@ -473,6 +475,16 @@ def get_server_ipaddress(self, trust): log.debug("IP {!r} not private".format(ip_str)) return ip_str + def write_attachments(self, base_path): + """ This method writes the attachments of mail on disk + + Arguments: + base_path {str} -- Base path where write the attachments + """ + write_attachments( + attachments=self.attachments, + base_path=base_path) + def __getattr__(self, name): name = name.strip("_").lower() name_header = name.replace("_", "-") diff --git a/mailparser/utils.py b/mailparser/utils.py index 9b9c9af..428e520 100644 --- a/mailparser/utils.py +++ b/mailparser/utils.py @@ -31,8 +31,10 @@ import hashlib import logging import os +import random import re import simplejson as json +import string import subprocess import sys import tempfile @@ -456,12 +458,19 @@ def get_header(message, name): name (string): header to get Returns: - decoded header + str if there is an header + list if there are more than one """ - header = message.get(name) - log.debug("Getting header {!r}: {!r}".format(name, header)) - if header: - return decode_header_part(header) + + headers = message.get_all(name) + log.debug("Getting header {!r}: {!r}".format(name, headers)) + if headers: + headers = [decode_header_part(i) for i in headers] + if len(headers) == 1: + # in this case return a string + return headers[0] + # in this case return a list + return headers return six.text_type() @@ -551,3 +560,16 @@ def write_sample(binary, payload, path, filename): # pragma: no cover else: with open(sample, "w") as f: f.write(payload) + + +def random_string(string_length=10): + """ Generate a random string of fixed length + + Keyword Arguments: + string_length {int} -- String length (default: {10}) + + Returns: + str -- Random string + """ + letters = string.ascii_lowercase + return ''.join(random.choice(letters) for i in range(string_length)) diff --git a/mailparser/version.py b/mailparser/version.py index 563eb1a..9c913b6 100644 --- a/mailparser/version.py +++ b/mailparser/version.py @@ -17,7 +17,7 @@ limitations under the License. """ -__version__ = "3.11.0" +__version__ = "3.12.0" if __name__ == "__main__": print(__version__) diff --git a/requirements-dev.txt b/requirements-dev.txt index c506da0..9dc5ac3 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -1,7 +1,7 @@ # tool ipaddress==1.0.23 simplejson==3.17.0 -six==1.13.0 +six==1.14.0 # dev coverage==5.0.2 diff --git a/requirements.txt b/requirements.txt index 7159060..4694111 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,3 @@ ipaddress==1.0.23 simplejson==3.17.0 -six==1.13.0 \ No newline at end of file +six==1.14.0 \ No newline at end of file diff --git a/setup.py b/setup.py index b95b541..959bd43 100644 --- a/setup.py +++ b/setup.py @@ -41,6 +41,7 @@ license="Apache License, Version 2.0", url="https://github.com/SpamScope/mail-parser", long_description=long_description, + long_description_content_type="text/markdown", version=__version__, author="Fedele Mantuano", author_email="mantuano.fedele@gmail.com", diff --git a/tests/mails/mail_test_14 b/tests/mails/mail_test_14 index f259804..319a269 100644 --- a/tests/mails/mail_test_14 +++ b/tests/mails/mail_test_14 @@ -4,6 +4,8 @@ Date: Wed, 24 Apr 2019 10:05:02 +0200 (CEST) Mime-Version: 1.0 Content-Type: multipart/mixed; boundary="===============8544575414772382491==" To: rcpt@example.com +Received-SPF: custom_header1 +Received-SPF: custom_header2 --===============8544575414772382491== Content-Type: text/html; charset=UTF-8 diff --git a/tests/test_mail_parser.py b/tests/test_mail_parser.py index 9ac6a53..ed451ca 100644 --- a/tests/test_mail_parser.py +++ b/tests/test_mail_parser.py @@ -20,6 +20,7 @@ import datetime import logging import os +import shutil import six import sys import unittest @@ -42,6 +43,7 @@ ported_string, receiveds_parsing, parse_received, + random_string, ) from mailparser.exceptions import MailParserEnvironmentError @@ -87,6 +89,27 @@ def setUp(self): mail_malformed_2, mail_malformed_3) + def test_write_attachments(self): + attachments = [ + "<_1_0B4E44A80B15F6FC005C1243C12580DD>", + "<_1_0B4E420C0B4E3DD0005C1243C12580DD>", + "<_1_0B4E24640B4E1564005C1243C12580DD>", + "Move To Eight ZWEP6227F.pdf"] + random_path = os.path.join(root, "tests", random_string()) + mail = mailparser.parse_from_file(mail_test_10) + os.makedirs(random_path) + mail.write_attachments(random_path) + for i in attachments: + self.assertTrue(os.path.exists(os.path.join(random_path, i))) + shutil.rmtree(random_path) + + def test_issue62(self): + mail = mailparser.parse_from_file(mail_test_14) + received_spf = mail.Received_SPF + self.assertIsInstance(received_spf, list) + self.assertIn("custom_header1", received_spf) + self.assertIn("custom_header2", received_spf) + def test_html_field(self): mail = mailparser.parse_from_file(mail_malformed_1) self.assertIsInstance(mail.text_html, list) @@ -117,6 +140,8 @@ def test_mail_partial(self): self.assertNotIn("x-ibm-av-version", mail.mail_partial) result = mail.mail_partial_json self.assertIsInstance(result, six.text_type) + nr_attachments = len(mail._attachments) + self.assertEqual(nr_attachments, 4) def test_not_parsed_received(self): mail = mailparser.parse_from_file(mail_test_9)