From c280234698675721ac172861f6a85f6df64714e0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=A9my=20Lecour?= Date: Thu, 31 Dec 2020 11:56:13 +0100 Subject: [PATCH] =?UTF-8?q?Extraction=20de=20EmailImporter=20+=20d=C3=A9bu?= =?UTF-8?q?t=20de=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/mailboxes/in_mailbox.rb | 87 +---- app/services/email_importer.rb | 103 +++++ test/fixtures/emails.yml | 11 - test/fixtures/files/html_only.eml | 536 +++++++++++++++++++++++++++ test/services/email_importer_test.rb | 14 + 5 files changed, 656 insertions(+), 95 deletions(-) create mode 100644 app/services/email_importer.rb delete mode 100644 test/fixtures/emails.yml create mode 100644 test/fixtures/files/html_only.eml create mode 100644 test/services/email_importer_test.rb diff --git a/app/mailboxes/in_mailbox.rb b/app/mailboxes/in_mailbox.rb index dcb75e8..5c1b2f7 100644 --- a/app/mailboxes/in_mailbox.rb +++ b/app/mailboxes/in_mailbox.rb @@ -1,90 +1,9 @@ class InMailbox < ApplicationMailbox def process - - email = Email.new( - message_id: mail.message_id, - subject: mail.subject, - date: mail.date, - to: mail.to, - delivered_to: delivered_to(mail), - from: mail.from, - plain_body: text_plain_body(mail), - headers: hashed_headers(mail), - cron: sent_by_cron?(mail), - mailing_list: mailing_list?(mail), - clients: clients(mail), - servers: servers(mail), - tickets: tickets(mail) - ) - + email_importer = EmailImporter.new() repository = EmailRepository.new + + email = email_importer.import(mail) repository.save(email) - rescue => ex - binding.pry end - - def delivered_to(mail) - header = mail.header["Delivered-To"] - if header.present? - if header.respond_to?(:map) - header.map(&:value) - else - header.value - end - else - mail.to - end - end - - def text_plain_body(mail) - if mail.parts.present? - mail.text_part.decoded - else - mail.decoded - end - end - - def hashed_headers(mail) - mail.header.map { |h| - { - name: h.name, - value: h.value - } - } - end - - def sent_by_cron?(mail) - mail.subject.match?(/cron/i) \ - || mail.header["X-Cron-Env"].present? - end - def mailing_list?(mail) - mail.header["List-Unsubscribe"].present? - end - - def clients(mail) - Array(mail.header["X-Client-ID"].value) if mail.header["X-Client-ID"].present? - end - - def servers(mail) - if mail.header["X-Server-Name"].present? - Array(mail.header["X-Server-Name"].value) - else - matching_header = ["To", "Delivered-To", "From", "Subject"].detect { |header_name| - address_match_evolix_net?(mail.header[header_name].value) if mail.header[header_name].present? - } - Array(extract_server_name_from_address(mail.header[matching_header].value)) if matching_header - end - end - - def tickets(mail) - Array(mail.header["X-Ticket-ID"].value) if mail.header["X-Ticket-ID"].present? - end - - def address_match_evolix_net?(address) - address.match?(/@(.+)\.evolix\.net/i) - end - def extract_server_name_from_address(address) - address.match(/@(.+)\.evolix\.net/i)[1] - end - end diff --git a/app/services/email_importer.rb b/app/services/email_importer.rb new file mode 100644 index 0000000..8f0226f --- /dev/null +++ b/app/services/email_importer.rb @@ -0,0 +1,103 @@ +class EmailImporter + attr_accessor :klass + + def initialize(klass = Email) + @klass = klass + end + + def import(mail) + email = klass.new( + message_id: mail.message_id, + subject: mail.subject, + date: mail.date, + to: mail.to, + delivered_to: delivered_to(mail), + from: mail.from, + plain_body: text_plain_body(mail), + headers: hashed_headers(mail), + cron: sent_by_cron?(mail), + mailing_list: mailing_list?(mail), + clients: clients(mail), + servers: servers(mail), + tickets: tickets(mail) + ) + rescue => ex + binding.pry + end + + def delivered_to(mail) + if mail.header["Delivered-To"] + mail.header["Delivered-To"] + else + mail.to + end + end + + def text_plain_body(mail) + if mail.parts.present? + if mail.text_part.present? + mail.text_part.decoded + elsif mail.html_part.present? + Nokogiri::HTML(mail.html_part.decoded).text + else + mail.parts[0].decoded + end + elsif mail.content_type.match?(/text\/html/) + Nokogiri::HTML(mail.body.decoded).text + else + mail.body.decoded + end + end + + def hashed_headers(mail) + mail.header.map { |header| + { + name: header.name, + value: header.value + } + } + end + + def sent_by_cron?(mail) + (mail.subject.present? && mail.subject.match?(/cron/i)) \ + || mail.header["X-Cron-Env"].present? + end + def mailing_list?(mail) + mail.header["List-Unsubscribe"].present? + end + + def clients(mail) + values_for_header(mail.header["X-Client-ID"]) + end + + def servers(mail) + ["To", "Delivered-To", "From", "Subject"].filter_map() { |header| + mail.header[header] + }.flatten.map(&:value).select { |text| + if address_match_evolix_net?(text) + extract_server_name_from_address(text) + end + } + end + + def tickets(mail) + values_for_header(mail.header["X-Ticket-ID"]) + end + + def address_match_evolix_net?(address) + address.match?(/@(.+)\.evolix\.net/i) + end + def extract_server_name_from_address(address) + address.match(/@(.+)\.evolix\.net/i)[1] + end + + def values_for_header(header) + if header.present? + if header.respond_to?(:map) + header.map(&:value) + else + header.value + end + end + end +end diff --git a/test/fixtures/emails.yml b/test/fixtures/emails.yml deleted file mode 100644 index 5181636..0000000 --- a/test/fixtures/emails.yml +++ /dev/null @@ -1,11 +0,0 @@ -# Read about fixtures at https://api.rubyonrails.org/classes/ActiveRecord/FixtureSet.html - -# This model initially had no columns defined. If you add columns to the -# model remove the '{}' from the fixture names and add the columns immediately -# below each fixture, per the syntax in the comments below -# -one: {} -# column: value -# -two: {} -# column: value diff --git a/test/fixtures/files/html_only.eml b/test/fixtures/files/html_only.eml new file mode 100644 index 0000000..714b8a3 --- /dev/null +++ b/test/fixtures/files/html_only.eml @@ -0,0 +1,536 @@ +Return-Path: +X-Original-To: maintenance@evolix.fr +Delivered-To: equipe@evolix.fr +Received: from m207-15.mailgun.net (m207-15.mailgun.net [161.38.207.15]) + by pele.evolix.net (Postfix) with ESMTPS id 7D2077F5D1 + for ; Tue, 27 Oct 2020 12:58:19 +0100 (CET) +DKIM-Signature: a=rsa-sha256; v=1; c=relaxed/relaxed; d=mg.s35798.fr; q=dns/txt; + s=krs; t=1603799902; h=Content-Transfer-Encoding: Mime-Version: + Content-Type: Subject: From: To: Sender: Reply-To: Message-Id: Date: + List-Unsubscribe: List-Unsubscribe-Post; + bh=bLPQd8jozY53WvIsLaomSUDhri3r+GcUcKSlfcoIpnY=; b=i7Kt8MTqEqLNlpP2Lm38A/l3Ynay1yZuPGDx7+ZfBpGWrOtyknKQRpU0NJyIilJGTSS6iLdc + 1Ezqr/7kR8O2zGxTM9m/Z4P4PJTr527ue+Ncl3dWfAcIM1MpvgAVvtbSkXnebRe26no9q1v/ + aQJH+fwxGUsRAjns2RsRfwGGwXY= +X-Mailgun-Sending-Ip: 161.38.207.15 +X-Mailgun-Sid: WyJmMTRjNyIsICJtYWludGVuYW5jZUBldm9saXguZnIiLCAiYWM1NyJd +List-Unsubscribe-Post: List-Unsubscribe=One-Click +List-Unsubscribe: , + +X-Mailgun-Batch-Id: 5f980b55fe8623b9ec342122 +Received: by luna.mailgun.net with HTTP; Tue, 27 Oct 2020 11:58:13 +0000 +Date: Tue, 27 Oct 2020 11:58:13 +0000 +Message-Id: <20201027115813.1.F2BD814B7DEAF8CE@mg.s35798.fr> +Reply-To: Alerte Formation Alerte Formation + +Sender: Alerte Formation +X-Mailgun-Tag: academy-fne-400k_175 +To: '' +From: Alerte Formation +Subject: =?utf-8?q?Fin_du_FNE_le_31/10_=3A_plus_que_quelques_jours_pour_f?= + =?utf-8?q?aire_financer_votre_formation_=C3=A0_100=25?= +Content-Type: text/html; charset="utf-8" +Mime-Version: 1.0 +Content-Transfer-Encoding: quoted-printable + + + + + + + + +

Attention, plus que que= +lques jours pour b=C3=A9n=C3=A9ficier du FNE !

+ +
+
Attention, plus que quelques jours pour béné= +ficier du FNE !
+ + +
‌ ‌ ‌ ‌ ‌&nbs= +p;‌ ‌ ‌ ‌ ‌ ‌ &= +zwnj; ‌ ‌ ‌ ‌ ‌ &zwn= +j; 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
3D"alt_te== +
+ + + + + + +
+

Attention, plus que quelques= + jours pour bénéficier du FNE !

+ +

Le 31 Octobre, il sera trop= + tard pour bénéficier d’une formation financée &= +agrave; 100% par l’Etat

+ +

Le fonds National pour l’emploi a= +ussi appelé FNE a été mis en place à la suite d= +e la crise saitaire. Ce dispositif vous permet de faire financer à 1= +00% votre formation professionnelle si votre entreprise connaît de l&= +rsquo;activité partielle, et ce quel que soit votre secteur d’= +activité.

+
+
+ + + + + + +
+

Des milliers de français ont auj= +ourd’hui bénéficié du FNE, mais la fin de cette = +aide exceptionnelle est annoncée pour le 31 Octobre. Elle ne couvrir= +a en effet plus par la suite l’intégralité de vos co&uc= +irc;ts formation.

+
+
+ + + + + + +
+ + + + + + +
E= +N SAVOIR PLUS
+
+
+ + + + + + +
+

Nous vous accompagnons pas= + à pas dans les démarches et le montage du dossier de finance= +ment,

+
+
+ + + + + + +
+ + + + + + +
CON= +TACTER UN CONSEILLER
+
+
+ + + + + + + + + +
+ + + + + + +
+=3D"alt_text"
+
+ + + + + + +
Le dévelo= +ppement des compétences
+ au service de votre carrière professionnelle
+
+
+ + + + + + +
 
+
+

Si vous ne souhaitez plus recevoir de messages, cliquez-ici, Signaler un email indésirable, Unsubscribe

+ + + + + + +
+
+
+3D"" diff --git a/test/services/email_importer_test.rb b/test/services/email_importer_test.rb new file mode 100644 index 0000000..fcac50b --- /dev/null +++ b/test/services/email_importer_test.rb @@ -0,0 +1,14 @@ +require 'test_helper' + +class EmailImporterTest < ActiveSupport::TestCase + test "convert html to text when html only" do + eml = file_fixture("html_only.eml").read + mail = Mail.from_source(eml) + email_importer = EmailImporter.new + + email = email_importer.import(mail) + + assert_match(/Attention, plus que quelques jours pour bénéficier du FNE !/, email.plain_body) + assert_no_match(/<\/?(p|b|br|img)\/?>/, email.plain_body) + end +end