diff --git a/app/services/email_importer.rb b/app/services/email_importer.rb
index 48fe5fe..e32c632 100644
--- a/app/services/email_importer.rb
+++ b/app/services/email_importer.rb
@@ -1,13 +1,16 @@
 class EmailImporter
 
   attr_accessor :email_class
   attr_accessor :metadata_mapping_class
+  attr_accessor :html_to_text_class
 
   def initialize(
     email_class: Email,
     metadata_mapping_class: MetadataMapping,
+    html_to_text_class: Rails.configuration.html_to_text_class)
     @email_class = email_class
     @metadata_mapping_class = metadata_mapping_class
+    @html_to_text_class = html_to_text_class
   end
 
   def import(mail)
@@ -31,14 +34,7 @@ class EmailImporter
   end
 
   def delivered_to(mail)
-    header = mail.header["Delivered-To"]
-    if header.respond_to?(:map)
-      header.map(&:value)
-    elsif header.respond_to?(:value)
-      header.value
-    else
-      mail.to
-    end
+    values_from_header(header: mail.header["Delivered-To"], default: Array(mail.to))
   end
 
   def text_plain_body(mail)
@@ -46,12 +42,12 @@ class EmailImporter
       if mail.text_part.present?
         mail.text_part.decoded
       elsif mail.html_part.present?
-        Nokogiri::HTML(mail.html_part.decoded).text
+        html_to_text_class.new.convert(mail.html_part.decoded)
       else
         mail.parts[0].decoded
       end
-    elsif mail.content_type && mail.content_type.match?(/text\/html/)
-      Nokogiri::HTML(mail.decoded).text
+    elsif mail.content_type && mail.content_type.match?(/\btext\/html\b/)
+      html_to_text_class.new.convert(mail.decoded)
     else
       mail.decoded
     end
diff --git a/app/services/html_to_text/base.rb b/app/services/html_to_text/base.rb
new file mode 100644
index 0000000..7d3cc29
--- /dev/null
+++ b/app/services/html_to_text/base.rb
@@ -0,0 +1,20 @@
+# Converters that turn an HTML document into plain text.
+module HtmlToText
+
+  # Raised by the converters in this module when a conversion fails.
+  class Error < ::StandardError
+  end
+
+  # Abstract converter: subclasses are expected to override #convert.
+  class Base
+    # Converts an HTML string to plain text.
+    #
+    # @param html [String] HTML source to convert
+    # @return [String] plain-text rendering (provided by subclasses)
+    # @raise [NotImplementedError] always, in this base class
+    def convert(html)
+      fail NotImplementedError
+    end
+  end
+
+end
diff --git a/app/services/html_to_text/elinks.rb b/app/services/html_to_text/elinks.rb
new file mode 100644
index 0000000..f0d63aa
--- /dev/null
+++ b/app/services/html_to_text/elinks.rb
@@ -0,0 +1,30 @@
+require "open3"
+
+module HtmlToText
+  # Converts HTML to text by piping it through the external elinks program.
+  class Elinks < Base
+
+    attr_accessor :elinks_path
+
+    # @param elinks_path [String] path of the elinks executable
+    def initialize(elinks_path: "/usr/bin/elinks")
+      @elinks_path = elinks_path
+    end
+
+    # Feeds html_input to elinks on stdin and returns what it prints.
+    #
+    # @param html_input [String] HTML source to convert
+    # @return [String] standard output of `elinks -dump -force-html`
+    # @raise [HtmlToText::Error] when elinks does not exit successfully
+    def convert(html_input)
+      # Pass argv as separate strings so elinks_path is never split or parsed by a shell.
+      output, error, status = Open3.capture3(elinks_path, "-dump", "-force-html", stdin_data: html_input)
+      if status.success?
+        output
+      else
+        raise Error, "Error calling elinks : #{error}"
+      end
+    end
+
+  end
+end
diff --git a/app/services/html_to_text/nokogiri.rb b/app/services/html_to_text/nokogiri.rb
new file mode 100644
index 0000000..00ddacb
--- /dev/null
+++ b/app/services/html_to_text/nokogiri.rb
@@ -0,0 +1,17 @@
+module HtmlToText
+  # Converts HTML to text with the Nokogiri parser.
+  class Nokogiri < Base
+
+    # Parses html_input and returns the text content of the document.
+    #
+    # @param html_input [String] HTML source to convert
+    # @return [String] text of the parsed document
+    # @raise [HtmlToText::Error] when parsing or text extraction raises a StandardError
+    def convert(html_input)
+      ::Nokogiri::HTML(html_input).text
+    rescue StandardError => ex
+      raise Error, "Error using Nokogiri : #{ex.message}"
+    end
+
+  end
+end
diff --git a/config/application.rb b/config/application.rb
index 63ad398..15eba5c 100644
--- a/config/application.rb
+++ b/config/application.rb
@@ -18,5 +18,15 @@ module Evobal
     # Application configuration can go into files in config/initializers
     # -- all .rb files in that directory are automatically loaded after loading
     # the framework and any gems in your application.
+
+    console do
+      # this block is called only when running console,
+      # so we can safely require pry here
+      require "pry"
+      config.console = Pry
+    end
+
+    config.html_to_text_class = HtmlToText::Elinks
+
   end
 end