Conversion HTML vers texte via Nokogiri ou Elinks

This commit is contained in:
Jérémy Lecour 2021-01-04 22:05:42 +01:00 committed by Jérémy Lecour
parent 74d208253d
commit cbfd3daae7
5 changed files with 60 additions and 11 deletions

View file

@ -1,13 +1,16 @@
class EmailImporter
attr_accessor :email_class
attr_accessor :metadata_mapping_class
attr_accessor :html_to_text_class
# Builds an importer whose collaborators can be swapped out by the caller.
#
# @param email_class stored in @email_class; defaults to Email
# @param metadata_mapping_class stored in @metadata_mapping_class;
#   defaults to MetadataMapping
# @param html_to_text_class stored in @html_to_text_class; text_plain_body
#   calls `.new.convert(html)` on it. Defaults to the class configured in
#   Rails.configuration.html_to_text_class.
def initialize(
email_class: Email,
metadata_mapping_class: MetadataMapping,
html_to_text_class: Rails.configuration.html_to_text_class)
@email_class = email_class
@metadata_mapping_class = metadata_mapping_class
@html_to_text_class = html_to_text_class
end
def import(mail)
@ -31,14 +34,7 @@ class EmailImporter
end
def delivered_to(mail)
header = mail.header["Delivered-To"]
if header.respond_to?(:map)
header.map(&:value)
elsif header.respond_to?(:value)
header.value
else
mail.to
end
values_from_header(header: mail.header["Delivered-To"], default: Array(mail.to))
end
def text_plain_body(mail)
@ -46,12 +42,12 @@ class EmailImporter
if mail.text_part.present?
mail.text_part.decoded
elsif mail.html_part.present?
Nokogiri::HTML(mail.html_part.decoded).text
html_to_text_class.new.convert(mail.html_part.decoded)
else
mail.parts[0].decoded
end
elsif mail.content_type && mail.content_type.match?(/text\/html/)
Nokogiri::HTML(mail.decoded).text
elsif mail.content_type && mail.content_type.match?(/\btext\/html\b/)
html_to_text_class.new.convert(mail.decoded)
else
mail.decoded
end

View file

@ -0,0 +1,12 @@
# Namespace for the HTML-to-plain-text converters.
module HtmlToText
  # Error type raised by converters when a conversion fails.
  class Error < ::StandardError
  end

  # Abstract converter: defines the #convert contract that concrete
  # converters override.
  class Base
    # Converts an HTML string to text. The base implementation always
    # raises, so subclasses must provide their own version.
    #
    # @param html [String] HTML markup to convert
    # @raise [NotImplementedError] always
    def convert(html)
      raise NotImplementedError
    end
  end
end

View file

@ -0,0 +1,20 @@
# Open3 is not loaded by default; require it here so this file works on its own.
require "open3"

module HtmlToText
  # Converts HTML to plain text by piping it through the external `elinks`
  # program in dump mode.
  class Elinks < Base
    attr_accessor :elinks_path

    # @param elinks_path [String] path of the elinks executable
    def initialize(elinks_path: "/usr/bin/elinks")
      @elinks_path = elinks_path
    end

    # Feeds the HTML to elinks on stdin and returns what it prints on stdout.
    #
    # @param html_input [String] HTML markup to convert
    # @return [String] the text dump produced by elinks
    # @raise [HtmlToText::Error] if elinks exits with a failure status or
    #   cannot be started at all (missing binary, permission denied, ...)
    def convert(html_input)
      # Passing the program and its arguments separately bypasses the shell,
      # so a path containing spaces or metacharacters is used verbatim.
      output, error, status = Open3.capture3(elinks_path, "-dump", "-force-html", stdin_data: html_input)
      raise Error, "Error calling elinks : #{error}" unless status.success?

      output
    rescue SystemCallError => e
      # Spawn failures surface as Errno::* errors; wrap them so callers only
      # have to handle HtmlToText::Error.
      raise Error, "Error calling elinks : #{e.message}"
    end
  end
end

View file

@ -0,0 +1,11 @@
module HtmlToText
  # Converts HTML to plain text using the Nokogiri parser.
  class Nokogiri < Base
    # Parses the HTML and returns the text content of the resulting document.
    #
    # @param html_input [String] HTML markup to convert
    # @return [String] text content of the parsed document
    # @raise [HtmlToText::Error] if parsing or text extraction fails
    def convert(html_input)
      # Leading :: reaches the Nokogiri gem rather than this class.
      ::Nokogiri::HTML(html_input).text
    rescue StandardError => e
      # `=>` binds the exception; StandardError leaves signals and
      # SystemExit untouched.
      raise Error, "Error using Nokogiri : #{e.message}"
    end
  end
end

View file

@ -18,5 +18,15 @@ module Evobal
# Application configuration can go into files in config/initializers
# -- all .rb files in that directory are automatically loaded after loading
# the framework and any gems in your application.
# Console-only setup: loads pry and assigns it to config.console.
console do
# this block is called only when running console,
# so we can safely require pry here
require "pry"
config.console = Pry
end
# Converter class used to turn HTML mail bodies into plain text.
# EmailImporter reads this setting as the default for its
# html_to_text_class keyword.
config.html_to_text_class = HtmlToText::Elinks
end
end