Conversion HTML vers texte via Nokogiri ou Elinks
This commit is contained in:
parent
74d208253d
commit
cbfd3daae7
|
@ -1,13 +1,16 @@
|
||||||
class EmailImporter
|
class EmailImporter
|
||||||
attr_accessor :email_class
|
attr_accessor :email_class
|
||||||
attr_accessor :metadata_mapping_class
|
attr_accessor :metadata_mapping_class
|
||||||
|
attr_accessor :html_to_text_class
|
||||||
|
|
||||||
def initialize(
|
def initialize(
|
||||||
email_class: Email,
|
email_class: Email,
|
||||||
metadata_mapping_class: MetadataMapping,
|
metadata_mapping_class: MetadataMapping,
|
||||||
|
html_to_text_class: Rails.configuration.html_to_text_class)
|
||||||
|
|
||||||
@email_class = email_class
|
@email_class = email_class
|
||||||
@metadata_mapping_class = metadata_mapping_class
|
@metadata_mapping_class = metadata_mapping_class
|
||||||
|
@html_to_text_class = html_to_text_class
|
||||||
end
|
end
|
||||||
|
|
||||||
def import(mail)
|
def import(mail)
|
||||||
|
@ -31,14 +34,7 @@ class EmailImporter
|
||||||
end
|
end
|
||||||
|
|
||||||
def delivered_to(mail)
|
def delivered_to(mail)
|
||||||
header = mail.header["Delivered-To"]
|
values_from_header(header: mail.header["Delivered-To"], default: Array(mail.to))
|
||||||
if header.respond_to?(:map)
|
|
||||||
header.map(&:value)
|
|
||||||
elsif header.respond_to?(:value)
|
|
||||||
header.value
|
|
||||||
else
|
|
||||||
mail.to
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def text_plain_body(mail)
|
def text_plain_body(mail)
|
||||||
|
@ -46,12 +42,12 @@ class EmailImporter
|
||||||
if mail.text_part.present?
|
if mail.text_part.present?
|
||||||
mail.text_part.decoded
|
mail.text_part.decoded
|
||||||
elsif mail.html_part.present?
|
elsif mail.html_part.present?
|
||||||
Nokogiri::HTML(mail.html_part.decoded).text
|
html_to_text_class.new.convert(mail.html_part.decoded)
|
||||||
else
|
else
|
||||||
mail.parts[0].decoded
|
mail.parts[0].decoded
|
||||||
end
|
end
|
||||||
elsif mail.content_type && mail.content_type.match?(/text\/html/)
|
elsif mail.content_type && mail.content_type.match?(/\btext\/html\b/)
|
||||||
Nokogiri::HTML(mail.decoded).text
|
html_to_text_class.new.convert(mail.decoded)
|
||||||
else
|
else
|
||||||
mail.decoded
|
mail.decoded
|
||||||
end
|
end
|
||||||
|
|
12
app/services/html_to_text/base.rb
Normal file
12
app/services/html_to_text/base.rb
Normal file
|
@ -0,0 +1,12 @@
|
||||||
|
# Namespace for the HTML-to-plain-text converters.
module HtmlToText
  # Error raised by converters when a conversion fails.
  class Error < ::StandardError
  end

  # Abstract converter: defines the interface that concrete converters
  # implement by overriding #convert.
  class Base
    # Converts an HTML document to plain text.
    #
    # @param html [String] the HTML source to convert
    # @return [String] the text produced by the concrete converter
    # @raise [NotImplementedError] always, unless overridden by a subclass
    def convert(html)
      # Name the offending class so a missing override is easy to locate.
      raise NotImplementedError, "#{self.class.name} must implement #convert"
    end
  end
end
|
20
app/services/html_to_text/elinks.rb
Normal file
20
app/services/html_to_text/elinks.rb
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
require "open3"

module HtmlToText
  # Converter that delegates the HTML-to-text rendering to the external
  # `elinks` program.
  class Elinks < Base
    # Path of the elinks executable to run.
    attr_accessor :elinks_path

    # @param elinks_path [String] path of the elinks executable
    def initialize(elinks_path: "/usr/bin/elinks")
      @elinks_path = elinks_path
    end

    # Feeds the HTML to `elinks -dump -force-html` on standard input and
    # returns what the program writes on standard output.
    #
    # @param html_input [String] the HTML source to convert
    # @return [String] the standard output of elinks
    # @raise [HtmlToText::Error] if elinks cannot be started or exits with
    #   a non-successful status
    def convert(html_input)
      # Pass the command as an argument list so no shell is involved: a
      # path containing spaces or shell metacharacters is used verbatim.
      output, error, status = Open3.capture3(
        elinks_path, "-dump", "-force-html",
        stdin_data: html_input
      )
      raise Error, "Error calling elinks : #{error}" unless status.success?

      output
    rescue SystemCallError => ex
      # Missing or non-executable binary (Errno::ENOENT, Errno::EACCES, ...).
      raise Error, "Error calling elinks : #{ex.message}"
    end
  end
end
|
11
app/services/html_to_text/nokogiri.rb
Normal file
11
app/services/html_to_text/nokogiri.rb
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
module HtmlToText
  # Converter that extracts the text of an HTML document with the
  # Nokogiri gem.
  class Nokogiri < Base
    # Parses the HTML with Nokogiri and returns the document's text.
    #
    # @param html_input [String] the HTML source to convert
    # @return [String] the text of the parsed document
    # @raise [HtmlToText::Error] if parsing or text extraction fails
    def convert(html_input)
      # Leading :: is required: inside this class a bare `Nokogiri`
      # resolves to HtmlToText::Nokogiri, not the gem.
      ::Nokogiri::HTML(html_input).text
    rescue StandardError => ex
      # `=>` binds the rescued exception; StandardError (not Exception)
      # leaves signals and SystemExit alone.
      raise Error, "Error using Nokogiri : #{ex.message}"
    end
  end
end
|
|
@ -18,5 +18,15 @@ module Evobal
|
||||||
# Application configuration can go into files in config/initializers
|
# Application configuration can go into files in config/initializers
|
||||||
# -- all .rb files in that directory are automatically loaded after loading
|
# -- all .rb files in that directory are automatically loaded after loading
|
||||||
# the framework and any gems in your application.
|
# the framework and any gems in your application.
|
||||||
|
|
||||||
|
console do
|
||||||
|
# this block is called only when running console,
|
||||||
|
# so we can safely require pry here
|
||||||
|
require "pry"
|
||||||
|
config.console = Pry
|
||||||
|
end
|
||||||
|
|
||||||
|
config.html_to_text_class = HtmlToText::Elinks
|
||||||
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
Loading…
Reference in a new issue