# Translate an HTML fragment (a DeviantArt artist commentary) into DText.
#
# The fragment is parsed with Nokogiri and each top-level node is converted
# according to its tag name; nested markup is handled by recursing on the
# node's inner HTML. Tags without a dedicated rule are unwrapped (their
# contents are converted, the tag itself is dropped) and HTML comments are
# discarded.
#
# @param text [String] the HTML fragment to convert
# @return [String] the DText rendition of the fragment
def self.to_dtext(text)
  # HTML tags that translate to a plain [tag]...[/tag] DText wrapper.
  wrappers = {
    "blockquote" => "quote",
    "small" => "tn",
    "sub" => "tn",
    "b" => "b",
    "i" => "i",
    "u" => "u",
    "strike" => "s"
  }

  converted = Nokogiri::HTML.fragment(text).children.map do |element|
    tag = element.name

    case tag
    when "text"
      element.content
    when "br"
      "\n"
    when *wrappers.keys
      markup = wrappers[tag]
      "[#{markup}]#{to_dtext(element.inner_html)}[/#{markup}]" if element.inner_html.present?
    when "li"
      # Terminate every item; otherwise adjacent <li> elements are glued
      # together on a single line ("* a* b").
      "* #{to_dtext(element.inner_html)}\n" if element.inner_html.present?
    when "h1", "h2", "h3", "h4", "h5", "h6"
      title = to_dtext(element.inner_html)
      # Skip headings with no content rather than emitting a bare "hN. ".
      "#{tag}. #{title}\n" if title.present?
    when "a"
      title = to_dtext(element.inner_html)

      if title.present?
        # Node#[] returns nil when the attribute is missing (e.g. a named
        # anchor), so an <a> without an href no longer raises.
        url = element["href"].to_s
        # Unwrap DeviantArt's outgoing-link redirector to get the real target.
        url = url.sub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "")

        # Without a usable target keep the visible text and drop the link.
        url.present? ? %("#{title}":[#{url}]) : title
      end
    when "img"
      # Prefer the title, but fall back to the alt text when the title is
      # missing or empty.
      label = element["title"]
      label = element["alt"] unless label.present?
      label.to_s
    when "comment"
      # HTML comments are ignored.
      nil
    else
      to_dtext(element.inner_html)
    end
  end

  # nil entries (skipped nodes) contribute nothing to the joined string.
  converted.join
end