Fix #3591: DeviantArt commentaries: strip embedded image/text thumbs.

This commit is contained in:
evazion
2018-03-31 11:27:14 -05:00
parent 75cc1421a3
commit 04ecca6a06
2 changed files with 18 additions and 3 deletions

View File

@@ -47,7 +47,7 @@ class DText
stripped.strip
end
def self.from_html(text, &block)
def self.from_html(text, inline: false, &block)
html = Nokogiri::HTML.fragment(text)
dtext = html.children.map do |element|
@@ -79,14 +79,14 @@ class DText
title = from_html(element.inner_html, &block)
"#{hN}. #{title}\n\n"
when "a"
title = from_html(element.inner_html, &block).strip
title = from_html(element.inner_html, inline: true, &block).strip
url = element["href"]
%("#{title}":[#{url}]) if title.present? && url.present?
when "img"
alt_text = element.attributes["title"] || element.attributes["alt"] || ""
src = element["src"]
if element.parent.name == "a"
if inline
alt_text
elsif alt_text.present? && src.present?
%("#{alt_text}":[#{src}]\n\n)

View File

@@ -42,6 +42,21 @@ module Sources
def self.to_dtext(text)
DText.from_html(text) do |element|
# Convert embedded thumbnails of journal posts to 'deviantart #123'
# links. Strip embedded thumbnails of image posts. Example:
# https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
if element.name == "a" && element["data-sigil"] == "thumb"
element.name = "span"
# <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
if element["class"].split.include?("lit")
deviation_id = element["href"][%r!-(\d+)\z!, 1].to_i
element.content = "deviantart ##{deviation_id}"
else
element.content = ""
end
end
if element.name == "a" && element["href"].present?
element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")