sources: factor out html-to-dtext code to DText.from_html.
This commit is contained in:
@@ -369,6 +369,53 @@ class DText
|
|||||||
s
|
s
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Convert a fragment of HTML into DText markup.
#
# The text is parsed with Nokogiri::HTML.fragment and each top-level node is
# translated in document order. Nested markup is handled by recursing on the
# node's inner HTML, and the translated pieces are concatenated without a
# separator.
#
# @param text [String] the HTML source to convert
# @yield [node] invoked with every node, at every nesting level, before the
#   node is translated, so the caller can inspect or rewrite it first (for
#   example, change or clear a link's "href")
# @return [String] the DText rendering of +text+
def self.from_html(text, &block)
  # HTML tags that translate to a plain [tag]...[/tag] pair in DText.
  paired_tags = {
    "blockquote" => "quote",
    "small" => "tn",
    "sub" => "tn",
    "b" => "b",
    "strong" => "b",
    "i" => "i",
    "em" => "i",
    "u" => "u",
    "s" => "s",
    "strike" => "s"
  }
  headings = %w[h1 h2 h3 h4 h5 h6]

  fragments = Nokogiri::HTML.fragment(text).children.map do |node|
    block.call(node) if block

    tag = node.name

    if paired_tags.key?(tag)
      # An empty element contributes nothing rather than an empty tag pair.
      next unless node.inner_html.present?

      dtext_tag = paired_tags[tag]
      next "[#{dtext_tag}]#{from_html(node.inner_html, &block)}[/#{dtext_tag}]"
    end

    case tag
    when "text"
      node.content
    when "br"
      "\n"
    when "p"
      "#{from_html(node.inner_html, &block)}\n\n"
    when "li"
      next unless node.inner_html.present?

      "* #{from_html(node.inner_html, &block)}"
    when *headings
      # DText headings reuse the HTML tag name: "h1. Title".
      "#{tag}. #{from_html(node.inner_html, &block)}\n"
    when "a"
      label = from_html(node.inner_html, &block)
      href = node["href"]
      # A link missing either its text or its target is dropped entirely.
      next unless label.present? && href.present?

      %("#{label}":[#{href}])
    when "img"
      # Images are replaced by their title, falling back to the alt text.
      attrs = node.attributes
      attrs["title"] || attrs["alt"] || ""
    when "comment"
      nil # HTML comments are ignored
    else
      # Unrecognised tags are unwrapped: only their contents are converted.
      from_html(node.inner_html, &block)
    end
  end

  fragments.join
end
|
||||||
|
|
||||||
# extract the first paragraph `needle` occurs in.
|
# extract the first paragraph `needle` occurs in.
|
||||||
def self.excerpt(dtext, needle)
|
def self.excerpt(dtext, needle)
|
||||||
dtext = dtext.gsub(/\r\n|\r|\n/, "\n")
|
dtext = dtext.gsub(/\r\n|\r|\n/, "\n")
|
||||||
|
|||||||
@@ -36,47 +36,11 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
def self.to_dtext(text)
|
def self.to_dtext(text)
|
||||||
html = Nokogiri::HTML.fragment(text)
|
DText.from_html(text) do |element|
|
||||||
|
if element.name == "a" && element["href"].present?
|
||||||
dtext = html.children.map do |element|
|
element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
|
||||||
case element.name
|
|
||||||
when "text"
|
|
||||||
element.content
|
|
||||||
when "br"
|
|
||||||
"\n"
|
|
||||||
when "blockquote"
|
|
||||||
"[quote]#{to_dtext(element.inner_html)}[/quote]" if element.inner_html.present?
|
|
||||||
when "small", "sub"
|
|
||||||
"[tn]#{to_dtext(element.inner_html)}[/tn]" if element.inner_html.present?
|
|
||||||
when "b"
|
|
||||||
"[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present?
|
|
||||||
when "i"
|
|
||||||
"[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present?
|
|
||||||
when "u"
|
|
||||||
"[u]#{to_dtext(element.inner_html)}[/u]" if element.inner_html.present?
|
|
||||||
when "strike"
|
|
||||||
"[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present?
|
|
||||||
when "li"
|
|
||||||
"* #{to_dtext(element.inner_html)}" if element.inner_html.present?
|
|
||||||
when "h1", "h2", "h3", "h4", "h5", "h6"
|
|
||||||
hN = element.name
|
|
||||||
title = to_dtext(element.inner_html)
|
|
||||||
"#{hN}. #{title}\n"
|
|
||||||
when "a"
|
|
||||||
title = to_dtext(element.inner_html)
|
|
||||||
url = element.attributes["href"].value
|
|
||||||
url = url.gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
|
|
||||||
%("#{title}":[#{url}]) if title.present?
|
|
||||||
when "img"
|
|
||||||
element.attributes["title"] || element.attributes["alt"] || ""
|
|
||||||
when "comment"
|
|
||||||
# ignored
|
|
||||||
else
|
|
||||||
to_dtext(element.inner_html)
|
|
||||||
end
|
end
|
||||||
end.join
|
end
|
||||||
|
|
||||||
dtext
|
|
||||||
end
|
end
|
||||||
|
|
||||||
protected
|
protected
|
||||||
|
|||||||
@@ -50,26 +50,9 @@ module Sources
|
|||||||
|
|
||||||
protected
|
protected
|
||||||
|
|
||||||
# XXX: duplicated from strategies/deviant_art.rb.
|
|
||||||
def self.to_dtext(text)
|
def self.to_dtext(text)
|
||||||
html = Nokogiri::HTML.fragment(text)
|
text = text.gsub(/\r\n|\r/, "<br>")
|
||||||
|
DText.from_html(text).strip
|
||||||
dtext = html.children.map do |element|
|
|
||||||
case element.name
|
|
||||||
when "text"
|
|
||||||
element.content
|
|
||||||
when "strong"
|
|
||||||
"[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present?
|
|
||||||
when "i"
|
|
||||||
"[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present?
|
|
||||||
when "s"
|
|
||||||
"[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present?
|
|
||||||
else
|
|
||||||
to_dtext(element.inner_html)
|
|
||||||
end
|
|
||||||
end.join
|
|
||||||
|
|
||||||
dtext
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def get_commentary_from_page(page)
|
def get_commentary_from_page(page)
|
||||||
|
|||||||
@@ -55,32 +55,13 @@ module Sources::Strategies
|
|||||||
end
|
end
|
||||||
|
|
||||||
def dtext_artist_commentary_desc
|
def dtext_artist_commentary_desc
|
||||||
to_dtext(artist_commentary_desc)
|
DText.from_html(artist_commentary_desc) do |element|
|
||||||
end
|
if element.name == "a"
|
||||||
|
|
||||||
def to_dtext(text)
|
|
||||||
html = Nokogiri::HTML.fragment(text)
|
|
||||||
|
|
||||||
dtext = html.children.map do |element|
|
|
||||||
case element.name
|
|
||||||
when "text"
|
|
||||||
element.content
|
|
||||||
when "p"
|
|
||||||
to_dtext(element.inner_html) + "\n\n"
|
|
||||||
when "a"
|
|
||||||
# don't include links to the toot itself.
|
# don't include links to the toot itself.
|
||||||
media_urls = api_response.json["media_attachments"].map { |attr| attr["text_url"] }
|
media_urls = api_response.json["media_attachments"].map { |attr| attr["text_url"] }
|
||||||
next if element.attribute("href").value.in?(media_urls)
|
element["href"] = nil if element["href"].in?(media_urls)
|
||||||
|
|
||||||
title = to_dtext(element.inner_html)
|
|
||||||
url = element.attributes["href"].value
|
|
||||||
%("#{title}":[#{url}])
|
|
||||||
else
|
|
||||||
to_dtext(element.inner_html)
|
|
||||||
end
|
end
|
||||||
end.join.strip
|
end.strip
|
||||||
|
|
||||||
dtext
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -92,8 +92,8 @@ module Sources
|
|||||||
|
|
||||||
should "get the dtext-ified commentary" do
|
should "get the dtext-ified commentary" do
|
||||||
desc = <<-EOS.strip_heredoc.chomp
|
desc = <<-EOS.strip_heredoc.chomp
|
||||||
foo [b]bold[/b] [i]italics[/i] [s]strike[/s] red\r
|
foo [b]bold[/b] [i]italics[/i] [s]strike[/s] red
|
||||||
\r
|
|
||||||
http://nijie.info/view.php?id=218944
|
http://nijie.info/view.php?id=218944
|
||||||
EOS
|
EOS
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user