Merge pull request #3137 from evazion/fix-da-artcomms

Autoconvert DeviantArt commentary from HTML to DText
This commit is contained in:
Albert Yi
2017-06-09 15:58:05 -07:00
committed by GitHub
3 changed files with 127 additions and 3 deletions

View File

@@ -86,8 +86,8 @@
Danbooru.ArtistCommentary.from_source = function(source) {
return $.get("/source.json?url=" + encodeURIComponent(source)).then(function(data) {
return {
original_title: data.artist_commentary.title,
original_description: data.artist_commentary.description,
original_title: data.artist_commentary.dtext_title,
original_description: data.artist_commentary.dtext_description,
source: source,
};
});

View File

@@ -24,6 +24,7 @@ module Sources
def get
agent.get(URI.parse(normalized_url)) do |page|
page.encoding = "utf-8"
@artist_name, @profile_url = get_profile_from_page(page)
@image_url = get_image_url_from_page(page)
@tags = get_tags_from_page(page)
@@ -32,6 +33,50 @@ module Sources
end
end
# Converts an HTML fragment into DText markup.
#
# The fragment is parsed with Nokogiri and every top-level node is translated
# according to its tag name; nested markup is handled by recursing on the
# node's inner HTML. Tags without a dedicated rule are dropped while their
# contents are still converted, and HTML comments produce no output.
#
# @param text [String] the HTML fragment to convert
# @return [String] the DText equivalent of +text+
def self.to_dtext(text)
  Nokogiri::HTML.fragment(text).children.map do |element|
    case element.name
    when "text"
      element.content
    when "br"
      "\n"
    when "blockquote"
      "[quote]#{to_dtext(element.inner_html)}[/quote]" if element.inner_html.present?
    when "small", "sub"
      "[tn]#{to_dtext(element.inner_html)}[/tn]" if element.inner_html.present?
    when "b", "i", "u"
      # The DText tag for these elements has the same name as the HTML tag.
      tag = element.name
      "[#{tag}]#{to_dtext(element.inner_html)}[/#{tag}]" if element.inner_html.present?
    when "strike"
      "[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present?
    when "li"
      "* #{to_dtext(element.inner_html)}" if element.inner_html.present?
    when "h1", "h2", "h3", "h4", "h5", "h6"
      # The tag name (h1..h6) is reused verbatim as the DText header prefix.
      "#{element.name}. #{to_dtext(element.inner_html)}\n"
    when "a"
      title = to_dtext(element.inner_html)
      # Node#[] returns nil when the attribute is missing (e.g. <a name="x">),
      # so an anchor without an href no longer raises NoMethodError.
      url = element["href"].to_s
      # Strip the outgoing-link redirector prefix so the link points at the
      # real target.
      url = url.sub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "")
      if title.present?
        # Without a usable URL, keep the link text instead of emitting a
        # broken link.
        url.present? ? %("#{title}":[#{url}]) : title
      end
    when "img"
      # Images are replaced by their title text, falling back to the alt text.
      element["title"] || element["alt"] || ""
    when "comment"
      # HTML comments are ignored.
      nil
    else
      to_dtext(element.inner_html)
    end
  end.join
end
protected
def get_profile_from_page(page)
@@ -86,7 +131,7 @@ module Sources
desc = page.search("div.dev-description div.text.block")
if desc.any?
desc[0].inner_text
desc[0].children.to_s
end
end