Merge pull request #3137 from evazion/fix-da-artcomms

Autoconvert DeviantArt commentary from HTML to DText
This commit is contained in:
Albert Yi
2017-06-09 15:58:05 -07:00
committed by GitHub
3 changed files with 127 additions and 3 deletions

View File

@@ -86,8 +86,8 @@
Danbooru.ArtistCommentary.from_source = function(source) {
return $.get("/source.json?url=" + encodeURIComponent(source)).then(function(data) {
return {
original_title: data.artist_commentary.title,
original_description: data.artist_commentary.description,
original_title: data.artist_commentary.dtext_title,
original_description: data.artist_commentary.dtext_description,
source: source,
};
});

View File

@@ -24,6 +24,7 @@ module Sources
def get
agent.get(URI.parse(normalized_url)) do |page|
page.encoding = "utf-8"
@artist_name, @profile_url = get_profile_from_page(page)
@image_url = get_image_url_from_page(page)
@tags = get_tags_from_page(page)
@@ -32,6 +33,50 @@ module Sources
end
end
# Converts a fragment of DeviantArt commentary HTML into DText markup.
#
# The fragment is parsed with Nokogiri and every top-level node is translated
# to its DText equivalent; container elements are handled by recursing on
# their inner HTML. Elements with no dedicated rule (div, span, ol, ul, ...)
# are flattened to the conversion of their contents, and HTML comments are
# dropped.
#
# @param text [String] an HTML fragment
# @return [String] the DText rendering of +text+
def self.to_dtext(text)
  html = Nokogiri::HTML.fragment(text)

  html.children.map do |element|
    case element.name
    when "text"
      element.content
    when "br"
      "\n"
    when "blockquote"
      "[quote]#{to_dtext(element.inner_html)}[/quote]" if element.inner_html.present?
    when "small", "sub"
      "[tn]#{to_dtext(element.inner_html)}[/tn]" if element.inner_html.present?
    when "b"
      "[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present?
    when "i"
      "[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present?
    when "u"
      "[u]#{to_dtext(element.inner_html)}[/u]" if element.inner_html.present?
    when "strike"
      "[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present?
    when "li"
      "* #{to_dtext(element.inner_html)}" if element.inner_html.present?
    when "h1", "h2", "h3", "h4", "h5", "h6"
      # The tag name doubles as the DText header prefix ("h1. Title").
      level = element.name
      title = to_dtext(element.inner_html)
      "#{level}. #{title}\n"
    when "a"
      title = to_dtext(element.inner_html)
      # Anchors are not guaranteed to carry an href (e.g. <a name="...">).
      # Node#[] yields nil for a missing attribute, where the previous
      # attributes["href"].value lookup raised NoMethodError.
      url = element["href"].to_s
      # Strip DeviantArt's outgoing-link redirector so the link points
      # straight at the destination.
      url = url.sub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")

      if url.present?
        %("#{title}":[#{url}]) if title.present?
      else
        # No target to link to: keep the anchor's text as plain text.
        title
      end
    when "img"
      # Images (e.g. emoticons) are replaced by their textual label.
      element["title"] || element["alt"] || ""
    when "comment"
      # ignored
    else
      to_dtext(element.inner_html)
    end
  end.join
end
protected
def get_profile_from_page(page)
@@ -86,7 +131,7 @@ module Sources
desc = page.search("div.dev-description div.text.block")
if desc.any?
desc[0].inner_text
desc[0].children.to_s
end
end

View File

@@ -0,0 +1,79 @@
require 'test_helper'
# Tests for extracting metadata (image url, artist profile, tags and artist
# commentary) from DeviantArt artwork pages through Sources::Site.
module Sources
class DeviantArtTest < ActiveSupport::TestCase
# A publicly viewable artwork page used as the fixture for all
# assertions in this context.
context "The source for an DeviantArt artwork page" do
setup do
# NOTE(review): @site.get presumably fetches the page over the network,
# so these tests would depend on the remote post staying unchanged — confirm.
@site = Sources::Site.new("http://noizave.deviantart.com/art/test-post-please-ignore-685436408")
@site.get
end
should "get the image url" do
assert_equal("http://orig02.deviantart.net/7b5b/f/2017/160/c/5/test_post_please_ignore_by_noizave-dbc3a48.png", @site.image_url)
end
should "get the profile" do
assert_equal("http://noizave.deviantart.com/", @site.profile_url)
end
should "get the artist name" do
assert_equal("noizave", @site.artist_name)
end
should "get the tags" do
# Each tag entry is reduced to its first element before comparing.
assert_equal(%w[bar baz foo], @site.tags.map(&:first))
end
should "get the artist commentary" do
# The description is expected as raw HTML markup, including the trailing
# whitespace after the final <img> tag.
title = "test post please ignore"
desc = "<div align=\"center\"><span>blah blah<br><div align=\"left\">\n<a class=\"external\" href=\"http://www.deviantart.com/users/outgoing?http://www.google.com\">test link</a><br>\n</div></span></div>\n<br><h1>lol</h1>\n<br><br><b>blah</b> <i>blah</i> <u>blah</u> <strike>blah</strike><br>herp derp<br><br><blockquote>this is a quote</blockquote>\n<ol>\n<li>one</li>\n<li>two</li>\n<li>three</li>\n</ol>\n<ul>\n<li>one</li>\n<li>two</li>\n<li>three</li>\n</ul>\n<img src=\"http://e.deviantart.net/emoticons/h/heart.gif\" alt=\"Heart\" style=\"width: 15px; height: 13px;\" data-embed-type=\"emoticon\" data-embed-id=\"357\">  "
assert_equal(title, @site.artist_commentary_title)
assert_equal(desc, @site.artist_commentary_desc)
end
should "get the dtext-ified commentary" do
# Expected DText form of the commentary; presumably the conversion of the
# HTML description asserted in the previous test — confirm against
# dtext_artist_commentary_desc. Whitespace inside the heredoc is significant.
desc = <<-EOS.strip_heredoc.chomp
blah blah
"test link":[http://www.google.com]
h1. lol
[b]blah[/b] [i]blah[/i] [u]blah[/u] [s]blah[/s]
herp derp
[quote]this is a quote[/quote]
* one
* two
* three
* one
* two
* three
Heart  
EOS
assert_equal(desc, @site.dtext_artist_commentary_desc)
end
end
# An artwork page that, per the context name, is only visible to logged-in users.
context "The source for a login-only DeviantArt artwork page" do
setup do
@site = Sources::Site.new("http://noizave.deviantart.com/art/hidden-work-685458369")
@site.get
end
# NOTE(review): should_eventually (shoulda) presumably registers this as a
# pending test rather than running it — confirm.
should_eventually "get the image url" do
assert_equal("http://orig14.deviantart.net/cb25/f/2017/160/1/9/hidden_work_by_noizave-dbc3r29.png", @site.image_url)
end
end
end
end