From 42aed849754bdf7d262278a0c833082f99e624db Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 9 Jun 2017 09:50:27 -0500 Subject: [PATCH 1/4] /posts/$id: fix artcomm dialog to use dtext-ified commentary. --- app/assets/javascripts/artist_commentaries.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/assets/javascripts/artist_commentaries.js b/app/assets/javascripts/artist_commentaries.js index a2c96a76c..4b72d71d7 100644 --- a/app/assets/javascripts/artist_commentaries.js +++ b/app/assets/javascripts/artist_commentaries.js @@ -86,8 +86,8 @@ Danbooru.ArtistCommentary.from_source = function(source) { return $.get("/source.json?url=" + encodeURIComponent(source)).then(function(data) { return { - original_title: data.artist_commentary.title, - original_description: data.artist_commentary.description, + original_title: data.artist_commentary.dtext_title, + original_description: data.artist_commentary.dtext_description, source: source, }; }); From ccacfb40be914f4a943ff5978f0e2e5c42cc43fa Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 9 Jun 2017 09:49:02 -0500 Subject: [PATCH 2/4] deviantart: translate artist commentary from html to dtext. --- app/logical/sources/strategies/deviant_art.rb | 46 ++++++++++++++++++- 1 file changed, 45 insertions(+), 1 deletion(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 42023befd..6e10aed97 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -32,6 +32,50 @@ module Sources end end + def self.to_dtext(text) + html = Nokogiri::HTML.fragment(text) + + dtext = html.children.map do |element| + case element.name + when "text" + element.content + when "br" + "\n" + when "blockquote" + "[quote]#{to_dtext(element.inner_html)}[/quote]" if element.inner_html.present? + when "small", "sub" + "[tn]#{to_dtext(element.inner_html)}[/tn]" if element.inner_html.present? + when "b" + "[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present? + when "i" + "[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present? + when "u" + "[u]#{to_dtext(element.inner_html)}[/u]" if element.inner_html.present? + when "strike" + "[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present? + when "li" + "* #{to_dtext(element.inner_html)}" if element.inner_html.present? + when "h1", "h2", "h3", "h4", "h5", "h6" + hN = element.name + title = to_dtext(element.inner_html) + "#{hN}. #{title}\n" + when "a" + title = to_dtext(element.inner_html) + url = element.attributes["href"].value + url = url.gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "") + %("#{title}":[#{url}]) if title.present? + when "img" + element.attributes["title"] || element.attributes["alt"] || "" + when "comment" + # ignored + else + to_dtext(element.inner_html) + end + end.join + + dtext + end + protected def get_profile_from_page(page) @@ -86,7 +130,7 @@ module Sources desc = page.search("div.dev-description div.text.block") if desc.any? - desc[0].inner_text + desc[0].children.to_s end end From 2fcd823effe660057c8d883be4664546a29a860b Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 9 Jun 2017 12:28:31 -0500 Subject: [PATCH 3/4] deviantart: add source tests. --- test/unit/sources/deviantart_test.rb | 79 ++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) create mode 100644 test/unit/sources/deviantart_test.rb diff --git a/test/unit/sources/deviantart_test.rb b/test/unit/sources/deviantart_test.rb new file mode 100644 index 000000000..ce7ead7f5 --- /dev/null +++ b/test/unit/sources/deviantart_test.rb @@ -0,0 +1,79 @@ +require 'test_helper' + +module Sources + class DeviantArtTest < ActiveSupport::TestCase + context "The source for an DeviantArt artwork page" do + setup do + @site = Sources::Site.new("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") + @site.get + end + + should "get the image url" do + assert_equal("http://orig02.deviantart.net/7b5b/f/2017/160/c/5/test_post_please_ignore_by_noizave-dbc3a48.png", @site.image_url) + end + + should "get the profile" do + assert_equal("http://noizave.deviantart.com/", @site.profile_url) + end + + should "get the artist name" do + assert_equal("noizave", @site.artist_name) + end + + should "get the tags" do + assert_equal(%w[bar baz foo], @site.tags.map(&:first)) + end + + should "get the artist commentary" do + title = "test post please ignore" + desc = "
blah blah
\n

lol

\n

blah blah blah blah
herp derp

this is a quote
\n
    \n
  1. one
  2. \n
  3. two
  4. \n
  5. three
  6. \n
\n
    \n
  • one
  • \n
  • two
  • \n
  • three
  • \n
\n\"Heart\"  " + + assert_equal(title, @site.artist_commentary_title) + assert_equal(desc, @site.artist_commentary_desc) + end + + should "get the dtext-ified commentary" do + desc = <<-EOS.strip_heredoc.chomp + blah blah + + "test link":[http://www.google.com] + + + + h1. lol + + + + [b]blah[/b] [i]blah[/i] [u]blah[/u] [s]blah[/s] + herp derp + + [quote]this is a quote[/quote] + + * one + * two + * three + + + * one + * two + * three + + Heart   + EOS + + assert_equal(desc, @site.dtext_artist_commentary_desc) + end + end + + context "The source for a login-only DeviantArt artwork page" do + setup do + @site = Sources::Site.new("http://noizave.deviantart.com/art/hidden-work-685458369") + @site.get + end + + should_eventually "get the image url" do + assert_equal("http://orig14.deviantart.net/cb25/f/2017/160/1/9/hidden_work_by_noizave-dbc3r29.png", @site.image_url) + end + end + end +end From 82c75372704d5b64440be979a84db80f56f6d921 Mon Sep 17 00:00:00 2001 From: evazion Date: Fri, 9 Jun 2017 12:25:08 -0500 Subject: [PATCH 4/4] deviantart: force commentary encoding to utf-8. --- app/logical/sources/strategies/deviant_art.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 6e10aed97..5376ffeab 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -24,6 +24,7 @@ module Sources def get agent.get(URI.parse(normalized_url)) do |page| + page.encoding = "utf-8" @artist_name, @profile_url = get_profile_from_page(page) @image_url = get_image_url_from_page(page) @tags = get_tags_from_page(page)