diff --git a/app/assets/javascripts/artist_commentaries.js b/app/assets/javascripts/artist_commentaries.js
index a2c96a76c..4b72d71d7 100644
--- a/app/assets/javascripts/artist_commentaries.js
+++ b/app/assets/javascripts/artist_commentaries.js
@@ -86,8 +86,8 @@
   Danbooru.ArtistCommentary.from_source = function(source) {
     return $.get("/source.json?url=" + encodeURIComponent(source)).then(function(data) {
       return {
-        original_title: data.artist_commentary.title,
-        original_description: data.artist_commentary.description,
+        original_title: data.artist_commentary.dtext_title,
+        original_description: data.artist_commentary.dtext_description,
         source: source,
       };
     });
diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb
index 42023befd..5376ffeab 100644
--- a/app/logical/sources/strategies/deviant_art.rb
+++ b/app/logical/sources/strategies/deviant_art.rb
@@ -24,6 +24,7 @@ module Sources
 
       def get
         agent.get(URI.parse(normalized_url)) do |page|
+          page.encoding = "utf-8"
           @artist_name, @profile_url = get_profile_from_page(page)
           @image_url = get_image_url_from_page(page)
           @tags = get_tags_from_page(page)
@@ -32,6 +33,64 @@ module Sources
         end
       end
 
+      # Convert an HTML fragment into DText markup.
+      #
+      # Walks the top-level nodes of the parsed fragment and recurses into each
+      # element's inner HTML. Empty formatting elements produce no output, and
+      # elements without a specific rule contribute only their converted children.
+      #
+      # @param text [String] an HTML fragment
+      # @return [String] the DText rendering of the fragment
+      def self.to_dtext(text)
+        html = Nokogiri::HTML.fragment(text)
+
+        html.children.map do |element|
+          case element.name
+          when "text"
+            element.content
+          when "br"
+            "\n"
+          when "blockquote"
+            "[quote]#{to_dtext(element.inner_html)}[/quote]" if element.inner_html.present?
+          when "small", "sub"
+            "[tn]#{to_dtext(element.inner_html)}[/tn]" if element.inner_html.present?
+          when "b"
+            "[b]#{to_dtext(element.inner_html)}[/b]" if element.inner_html.present?
+          when "i"
+            "[i]#{to_dtext(element.inner_html)}[/i]" if element.inner_html.present?
+          when "u"
+            "[u]#{to_dtext(element.inner_html)}[/u]" if element.inner_html.present?
+          when "strike"
+            "[s]#{to_dtext(element.inner_html)}[/s]" if element.inner_html.present?
+          when "li"
+            "* #{to_dtext(element.inner_html)}" if element.inner_html.present?
+          when "h1", "h2", "h3", "h4", "h5", "h6"
+            heading = element.name
+            title = to_dtext(element.inner_html)
+            "#{heading}. #{title}\n"
+          when "a"
+            title = to_dtext(element.inner_html)
+            # An anchor may lack an href (e.g. <a name="...">); treat that as an
+            # empty URL instead of calling #value on a missing attribute.
+            url = element["href"].to_s
+            # Strip the outgoing-link redirector prefix, leaving what follows the "?".
+            url = url.gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
+
+            if url.empty?
+              title
+            elsif title.present?
+              %("#{title}":[#{url}])
+            end
+          when "img"
+            element["title"] || element["alt"] || ""
+          when "comment"
+            # ignored
+          else
+            to_dtext(element.inner_html)
+          end
+        end.join
+      end
+
     protected
 
       def get_profile_from_page(page)
@@ -86,7 +145,7 @@ module Sources
         desc = page.search("div.dev-description div.text.block")
 
         if desc.any?
-          desc[0].inner_text
+          desc[0].children.to_s
         end
       end
 
diff --git a/test/unit/sources/deviantart_test.rb b/test/unit/sources/deviantart_test.rb
new file mode 100644
index 000000000..ce7ead7f5
--- /dev/null
+++ b/test/unit/sources/deviantart_test.rb
@@ -0,0 +1,79 @@
+require 'test_helper'
+
+module Sources
+  class DeviantArtTest < ActiveSupport::TestCase
+    context "The source for an DeviantArt artwork page" do
+      setup do
+        @site = Sources::Site.new("http://noizave.deviantart.com/art/test-post-please-ignore-685436408")
+        @site.get
+      end
+
+      should "get the image url" do
+        assert_equal("http://orig02.deviantart.net/7b5b/f/2017/160/c/5/test_post_please_ignore_by_noizave-dbc3a48.png", @site.image_url)
+      end
+
+      should "get the profile" do
+        assert_equal("http://noizave.deviantart.com/", @site.profile_url)
+      end
+
+      should "get the artist name" do
+        assert_equal("noizave", @site.artist_name)
+      end
+
+      should "get the tags" do
+        assert_equal(%w[bar baz foo], @site.tags.map(&:first))
+      end
+
+      should "get the artist commentary" do
+        title = "test post please ignore"
+        desc = "
this is a quote\n