diff --git a/app/logical/artist_finder.rb b/app/logical/artist_finder.rb
index a91d0f5e1..306f05fc9 100644
--- a/app/logical/artist_finder.rb
+++ b/app/logical/artist_finder.rb
@@ -62,6 +62,8 @@ module ArtistFinder
     "monappy.jp",
     "monappy.jp/u", # https://monappy.jp/u/abara_bone
     "mstdn.jp", # https://mstdn.jp/@oneb
+    "www.newgrounds.com", # https://jessxjess.newgrounds.com/
+    "newgrounds.com/art/view/", # https://www.newgrounds.com/art/view/jessxjess/avatar-korra
     "nicoseiga.jp",
     "nicoseiga.jp/priv", # http://lohas.nicoseiga.jp/priv/2017365fb6cfbdf47ad26c7b6039feb218c5e2d4/1498430264/6820259
     "nicovideo.jp",
diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb
index 92232e59e..05a0ef2d5 100644
--- a/app/logical/sources/strategies.rb
+++ b/app/logical/sources/strategies.rb
@@ -13,7 +13,8 @@ module Sources
         Strategies::Pawoo,
         Strategies::Moebooru,
         Strategies::HentaiFoundry,
-        Strategies::Weibo
+        Strategies::Weibo,
+        Strategies::Newgrounds
       ]
     end
 
diff --git a/app/logical/sources/strategies/newgrounds.rb b/app/logical/sources/strategies/newgrounds.rb
new file mode 100644
index 000000000..69d5d737a
--- /dev/null
+++ b/app/logical/sources/strategies/newgrounds.rb
@@ -0,0 +1,120 @@
+# Image URLs
+# * https://art.ngfiles.com/images/1254000/1254722_natthelich_pandora.jpg
+# * https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181
+# * https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg
+#
+# Page URLs
+# * https://www.newgrounds.com/art/view/puddbytes/costanza-at-bat
+# * https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic (multiple)
+#
+#
+# Profile URLs
+# * https://natthelich.newgrounds.com/
+
+module Sources
+  module Strategies
+    class Newgrounds < Base
+      # Direct link to a main artwork image; the file name embeds the user name and the title slug.
+      IMAGE_URL = %r{https?://art\.ngfiles\.com/images/\d+/\d+_(?<user_name>[0-9a-z-]+)_(?<illust_title>[0-9a-z-]+)\.\w+}i
+      # Image embedded in the author comments; carries no user name or title.
+      COMMENT_URL = %r{https?://art\.ngfiles\.com/comments/\d+/\w+\.\w+}i
+
+      PAGE_URL = %r{https?://(?:www\.)?newgrounds\.com/art/view/(?<user_name>[0-9a-z-]+)/(?<illust_title>[0-9a-z-]+)(?:\?.*)?}i
+
+      # The subdomain is the artist name; "www" is excluded so the main site is not taken for a profile.
+      PROFILE_URL = %r{https?://(?<artist_name>(?!www)[0-9a-z-]+)\.newgrounds\.com(?:/.*)?}i
+
+      def domains
+        ["newgrounds.com", "ngfiles.com"]
+      end
+
+      def site_name
+        "NewGrounds"
+      end
+
+      # Direct image/comment URLs are returned as-is; otherwise the images are scraped from the art page.
+      def image_urls
+        if url =~ COMMENT_URL || url =~ IMAGE_URL
+          [url]
+        else
+          urls = []
+
+          urls += page&.css(".image img").to_a.map { |img| img["src"] }
+          urls += page&.css("#author_comments img[data-user-image='1']").to_a.map { |img| img["data-smartload-src"] || img["src"] }
+
+          urls.compact
+        end
+      end
+
+      # Canonical art page URL, or nil when the user name or title cannot be derived from the given URLs.
+      def page_url
+        return nil if illust_title.blank? || user_name.blank?
+
+        "https://www.newgrounds.com/art/view/#{user_name}/#{illust_title}"
+      end
+
+      # Parsed HTML of the art page, or nil when there is no page URL or the request returns 404.
+      def page
+        return nil if page_url.blank?
+        doc = Danbooru::Http.cache(1.minute).get(page_url)
+
+        return if doc.code == 404
+
+        Nokogiri::HTML(doc.body)
+      end
+      memoize :page
+
+      def tags
+        page&.css("#sidestats .tags a").to_a.map do |tag|
+          [tag.text, "https://www.newgrounds.com/search/conduct/art?match=tags&tags=" + tag.text]
+        end
+      end
+
+      def normalize_tag(tag)
+        tag = tag.tr("-", "_")
+        super(tag)
+      end
+
+      def artist_name
+        # An empty scrape result must not shadow the name taken from the URL, hence `presence`.
+        name = page&.css(".item-user .item-details h4 a")&.text&.strip.presence || user_name
+        name&.downcase
+      end
+
+      def other_names
+        [artist_name, user_name].compact.uniq
+      end
+
+      def profile_url
+        # user names are not mutable, artist names are.
+        # However we need the latest name for normalization
+        "https://#{artist_name}.newgrounds.com"
+      end
+
+      def artist_commentary_title
+        page&.css(".pod-head > [itemprop='name']")&.text
+      end
+
+      def artist_commentary_desc
+        page&.css("#author_comments")&.to_html
+      end
+
+      def dtext_artist_commentary_desc
+        DText.from_html(artist_commentary_desc)
+      end
+
+      def normalize_for_source
+        page_url
+      end
+
+      # Name in the first URL that yields one; each candidate URL is matched on its own.
+      def user_name
+        urls.map { |u| u[PROFILE_URL, :artist_name] || u[IMAGE_URL, :user_name] || u[PAGE_URL, :user_name] }.compact.first
+      end
+
+      def illust_title
+        urls.map { |u| u[IMAGE_URL, :illust_title] || u[PAGE_URL, :illust_title] }.compact.first
+      end
+    end
+  end
+end
diff --git a/test/unit/sources/newgrounds_test.rb b/test/unit/sources/newgrounds_test.rb
new file mode 100644
index 000000000..8c7c57fcb
--- /dev/null
+++ b/test/unit/sources/newgrounds_test.rb
@@ -0,0 +1,112 @@
+require 'test_helper'
+
+module Sources
+  class NewGroundsTest < ActiveSupport::TestCase
+    context "The source for a newgrounds picture" do
+      setup do
+        @url = "https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic"
+        @comment = "https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"
+        @image_1 = Sources::Strategies.find(@url)
+        @image_2 = Sources::Strategies.find("https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181")
+        @image_3 = Sources::Strategies.find(@comment, @url)
+      end
+
+      should "get the artist name" do
+        assert_equal("natthelich", @image_1.artist_name)
+        assert_equal("natthelich", @image_2.artist_name)
+        assert_equal("natthelich", @image_3.artist_name)
+      end
+
+      should "get the artist commentary title" do
+        assert_equal("Fire Emblem - Marth (plus progress pic)", @image_1.artist_commentary_title)
+        assert_equal("Fire Emblem - Marth (plus progress pic)", @image_2.artist_commentary_title)
+        assert_equal("Fire Emblem - Marth (plus progress pic)", @image_3.artist_commentary_title)
+      end
+
+      should "get profile url" do
+        assert_equal("https://natthelich.newgrounds.com", @image_1.profile_url)
+        assert_equal("https://natthelich.newgrounds.com", @image_2.profile_url)
+        assert_equal("https://natthelich.newgrounds.com", @image_3.profile_url)
+      end
+
+      should "get the image urls" do
+        assert_match(%r{https?://art\.ngfiles\.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic\.png(?:\?\w+)?}i, @image_1.image_url)
+        assert_includes(@image_1.image_urls, @comment)
+
+        assert_match(%r{https?://art\.ngfiles\.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic\.png(?:\?\w+)?}i, @image_2.image_url)
+        assert_equal(@comment, @image_3.image_url)
+      end
+
+      should "get the canonical url" do
+        assert_equal(@url, @image_1.canonical_url)
+        assert_equal(@url, @image_2.canonical_url)
+        assert_equal(@url, @image_3.canonical_url)
+      end
+
+      should "download an image" do
+        assert_downloaded(630365, @image_1.image_url)
+        assert_downloaded(630365, @image_2.image_url)
+        assert_downloaded(129033, @image_3.image_url)
+      end
+
+      should "get the tags" do
+        tags = [
+          %w[fire-emblem https://www.newgrounds.com/search/conduct/art?match=tags&tags=fire-emblem],
+          %w[marth https://www.newgrounds.com/search/conduct/art?match=tags&tags=marth]
+        ]
+        assert_equal(tags, @image_1.tags)
+        assert_equal(tags, @image_2.tags)
+        assert_equal(tags, @image_3.tags)
+      end
+
+      should "find the right artist" do
+        artist_1 = FactoryBot.create(:artist, name: "natthelich1", url_string: "https://natthelich.newgrounds.com/art")
+        artist_2 = FactoryBot.create(:artist, name: "natthelich2", url_string: "https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic")
+        artist_3 = FactoryBot.create(:artist, name: "bad_artist", url_string: "https://www.newgrounds.com/art")
+
+        assert_equal([artist_1, artist_2], @image_1.artists)
+        assert_equal([artist_1, artist_2], @image_2.artists)
+        assert_equal([artist_1, artist_2], @image_3.artists)
+
+        assert_not_equal([artist_3], @image_1.artists)
+      end
+    end
+
+    context "A deleted or not existing picture" do
+      setup do
+        @fake_1 = Sources::Strategies.find("https://www.newgrounds.com/art/view/ThisUser/DoesNotExist")
+        @artist_1 = FactoryBot.create(:artist, name: "thisuser", url_string: "https://thisuser.newgrounds.com")
+
+        @fake_2 = Sources::Strategies.find("https://www.newgrounds.com/art/view/natthelich/nopicture")
+        @artist_2 = FactoryBot.create(:artist, name: "natthelich", url_string: "https://natthelich.newgrounds.com")
+
+        @fake_3 = Sources::Strategies.find("https://www.newgrounds.com/art/view/theolebrave/sensitive-pochaco")
+        @artist_3 = FactoryBot.create(:artist, name: "taffytoad", url_string: "https://taffytoad.newgrounds.com")
+      end
+
+      should "still find the artist name" do
+        assert_equal("thisuser", @fake_1.artist_name)
+        assert_equal([@artist_1], @fake_1.artists)
+        assert_equal("https://thisuser.newgrounds.com", @fake_1.profile_url)
+
+        assert_equal("natthelich", @fake_2.artist_name)
+        assert_equal([@artist_2], @fake_2.artists)
+
+        assert_equal([@artist_3], @fake_3.artists)
+      end
+    end
+
+    context "normalizing for source" do
+      should "normalize correctly" do
+        source = "https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181"
+
+        assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Sources::Strategies.normalize_source(source))
+      end
+
+      should "avoid normalizing unnormalizable urls" do
+        bad_source = "https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"
+        assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
+      end
+    end
+  end
+end