Fix #5140: Unexpected error: PublicSuffix::DomainInvalid for searching some newgrounds urls in /artists

When the artist name couldn't found for a Newgrounds URL, for example
for `https://www.newgrounds.com/dump/item`, then the `profile_url`
method erroneously returned `https://.newgrounds.com`. This led to an
error later on when the artist finder tried to parse the invalid URL.

Also fix `strategy_should_work` to test that the profile URL is a valid
URL, and not to try to download the file when image_urls is empty.
This commit is contained in:
evazion
2022-04-22 23:05:47 -05:00
parent db6bb2ccac
commit 76d9e86724
4 changed files with 22 additions and 2 deletions

View File

@@ -60,7 +60,7 @@ module Source
def profile_url def profile_url
# user names are not mutable, artist names are. # user names are not mutable, artist names are.
# However we need the latest name for normalization # However we need the latest name for normalization
"https://#{artist_name}.newgrounds.com" "https://#{artist_name}.newgrounds.com" if artist_name.present?
end end
def artist_commentary_title def artist_commentary_title

View File

@@ -20,7 +20,7 @@ module SourceTestHelper
assert_not(strategy.image_urls.include?(nil)) assert_not(strategy.image_urls.include?(nil))
end end
should_download_successfully(strategy, download_size) unless deleted should_download_successfully(strategy, download_size) unless deleted || strategy.image_urls.empty?
# {profile_url: nil}[:profile_url].present? -> false # {profile_url: nil}[:profile_url].present? -> false
# Doing it this way instead we can check profile_url even if it's passed as a nil. # Doing it this way instead we can check profile_url even if it's passed as a nil.
@@ -49,6 +49,7 @@ module SourceTestHelper
def should_handle_artists_correctly(strategy, profile_url) def should_handle_artists_correctly(strategy, profile_url)
if profile_url.present? if profile_url.present?
should "correctly match a strategy to an artist with the same profile url" do should "correctly match a strategy to an artist with the same profile url" do
assert_not_nil(Danbooru::URL.parse(strategy.profile_url))
assert_equal(profile_url, strategy.profile_url) assert_equal(profile_url, strategy.profile_url)
artist = FactoryBot.create(:artist, name: strategy.artist_name, url_string: profile_url) artist = FactoryBot.create(:artist, name: strategy.artist_name, url_string: profile_url)
assert_equal([artist], strategy.artists) assert_equal([artist], strategy.artists)

View File

@@ -402,6 +402,16 @@ class ArtistTest < ActiveSupport::TestCase
end end
end end
context "when finding Newgrounds artists" do
should "find the correct artist" do
create(:artist, name: "lasterk", url_string: "http://lasterk.newgrounds.com")
create(:artist, name: "merunyaa", url_string: "https://merunyaa.newgrounds.com")
assert_artist_found("lasterk", "https://www.newgrounds.com/art/view/lasterk/booette")
assert_artist_not_found("https://www.newgrounds.com/dump/item/a1f417d20f5eaef31e26ac3c4956b3d4")
end
end
context "the #normalize_other_names method" do context "the #normalize_other_names method" do
subject { build(:artist) } subject { build(:artist) }

View File

@@ -98,6 +98,15 @@ module Sources
end end
end end
context "A www.newgrounds.com/dump/item URL" do
strategy_should_work(
"https://www.newgrounds.com/dump/item/a1f417d20f5eaef31e26ac3c4956b3d4",
image_urls: [],
artist_name: nil,
profile_url: nil,
)
end
context "A post with links to other illustrations in the commentary" do context "A post with links to other illustrations in the commentary" do
should "not include the links in the commentary" do should "not include the links in the commentary" do
@source = Source::Extractor.find("https://www.newgrounds.com/art/view/boxofwant/annie-hughes-1") @source = Source::Extractor.find("https://www.newgrounds.com/art/view/boxofwant/annie-hughes-1")