Fix #5140: Unexpected error: PublicSuffix::DomainInvalid for searching some newgrounds urls in /artists

When the artist name couldn't found for a Newgrounds URL, for example
for `https://www.newgrounds.com/dump/item`, then the `profile_url`
method erroneously returned `https://.newgrounds.com`. This led to an
error later on when the artist finder tried to parse the invalid URL.

Also fix `strategy_should_work` to test that the profile URL is a valid
URL, and not to try to download the file when image_urls is empty.
This commit is contained in:
evazion
2022-04-22 23:05:47 -05:00
parent db6bb2ccac
commit 76d9e86724
4 changed files with 22 additions and 2 deletions

View File

@@ -60,7 +60,7 @@ module Source
def profile_url
# user names are not mutable, artist names are.
# However we need the latest name for normalization
"https://#{artist_name}.newgrounds.com"
"https://#{artist_name}.newgrounds.com" if artist_name.present?
end
def artist_commentary_title

View File

@@ -20,7 +20,7 @@ module SourceTestHelper
assert_not(strategy.image_urls.include?(nil))
end
should_download_successfully(strategy, download_size) unless deleted
should_download_successfully(strategy, download_size) unless deleted || strategy.image_urls.empty?
# {profile_url: nil}[:profile_url].present? -> false
# Doing it this way instead we can check profile_url even if it's passed as a nil.
@@ -49,6 +49,7 @@ module SourceTestHelper
def should_handle_artists_correctly(strategy, profile_url)
if profile_url.present?
should "correctly match a strategy to an artist with the same profile url" do
assert_not_nil(Danbooru::URL.parse(strategy.profile_url))
assert_equal(profile_url, strategy.profile_url)
artist = FactoryBot.create(:artist, name: strategy.artist_name, url_string: profile_url)
assert_equal([artist], strategy.artists)

View File

@@ -402,6 +402,16 @@ class ArtistTest < ActiveSupport::TestCase
end
end
context "when finding Newgrounds artists" do
should "find the correct artist" do
create(:artist, name: "lasterk", url_string: "http://lasterk.newgrounds.com")
create(:artist, name: "merunyaa", url_string: "https://merunyaa.newgrounds.com")
assert_artist_found("lasterk", "https://www.newgrounds.com/art/view/lasterk/booette")
assert_artist_not_found("https://www.newgrounds.com/dump/item/a1f417d20f5eaef31e26ac3c4956b3d4")
end
end
context "the #normalize_other_names method" do
subject { build(:artist) }

View File

@@ -98,6 +98,15 @@ module Sources
end
end
context "A www.newgrounds.com/dump/item URL" do
strategy_should_work(
"https://www.newgrounds.com/dump/item/a1f417d20f5eaef31e26ac3c4956b3d4",
image_urls: [],
artist_name: nil,
profile_url: nil,
)
end
context "A post with links to other illustrations in the commentary" do
should "not include the links in the commentary" do
@source = Source::Extractor.find("https://www.newgrounds.com/art/view/boxofwant/annie-hughes-1")