`normalize_for_source` was used to convert image URLs to page URLs when displaying sources on the post show page.

Move all the code for converting image URLs to page URLs from `Sources::Strategies#normalize_for_source` to `Source::URL#page_url`.

Previously, we had to be very careful in source strategies not to make any network calls in `normalize_for_source`, since it was used in the view for the post show page. Now all the code for generating page URLs is isolated in `Source::URL`, which makes source strategies simpler. It also makes it easier to check whether a source is an image URL or a page URL, and whether an image URL is convertible to a page URL, which will make autotagging bad_link or bad_source feasible.

Finally, this generates better page URLs in a handful of cases:

* https://www.artstation.com/artwork/qPVGP instead of https://anubis1982918.artstation.com/projects/qPVGP
* https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6 instead of https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6
* http://gallery.minitokyo.net/view/365677 instead of http://gallery.minitokyo.net/download/365677
* https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png instead of https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png
* https://rule34.paheal.net/post/view/852405 instead of https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1
97 lines
2.3 KiB
Ruby
97 lines
2.3 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# @see Source::URL::Newgrounds
|
|
module Sources
|
|
module Strategies
|
|
class Newgrounds < Base
|
|
# Whether this strategy applies to the URL being processed.
#
# @return [Boolean] true when `parsed_url` matches Source::URL::Newgrounds
#   (compared with `===`).
def match?
  case parsed_url
  when Source::URL::Newgrounds then true
  else false
  end
end
|
|
|
|
# Lists the image URLs for this source.
#
# When the given URL is itself an image URL it is returned as the only entry.
# Otherwise the images are scraped from `page`: first the `.image img`
# elements, then the user images embedded in `#author_comments`.
#
# @return [Array<String>] image URLs; empty when `page` is nil or nothing matches
def image_urls
  return [url] if parsed_url.image_url?

  # `page&.css(...)` is nil when there is no page; `nil.to_a` turns that into [].
  artwork = page&.css(".image img").to_a.map { |img| img["src"] }

  # Embedded images prefer the `data-smartload-src` attribute over `src`.
  embedded = page&.css("#author_comments img[data-user-image='1']").to_a.map do |img|
    img["data-smartload-src"] || img["src"]
  end

  (artwork + embedded).compact
end
|
|
|
|
# Builds the artwork page URL from the user name and the work title.
#
# @return [String, nil] the page URL, or nil when either the title or the
#   user name is blank
def page_url
  if illust_title.present? && user_name.present?
    "https://www.newgrounds.com/art/view/#{user_name}/#{illust_title}"
  end
end
|
|
|
|
# Fetches and parses the artwork page.
#
# The request goes through `http` with a one-minute cache.
#
# @return [Object, nil] the result of `response.parse` (the other methods in
#   this class query it with `#css`), or nil when there is no page URL or the
#   response status is 404
def page
  target = page_url
  return nil if target.blank?

  response = http.cache(1.minute).get(target)
  response.status == 404 ? nil : response.parse
end
|
|
memoize :page
|
|
|
|
# Scrapes the tags listed in the page's `#sidestats .tags` section.
#
# @return [Array<Array(String, String)>] pairs of tag text and the Newgrounds
#   art search URL for that tag; empty when `page` is nil
def tags
  # `nil.to_a` yields [] when there is no page.
  links = page&.css("#sidestats .tags a").to_a

  links.map do |link|
    label = link.text
    [label, "https://www.newgrounds.com/search/conduct/art?match=tags&tags=" + label]
  end
end
|
|
|
|
# Replaces hyphens with underscores, then defers to the parent
# implementation for the rest of the normalization.
#
# @param tag [String] the raw tag text
# @return [Object] whatever the superclass `normalize_tag` returns
def normalize_tag(tag)
  super(tag.tr("-", "_"))
end
|
|
|
|
# The artist's name, lowercased.
#
# Prefers the name scraped from the page's `.item-user` block and falls back
# to `user_name` when the page is unavailable or the scraped text is blank.
#
# @return [String, nil] the lowercased name, or nil when neither source has one
def artist_name
  scraped = page&.css(".item-user .item-details h4 a")&.text&.strip

  # An empty string is truthy in Ruby, so a bare `||` would keep a blank
  # scraped name (e.g. when the selector matches nothing) and never reach the
  # `user_name` fallback. `presence` turns blank strings into nil first.
  (scraped.presence || user_name)&.downcase
end
|
|
|
|
# Alternative names for the artist.
#
# @return [Array<String>] `artist_name` and `user_name`, with nils and
#   duplicates removed
def other_names
  candidates = [artist_name, user_name]
  candidates.uniq.compact
end
|
|
|
|
# The artist's Newgrounds profile URL.
#
# User names are not mutable, artist names are. However we need the latest
# name for normalization, so the URL is built from `artist_name`.
#
# @return [String, nil] the profile URL, or nil when no artist name is known
def profile_url
  name = artist_name

  # Without a name the interpolation would yield "https://.newgrounds.com",
  # which is not a valid profile URL.
  return nil if name.blank?

  "https://#{name}.newgrounds.com"
end
|
|
|
|
# The commentary title, read from the `itemprop='name'` element directly
# under `.pod-head`.
#
# @return [String, nil] the element text, or nil when `page` is nil
def artist_commentary_title
  heading = page&.css(".pod-head > [itemprop='name']")
  heading&.text
end
|
|
|
|
# The commentary description as HTML, taken from the `#author_comments`
# element of the page.
#
# @return [String, nil] the element's HTML, or nil when `page` is nil
def artist_commentary_desc
  comments = page&.css("#author_comments")
  comments&.to_html
end
|
|
|
|
# The commentary description converted from HTML by `DText.from_html`.
#
# @return [Object] whatever `DText.from_html` returns for `artist_commentary_desc`
def dtext_artist_commentary_desc
  html = artist_commentary_desc
  DText.from_html(html)
end
|
|
|
|
# The image url should be the post source, if we can generate the page url
# from the image url.
#
# @return [String, nil] `url` when `page_url` is present; otherwise the blank
#   `page_url` value itself
def canonical_url
  # NOTE(review): in this branch page_url is blank by definition, so the
  # result carries no usable URL — confirm that callers expect this rather
  # than a fallback to `url`.
  return page_url if page_url.blank?

  url
end
|
|
|
|
# The user name taken from the URL, falling back to the referer when
# the URL has none.
#
# @return [String, nil] nil when neither the URL nor the referer provides one
def user_name
  from_url = parsed_url.username
  return from_url if from_url

  parsed_referer&.username
end
|
|
|
|
# The work title taken from the URL, falling back to the referer when
# the URL has none.
#
# @return [String, nil] nil when neither the URL nor the referer provides one
def illust_title
  from_url = parsed_url.work_title
  return from_url if from_url

  parsed_referer&.work_title
end
|
|
end
|
|
end
|
|
end
|