sources: factor out site_name method.

This commit is contained in:
evazion
2022-03-11 23:16:36 -06:00
parent b4aea72d04
commit 28971fe103
23 changed files with 48 additions and 150 deletions

View File

@@ -3,6 +3,9 @@
# A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a # A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a
# subclass responsible for parsing and extracting information from URLs for that site. # subclass responsible for parsing and extracting information from URLs for that site.
# #
# Sources::Strategies are the main user of Source::URLs. Each Source::URL subclass usually
# has a corresponding strategy for extracting data from that site.
#
# To add a new site, create a subclass of Source::URL and implement `#match?` to define # To add a new site, create a subclass of Source::URL and implement `#match?` to define
# which URLs belong to the site, and `#parse` to parse and extract information from the URL. # which URLs belong to the site, and `#parse` to parse and extract information from the URL.
# #
@@ -15,6 +18,7 @@
# url.status_id # => "1496123903290314755" # url.status_id # => "1496123903290314755"
# url.twitter_username # => "yasunavert" # url.twitter_username # => "yasunavert"
# #
# @see Danbooru::URL
module Source module Source
class URL < Danbooru::URL class URL < Danbooru::URL
SUBCLASSES = [ SUBCLASSES = [
@@ -68,9 +72,43 @@ module Source
raise NotImplementedError raise NotImplementedError
end end
# @return [String, nil] The name of the site this URL belongs to, or possibly nil if unknown. # The name of the site this URL belongs to.
#
# @return [String]
def site_name
  # Hosts listed here have display names that the generic fallbacks below would
  # not produce (names that keep the TLD, use special casing, etc.).
  # XXX should go in dedicated subclasses.
  case host
  when /ask\.fm\z/i
    "Ask.fm"
  when /bcy\.net\z/i
    "BCY"
  when /booth\.pm\z/i
    "Booth.pm"
  when /circle\.ms\z/i
    "Circle.ms"
  when /dlsite\.(com|net)\z/i
    "DLSite"
  when /doujinshi\.mugimugi\.org\z/i, /doujinshi\.org\z/i
    # Both hostnames map to the same site name; the bare doujinshi.org host
    # would otherwise fall through to the generic fallback below.
    "Doujinshi.org"
  when /fc2\.com\z/i
    "FC2"
  when /ko-fi\.com\z/i
    "Ko-fi"
  when /mixi\.jp\z/i
    "Mixi.jp"
  when /piapro\.jp\z/i
    "Piapro.jp"
  when /sakura\.ne\.jp\z/i
    "Sakura.ne.jp"
  else
    if instance_of?(Source::URL)
      # No dedicated subclass: derive the name from the domain.
      # "www.melonbooks.co.jp" => "Melonbooks"
      parsed_domain.sld.titleize
    else
      # Dedicated subclass: derive the name from the class name.
      # "Source::URL::NicoSeiga" => "Nico Seiga"
      self.class.name.demodulize.titleize
    end
  end
end
protected def initialize(...) protected def initialize(...)

View File

@@ -7,10 +7,6 @@ module Sources::Strategies
Source::URL::ArtStation === parsed_url Source::URL::ArtStation === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
@image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) } @image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
end end

View File

@@ -22,6 +22,7 @@ module Sources
DOWNLOAD_TIMEOUT = 60 DOWNLOAD_TIMEOUT = 60
attr_reader :url, :referer_url, :parsed_url, :parsed_referer attr_reader :url, :referer_url, :parsed_url, :parsed_referer
delegate :site_name, to: :parsed_url
# Should return true if all prerequisites for using the strategy are met. # Should return true if all prerequisites for using the strategy are met.
# Return false if the strategy requires api keys that have not been configured. # Return false if the strategy requires api keys that have not been configured.
@@ -53,74 +54,6 @@ module Sources
false false
end end
def site_name
host = parsed_url&.host
# XXX should go in dedicated strategies.
case host
when /amazon\.(com|jp|co\.jp)\z/i
"Amazon"
when /ask\.fm\z/i
"Ask.fm"
when /bcy\.net\z/i
"BCY"
when /booth\.pm\z/i
"Booth.pm"
when /circle\.ms\z/i
"Circle.ms"
when /dlsite\.(com|net)\z/i
"DLSite"
when /doujinshi\.mugimugi\.org\z/i, /doujinshi\.org\z/i
"Doujinshi.org"
when /erogamescape\.dyndns\.org\z/i
"Erogamescape"
when /facebook\.com\z/i
"Facebook"
when /fc2\.com\z/i
"FC2"
when /gumroad\.com\z/i
"Gumroad"
when /instagram\.com\z/i
"Instagram"
when /ko-fi\.com\z/i
"Ko-fi"
when /livedoor\.(jp|com)\z/i
"Livedoor"
when /mangaupdates\.com\z/i
"Mangaupdates"
when /melonbooks\.co\.jp\z/i
"Melonbooks"
when /mihuashi\.com\z/i
"Mihuashi"
when /mixi\.jp\z/i
"Mixi.jp"
when /patreon\.com\z/i
"Patreon"
when /piapro\.jp\z/i
"Piapro.jp"
when /picarto\.tv\z/i
"Picarto"
when /privatter\.net\z/i
"Privatter"
when /sakura\.ne\.jp\z/i
"Sakura.ne.jp"
when /stickam\.jp\z/i
"Stickam"
when /tinami\.com\z/i
"Tinami"
when /toranoana\.(jp|shop)\z/i
"Toranoana"
when /twitch\.tv\z/i
"Twitch"
when /wikipedia\.org\z/i
"Wikipedia"
when /youtube\.com\z/i
"Youtube"
else
host
end
end
# Whatever <tt>url</tt> is, this method should return the direct links # Whatever <tt>url</tt> is, this method should return the direct links
# to the canonical binary files. It should not be an HTML page. It should # to the canonical binary files. It should not be an HTML page. It should
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the # be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the

View File

@@ -11,10 +11,6 @@ module Sources
Source::URL::DeviantArt === parsed_url Source::URL::DeviantArt === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
[image_url] [image_url]
end end

View File

@@ -8,10 +8,6 @@ module Sources
Source::URL::Fanbox === parsed_url Source::URL::Fanbox === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[parsed_url.full_image_url] [parsed_url.full_image_url]

View File

@@ -10,10 +10,6 @@ module Sources::Strategies
Source::URL::Fantia === parsed_url Source::URL::Fantia === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
return [parsed_url.full_image_url] if parsed_url.image_url? return [parsed_url.full_image_url] if parsed_url.image_url?
return [image_from_downloadable(parsed_url)] if parsed_url.downloadable? return [image_from_downloadable(parsed_url)] if parsed_url.downloadable?

View File

@@ -8,10 +8,6 @@ module Sources
parsed_url&.site_name == "Foundation" parsed_url&.site_name == "Foundation"
end end
def site_name
"Foundation"
end
def image_urls def image_urls
if parsed_url.full_image_url.present? if parsed_url.full_image_url.present?
[parsed_url.full_image_url] [parsed_url.full_image_url]

View File

@@ -8,10 +8,6 @@ module Sources
parsed_url&.site_name == "Hentai Foundry" parsed_url&.site_name == "Hentai Foundry"
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
image = page&.search("#picBox img") image = page&.search("#picBox img")

View File

@@ -8,10 +8,6 @@ module Sources
parsed_url&.site_name == "Lofter" parsed_url&.site_name == "Lofter"
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[parsed_url.full_image_url] [parsed_url.full_image_url]

View File

@@ -7,10 +7,6 @@ module Sources::Strategies
Source::URL::Mastodon === parsed_url Source::URL::Mastodon === parsed_url
end end
def site_name
parsed_url.site_name
end
def domain def domain
case site_name case site_name
when "Pawoo" then "pawoo.net" when "Pawoo" then "pawoo.net"

View File

@@ -8,10 +8,6 @@ module Sources
parsed_url&.site_name == "Newgrounds" parsed_url&.site_name == "Newgrounds"
end end
def site_name
"Newgrounds"
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[url] [url]

View File

@@ -12,10 +12,6 @@ module Sources
Source::URL::NicoSeiga === parsed_url Source::URL::NicoSeiga === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if image_id.present? if image_id.present?
[image_url_for("https://seiga.nicovideo.jp/image/source/#{image_id}")] [image_url_for("https://seiga.nicovideo.jp/image/source/#{image_id}")]

View File

@@ -12,10 +12,6 @@ module Sources
Source::URL::Nijie === parsed_url Source::URL::Nijie === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[parsed_url.full_image_url] [parsed_url.full_image_url]

View File

@@ -36,10 +36,6 @@ module Sources
Source::URL::Pixiv === parsed_url Source::URL::Pixiv === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if is_ugoira? if is_ugoira?
[api_ugoira[:originalSrc]] [api_ugoira[:originalSrc]]

View File

@@ -8,10 +8,6 @@ module Sources
Source::URL::PixivSketch === parsed_url Source::URL::PixivSketch === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[parsed_url.full_image_url] [parsed_url.full_image_url]

View File

@@ -8,10 +8,6 @@ module Sources
Source::URL::Plurk === parsed_url Source::URL::Plurk === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
# * Posts can have up to 10 images. # * Posts can have up to 10 images.
# * Artists commonly post extra images by replying to their own post. # * Artists commonly post extra images by replying to their own post.

View File

@@ -8,10 +8,6 @@ module Sources
parsed_url&.site_name == "Skeb" parsed_url&.site_name == "Skeb"
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[url] [url]

View File

@@ -11,10 +11,6 @@ module Sources::Strategies
Source::URL::Tumblr === parsed_url Source::URL::Tumblr === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
return [find_largest(parsed_url)].compact if parsed_url.asset_url? return [find_largest(parsed_url)].compact if parsed_url.asset_url?

View File

@@ -7,10 +7,6 @@ module Sources::Strategies
parsed_url&.site_name == "TwitPic" parsed_url&.site_name == "TwitPic"
end end
def site_name
parsed_url.site_name
end
def normalize_for_source def normalize_for_source
parsed_url.page_url || url parsed_url.page_url || url
end end

View File

@@ -27,10 +27,6 @@ module Sources::Strategies
parsed_url&.site_name == "Twitter" parsed_url&.site_name == "Twitter"
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
if parsed_url.image_url? if parsed_url.image_url?

View File

@@ -9,10 +9,6 @@ module Sources
Source::URL::Weibo === parsed_url Source::URL::Weibo === parsed_url
end end
def site_name
parsed_url.site_name
end
def image_urls def image_urls
if parsed_url.image_url? if parsed_url.image_url?
[parsed_url.full_image_url] [parsed_url.full_image_url]

View File

@@ -72,12 +72,11 @@ class ArtistURL < ApplicationRecord
end end
def domain
  # parsed_url may be nil (hence the safe navigation); nil.to_s then gives "".
  (parsed_url&.domain).to_s
end
def site_name
  # parsed_url may be nil (hence the safe navigation); nil.to_s then gives "".
  (parsed_url&.site_name).to_s
end
# A secondary URL is an artist URL that we don't normally want to display, # A secondary URL is an artist URL that we don't normally want to display,
@@ -123,9 +122,14 @@ class ArtistURL < ApplicationRecord
def url=(url)
  # Zero-argument super forwards `url` unchanged to the inherited writer.
  super
  # Replace the cached parse result so it matches the value just assigned.
  @parsed_url = Source::URL.parse(url)
  self.normalized_url = self.class.normalize_normalized_url(self.url)
end
# Returns the Source::URL.parse result for `url`, cached in @parsed_url.
# A nil (or false) result is not cached, so the next call parses again.
def parsed_url
  return @parsed_url if @parsed_url

  @parsed_url = Source::URL.parse(url)
end
def to_s def to_s
if is_active? if is_active?
url url

View File

@@ -12,7 +12,6 @@ module Sources
end end
should "find the metadata" do should "find the metadata" do
assert_equal("oremuhax.x0.com", @site.site_name)
assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls) assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls)
assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url) assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url)
assert_nil(@site.artist_name) assert_nil(@site.artist_name)