sources: factor out site_name method.
This commit is contained in:
@@ -3,6 +3,9 @@
|
|||||||
# A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a
|
# A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a
|
||||||
# subclass responsible for parsing and extracting information from URLs for that site.
|
# subclass responsible for parsing and extracting information from URLs for that site.
|
||||||
#
|
#
|
||||||
|
# Sources::Strategies are the main user of Source::URLs. Each Source::URL subclass usually
|
||||||
|
# has a corresponding strategy for extracting data from that site.
|
||||||
|
#
|
||||||
# To add a new site, create a subclass of Source::URL and implement `#match?` to define
|
# To add a new site, create a subclass of Source::URL and implement `#match?` to define
|
||||||
# which URLs belong to the site, and `#parse` to parse and extract information from the URL.
|
# which URLs belong to the site, and `#parse` to parse and extract information from the URL.
|
||||||
#
|
#
|
||||||
@@ -15,6 +18,7 @@
|
|||||||
# url.status_id # => "1496123903290314755"
|
# url.status_id # => "1496123903290314755"
|
||||||
# url.twitter_username # => "yasunavert"
|
# url.twitter_username # => "yasunavert"
|
||||||
#
|
#
|
||||||
|
# @see Danbooru::URL
|
||||||
module Source
|
module Source
|
||||||
class URL < Danbooru::URL
|
class URL < Danbooru::URL
|
||||||
SUBCLASSES = [
|
SUBCLASSES = [
|
||||||
@@ -68,9 +72,43 @@ module Source
|
|||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
end
|
end
|
||||||
|
|
||||||
# @return [String, nil] The name of the site this URL belongs to, or possibly nil if unknown.
|
# The name of the site this URL belongs to.
|
||||||
|
#
|
||||||
|
# @return [String]
|
||||||
def site_name
|
def site_name
|
||||||
self.class.name.demodulize.titleize
|
# XXX should go in dedicated subclasses.
|
||||||
|
case host
|
||||||
|
when /ask\.fm\z/i
|
||||||
|
"Ask.fm"
|
||||||
|
when /bcy\.net\z/i
|
||||||
|
"BCY"
|
||||||
|
when /booth\.pm\z/i
|
||||||
|
"Booth.pm"
|
||||||
|
when /circle\.ms\z/i
|
||||||
|
"Circle.ms"
|
||||||
|
when /dlsite\.(com|net)\z/i
|
||||||
|
"DLSite"
|
||||||
|
when /doujinshi\.mugimugi\.org\z/i
|
||||||
|
"Doujinshi.org"
|
||||||
|
when /fc2\.com\z/i
|
||||||
|
"FC2"
|
||||||
|
when /ko-fi\.com\z/i
|
||||||
|
"Ko-fi"
|
||||||
|
when /mixi\.jp\z/i
|
||||||
|
"Mixi.jp"
|
||||||
|
when /piapro\.jp\z/i
|
||||||
|
"Piapro.jp"
|
||||||
|
when /sakura\.ne\.jp\z/i
|
||||||
|
"Sakura.ne.jp"
|
||||||
|
else
|
||||||
|
if self.class == Source::URL
|
||||||
|
# "www.melonbooks.co.jp" => "Melonbooks"
|
||||||
|
parsed_domain.sld.titleize
|
||||||
|
else
|
||||||
|
# "Source::URL::NicoSeiga" => "Nico Seiga"
|
||||||
|
self.class.name.demodulize.titleize
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
protected def initialize(...)
|
protected def initialize(...)
|
||||||
|
|||||||
@@ -7,10 +7,6 @@ module Sources::Strategies
|
|||||||
Source::URL::ArtStation === parsed_url
|
Source::URL::ArtStation === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
@image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
|
@image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ module Sources
|
|||||||
DOWNLOAD_TIMEOUT = 60
|
DOWNLOAD_TIMEOUT = 60
|
||||||
|
|
||||||
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
||||||
|
delegate :site_name, to: :parsed_url
|
||||||
|
|
||||||
# Should return true if all prerequisites for using the strategy are met.
|
# Should return true if all prerequisites for using the strategy are met.
|
||||||
# Return false if the strategy requires api keys that have not been configured.
|
# Return false if the strategy requires api keys that have not been configured.
|
||||||
@@ -53,74 +54,6 @@ module Sources
|
|||||||
false
|
false
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
host = parsed_url&.host
|
|
||||||
|
|
||||||
# XXX should go in dedicated strategies.
|
|
||||||
case host
|
|
||||||
when /amazon\.(com|jp|co\.jp)\z/i
|
|
||||||
"Amazon"
|
|
||||||
when /ask\.fm\z/i
|
|
||||||
"Ask.fm"
|
|
||||||
when /bcy\.net\z/i
|
|
||||||
"BCY"
|
|
||||||
when /booth\.pm\z/i
|
|
||||||
"Booth.pm"
|
|
||||||
when /circle\.ms\z/i
|
|
||||||
"Circle.ms"
|
|
||||||
when /dlsite\.(com|net)\z/i
|
|
||||||
"DLSite"
|
|
||||||
when /doujinshi\.mugimugi\.org\z/i, /doujinshi\.org\z/i
|
|
||||||
"Doujinshi.org"
|
|
||||||
when /erogamescape\.dyndns\.org\z/i
|
|
||||||
"Erogamescape"
|
|
||||||
when /facebook\.com\z/i
|
|
||||||
"Facebook"
|
|
||||||
when /fc2\.com\z/i
|
|
||||||
"FC2"
|
|
||||||
when /gumroad\.com\z/i
|
|
||||||
"Gumroad"
|
|
||||||
when /instagram\.com\z/i
|
|
||||||
"Instagram"
|
|
||||||
when /ko-fi\.com\z/i
|
|
||||||
"Ko-fi"
|
|
||||||
when /livedoor\.(jp|com)\z/i
|
|
||||||
"Livedoor"
|
|
||||||
when /mangaupdates\.com\z/i
|
|
||||||
"Mangaupdates"
|
|
||||||
when /melonbooks\.co\.jp\z/i
|
|
||||||
"Melonbooks"
|
|
||||||
when /mihuashi\.com\z/i
|
|
||||||
"Mihuashi"
|
|
||||||
when /mixi\.jp\z/i
|
|
||||||
"Mixi.jp"
|
|
||||||
when /patreon\.com\z/i
|
|
||||||
"Patreon"
|
|
||||||
when /piapro\.jp\z/i
|
|
||||||
"Piapro.jp"
|
|
||||||
when /picarto\.tv\z/i
|
|
||||||
"Picarto"
|
|
||||||
when /privatter\.net\z/i
|
|
||||||
"Privatter"
|
|
||||||
when /sakura\.ne\.jp\z/i
|
|
||||||
"Sakura.ne.jp"
|
|
||||||
when /stickam\.jp\z/i
|
|
||||||
"Stickam"
|
|
||||||
when /tinami\.com\z/i
|
|
||||||
"Tinami"
|
|
||||||
when /toranoana\.(jp|shop)\z/i
|
|
||||||
"Toranoana"
|
|
||||||
when /twitch\.tv\z/i
|
|
||||||
"Twitch"
|
|
||||||
when /wikipedia\.org\z/i
|
|
||||||
"Wikipedia"
|
|
||||||
when /youtube\.com\z/i
|
|
||||||
"Youtube"
|
|
||||||
else
|
|
||||||
host
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
# Whatever <tt>url</tt> is, this method should return the direct links
|
# Whatever <tt>url</tt> is, this method should return the direct links
|
||||||
# to the canonical binary files. It should not be an HTML page. It should
|
# to the canonical binary files. It should not be an HTML page. It should
|
||||||
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
|
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
|
||||||
|
|||||||
@@ -11,10 +11,6 @@ module Sources
|
|||||||
Source::URL::DeviantArt === parsed_url
|
Source::URL::DeviantArt === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
[image_url]
|
[image_url]
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
Source::URL::Fanbox === parsed_url
|
Source::URL::Fanbox === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[parsed_url.full_image_url]
|
[parsed_url.full_image_url]
|
||||||
|
|||||||
@@ -10,10 +10,6 @@ module Sources::Strategies
|
|||||||
Source::URL::Fantia === parsed_url
|
Source::URL::Fantia === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
return [parsed_url.full_image_url] if parsed_url.image_url?
|
return [parsed_url.full_image_url] if parsed_url.image_url?
|
||||||
return [image_from_downloadable(parsed_url)] if parsed_url.downloadable?
|
return [image_from_downloadable(parsed_url)] if parsed_url.downloadable?
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
parsed_url&.site_name == "Foundation"
|
parsed_url&.site_name == "Foundation"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
"Foundation"
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.full_image_url.present?
|
if parsed_url.full_image_url.present?
|
||||||
[parsed_url.full_image_url]
|
[parsed_url.full_image_url]
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
parsed_url&.site_name == "Hentai Foundry"
|
parsed_url&.site_name == "Hentai Foundry"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
image = page&.search("#picBox img")
|
image = page&.search("#picBox img")
|
||||||
|
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
parsed_url&.site_name == "Lofter"
|
parsed_url&.site_name == "Lofter"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[parsed_url.full_image_url]
|
[parsed_url.full_image_url]
|
||||||
|
|||||||
@@ -7,10 +7,6 @@ module Sources::Strategies
|
|||||||
Source::URL::Mastodon === parsed_url
|
Source::URL::Mastodon === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def domain
|
def domain
|
||||||
case site_name
|
case site_name
|
||||||
when "Pawoo" then "pawoo.net"
|
when "Pawoo" then "pawoo.net"
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
parsed_url&.site_name == "Newgrounds"
|
parsed_url&.site_name == "Newgrounds"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
"Newgrounds"
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[url]
|
[url]
|
||||||
|
|||||||
@@ -12,10 +12,6 @@ module Sources
|
|||||||
Source::URL::NicoSeiga === parsed_url
|
Source::URL::NicoSeiga === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if image_id.present?
|
if image_id.present?
|
||||||
[image_url_for("https://seiga.nicovideo.jp/image/source/#{image_id}")]
|
[image_url_for("https://seiga.nicovideo.jp/image/source/#{image_id}")]
|
||||||
|
|||||||
@@ -12,10 +12,6 @@ module Sources
|
|||||||
Source::URL::Nijie === parsed_url
|
Source::URL::Nijie === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[parsed_url.full_image_url]
|
[parsed_url.full_image_url]
|
||||||
|
|||||||
@@ -36,10 +36,6 @@ module Sources
|
|||||||
Source::URL::Pixiv === parsed_url
|
Source::URL::Pixiv === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if is_ugoira?
|
if is_ugoira?
|
||||||
[api_ugoira[:originalSrc]]
|
[api_ugoira[:originalSrc]]
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
Source::URL::PixivSketch === parsed_url
|
Source::URL::PixivSketch === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[parsed_url.full_image_url]
|
[parsed_url.full_image_url]
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
Source::URL::Plurk === parsed_url
|
Source::URL::Plurk === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
# * Posts can have up to 10 images.
|
# * Posts can have up to 10 images.
|
||||||
# * Artists commonly post extra images by replying to their own post.
|
# * Artists commonly post extra images by replying to their own post.
|
||||||
|
|||||||
@@ -8,10 +8,6 @@ module Sources
|
|||||||
parsed_url&.site_name == "Skeb"
|
parsed_url&.site_name == "Skeb"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[url]
|
[url]
|
||||||
|
|||||||
@@ -11,10 +11,6 @@ module Sources::Strategies
|
|||||||
Source::URL::Tumblr === parsed_url
|
Source::URL::Tumblr === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
return [find_largest(parsed_url)].compact if parsed_url.asset_url?
|
return [find_largest(parsed_url)].compact if parsed_url.asset_url?
|
||||||
|
|
||||||
|
|||||||
@@ -7,10 +7,6 @@ module Sources::Strategies
|
|||||||
parsed_url&.site_name == "TwitPic"
|
parsed_url&.site_name == "TwitPic"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def normalize_for_source
|
def normalize_for_source
|
||||||
parsed_url.page_url || url
|
parsed_url.page_url || url
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -27,10 +27,6 @@ module Sources::Strategies
|
|||||||
parsed_url&.site_name == "Twitter"
|
parsed_url&.site_name == "Twitter"
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
|
# https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
|
|||||||
@@ -9,10 +9,6 @@ module Sources
|
|||||||
Source::URL::Weibo === parsed_url
|
Source::URL::Weibo === parsed_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
|
||||||
parsed_url.site_name
|
|
||||||
end
|
|
||||||
|
|
||||||
def image_urls
|
def image_urls
|
||||||
if parsed_url.image_url?
|
if parsed_url.image_url?
|
||||||
[parsed_url.full_image_url]
|
[parsed_url.full_image_url]
|
||||||
|
|||||||
@@ -72,12 +72,11 @@ class ArtistURL < ApplicationRecord
|
|||||||
end
|
end
|
||||||
|
|
||||||
def domain
|
def domain
|
||||||
Danbooru::URL.parse(normalized_url)&.domain.to_s
|
parsed_url&.domain.to_s
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
def site_name
|
||||||
source = Sources::Strategies.find(normalized_url)
|
parsed_url&.site_name.to_s
|
||||||
source.site_name
|
|
||||||
end
|
end
|
||||||
|
|
||||||
# A secondary URL is an artist URL that we don't normally want to display,
|
# A secondary URL is an artist URL that we don't normally want to display,
|
||||||
@@ -123,9 +122,14 @@ class ArtistURL < ApplicationRecord
|
|||||||
|
|
||||||
def url=(url)
|
def url=(url)
|
||||||
super(url)
|
super(url)
|
||||||
|
@parsed_url = Source::URL.parse(url)
|
||||||
self.normalized_url = self.class.normalize_normalized_url(self.url)
|
self.normalized_url = self.class.normalize_normalized_url(self.url)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def parsed_url
|
||||||
|
@parsed_url ||= Source::URL.parse(url)
|
||||||
|
end
|
||||||
|
|
||||||
def to_s
|
def to_s
|
||||||
if is_active?
|
if is_active?
|
||||||
url
|
url
|
||||||
|
|||||||
@@ -12,7 +12,6 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
should "find the metadata" do
|
should "find the metadata" do
|
||||||
assert_equal("oremuhax.x0.com", @site.site_name)
|
|
||||||
assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls)
|
assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls)
|
||||||
assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url)
|
assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url)
|
||||||
assert_nil(@site.artist_name)
|
assert_nil(@site.artist_name)
|
||||||
|
|||||||
Reference in New Issue
Block a user