diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index a054dbe53..9316d9c56 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -3,6 +3,9 @@
 # A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a
 # subclass responsible for parsing and extracting information from URLs for that site.
 #
+# Sources::Strategies are the main user of Source::URLs. Each Source::URL subclass usually
+# has a corresponding strategy for extracting data from that site.
+#
 # To add a new site, create a subclass of Source::URL and implement `#match?` to define
 # which URLs belong to the site, and `#parse` to parse and extract information from the URL.
 #
@@ -15,6 +18,7 @@
 #   url.status_id        # => "1496123903290314755"
 #   url.twitter_username # => "yasunavert"
 #
+# @see Danbooru::URL
 module Source
   class URL < Danbooru::URL
     SUBCLASSES = [
@@ -68,9 +72,48 @@ module Source
       raise NotImplementedError
     end
 
-    # @return [String, nil] The name of the site this URL belongs to, or possibly nil if unknown.
+    # The name of the site this URL belongs to.
+    #
+    # Hosts listed below get a hand-written name. Any other host is named after its
+    # domain (plain Source::URL) or after the subclass that handles it.
+    #
+    # @return [String]
     def site_name
-      self.class.name.demodulize.titleize
+      # XXX should go in dedicated subclasses.
+      # Each pattern is anchored at a domain label boundary (start of host or a dot), so
+      # that an unrelated host such as "flask.fm" is not reported as "Ask.fm".
+      case host
+      when /(?:\A|\.)ask\.fm\z/i
+        "Ask.fm"
+      when /(?:\A|\.)bcy\.net\z/i
+        "BCY"
+      when /(?:\A|\.)booth\.pm\z/i
+        "Booth.pm"
+      when /(?:\A|\.)circle\.ms\z/i
+        "Circle.ms"
+      when /(?:\A|\.)dlsite\.(com|net)\z/i
+        "DLSite"
+      when /(?:\A|\.)doujinshi\.mugimugi\.org\z/i
+        "Doujinshi.org"
+      when /(?:\A|\.)fc2\.com\z/i
+        "FC2"
+      when /(?:\A|\.)ko-fi\.com\z/i
+        "Ko-fi"
+      when /(?:\A|\.)mixi\.jp\z/i
+        "Mixi.jp"
+      when /(?:\A|\.)piapro\.jp\z/i
+        "Piapro.jp"
+      when /(?:\A|\.)sakura\.ne\.jp\z/i
+        "Sakura.ne.jp"
+      else
+        if instance_of?(Source::URL)
+          # "www.melonbooks.co.jp" => "Melonbooks"
+          parsed_domain.sld.titleize
+        else
+          # "Source::URL::NicoSeiga" => "Nico Seiga"
+          self.class.name.demodulize.titleize
+        end
+      end
     end
 
     protected def initialize(...)
diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb
index 2eb35f5ef..4ff5f165e 100644
--- a/app/logical/sources/strategies/art_station.rb
+++ b/app/logical/sources/strategies/art_station.rb
@@ -7,10 +7,6 @@ module Sources::Strategies
       Source::URL::ArtStation === parsed_url
     end
 
-    def site_name
-      parsed_url.site_name
-    end
-
     def image_urls
       @image_urls ||= image_urls_sub.map { |asset| asset_url(asset, :largest) }
     end
diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb
index 5aecd8761..9f7383ba0 100644
--- a/app/logical/sources/strategies/base.rb
+++ b/app/logical/sources/strategies/base.rb
@@ -22,6 +22,9 @@ module Sources
       DOWNLOAD_TIMEOUT = 60
 
       attr_reader :url, :referer_url, :parsed_url, :parsed_referer
+      # The site name is defined by the parsed URL. `allow_nil` keeps this nil-safe, like
+      # the `parsed_url&.site_name` checks used by the individual strategies.
+      delegate :site_name, to: :parsed_url, allow_nil: true
 
       # Should return true if all prerequisites for using the strategy are met.
       # Return false if the strategy requires api keys that have not been configured.
@@ -53,74 +56,6 @@ module Sources
         false
       end
 
-      def site_name
-        host = parsed_url&.host
-
-        # XXX should go in dedicated strategies.
-        case host
-        when /amazon\.(com|jp|co\.jp)\z/i
-          "Amazon"
-        when /ask\.fm\z/i
-          "Ask.fm"
-        when /bcy\.net\z/i
-          "BCY"
-        when /booth\.pm\z/i
-          "Booth.pm"
-        when /circle\.ms\z/i
-          "Circle.ms"
-        when /dlsite\.(com|net)\z/i
-          "DLSite"
-        when /doujinshi\.mugimugi\.org\z/i, /doujinshi\.org\z/i
-          "Doujinshi.org"
-        when /erogamescape\.dyndns\.org\z/i
-          "Erogamescape"
-        when /facebook\.com\z/i
-          "Facebook"
-        when /fc2\.com\z/i
-          "FC2"
-        when /gumroad\.com\z/i
-          "Gumroad"
-        when /instagram\.com\z/i
-          "Instagram"
-        when /ko-fi\.com\z/i
-          "Ko-fi"
-        when /livedoor\.(jp|com)\z/i
-          "Livedoor"
-        when /mangaupdates\.com\z/i
-          "Mangaupdates"
-        when /melonbooks\.co\.jp\z/i
-          "Melonbooks"
-        when /mihuashi\.com\z/i
-          "Mihuashi"
-        when /mixi\.jp\z/i
-          "Mixi.jp"
-        when /patreon\.com\z/i
-          "Patreon"
-        when /piapro\.jp\z/i
-          "Piapro.jp"
-        when /picarto\.tv\z/i
-          "Picarto"
-        when /privatter\.net\z/i
-          "Privatter"
-        when /sakura\.ne\.jp\z/i
-          "Sakura.ne.jp"
-        when /stickam\.jp\z/i
-          "Stickam"
-        when /tinami\.com\z/i
-          "Tinami"
-        when /toranoana\.(jp|shop)\z/i
-          "Toranoana"
-        when /twitch\.tv\z/i
-          "Twitch"
-        when /wikipedia\.org\z/i
-          "Wikipedia"
-        when /youtube\.com\z/i
-          "Youtube"
-        else
-          host
-        end
-      end
-
       # Whatever url is, this method should return the direct links
       # to the canonical binary files. It should not be an HTML page. It should
       # be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb
index f1a13f5a1..364276b1d 100644
--- a/app/logical/sources/strategies/deviant_art.rb
+++ b/app/logical/sources/strategies/deviant_art.rb
@@ -11,10 +11,6 @@ module Sources
         Source::URL::DeviantArt === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         [image_url]
       end
diff --git a/app/logical/sources/strategies/fanbox.rb b/app/logical/sources/strategies/fanbox.rb
index ca7f65563..7ae584b37 100644
--- a/app/logical/sources/strategies/fanbox.rb
+++ b/app/logical/sources/strategies/fanbox.rb
@@ -8,10 +8,6 @@ module Sources
         Source::URL::Fanbox === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if parsed_url.image_url?
           [parsed_url.full_image_url]
diff --git a/app/logical/sources/strategies/fantia.rb b/app/logical/sources/strategies/fantia.rb
index 824ce9ae2..5c05ac6ce 100644
--- a/app/logical/sources/strategies/fantia.rb
+++ b/app/logical/sources/strategies/fantia.rb
@@ -10,10 +10,6 @@ module Sources::Strategies
       Source::URL::Fantia === parsed_url
     end
 
-    def site_name
-      parsed_url.site_name
-    end
-
     def image_urls
       return [parsed_url.full_image_url] if parsed_url.image_url?
       return [image_from_downloadable(parsed_url)] if parsed_url.downloadable?
diff --git a/app/logical/sources/strategies/foundation.rb b/app/logical/sources/strategies/foundation.rb
index f239640c5..2d1ba0eb7 100644
--- a/app/logical/sources/strategies/foundation.rb
+++ b/app/logical/sources/strategies/foundation.rb
@@ -8,10 +8,6 @@ module Sources
         parsed_url&.site_name == "Foundation"
       end
 
-      def site_name
-        "Foundation"
-      end
-
       def image_urls
         if parsed_url.full_image_url.present?
           [parsed_url.full_image_url]
diff --git a/app/logical/sources/strategies/hentai_foundry.rb b/app/logical/sources/strategies/hentai_foundry.rb
index 4e39b4624..f3e962e5c 100644
--- a/app/logical/sources/strategies/hentai_foundry.rb
+++ b/app/logical/sources/strategies/hentai_foundry.rb
@@ -8,10 +8,6 @@ module Sources
         parsed_url&.site_name == "Hentai Foundry"
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         image = page&.search("#picBox img")
 
diff --git a/app/logical/sources/strategies/lofter.rb b/app/logical/sources/strategies/lofter.rb
index bc9e8f0e4..77e1baf86 100644
--- a/app/logical/sources/strategies/lofter.rb
+++ b/app/logical/sources/strategies/lofter.rb
@@ -8,10 +8,6 @@ module Sources
         parsed_url&.site_name == "Lofter"
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if parsed_url.image_url?
           [parsed_url.full_image_url]
diff --git a/app/logical/sources/strategies/mastodon.rb b/app/logical/sources/strategies/mastodon.rb
index 6325d605b..e6c813856 100644
--- a/app/logical/sources/strategies/mastodon.rb
+++ b/app/logical/sources/strategies/mastodon.rb
@@ -7,10 +7,6 @@ module Sources::Strategies
       Source::URL::Mastodon === parsed_url
     end
 
-    def site_name
-      parsed_url.site_name
-    end
-
     def domain
       case site_name
       when "Pawoo" then "pawoo.net"
diff --git a/app/logical/sources/strategies/newgrounds.rb b/app/logical/sources/strategies/newgrounds.rb
index 422c1ac40..d985272fc 100644
--- a/app/logical/sources/strategies/newgrounds.rb
+++ b/app/logical/sources/strategies/newgrounds.rb
@@ -8,10 +8,6 @@ module Sources
         parsed_url&.site_name == "Newgrounds"
       end
 
-      def site_name
-        "Newgrounds"
-      end
-
       def image_urls
         if parsed_url.image_url?
           [url]
diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb
index 5a0324c7a..c4d6dc284 100644
--- a/app/logical/sources/strategies/nico_seiga.rb
+++ b/app/logical/sources/strategies/nico_seiga.rb
@@ -12,10 +12,6 @@ module Sources
         Source::URL::NicoSeiga === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if image_id.present?
           [image_url_for("https://seiga.nicovideo.jp/image/source/#{image_id}")]
diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb
index 47137e27b..57e7690bd 100644
--- a/app/logical/sources/strategies/nijie.rb
+++ b/app/logical/sources/strategies/nijie.rb
@@ -12,10 +12,6 @@ module Sources
         Source::URL::Nijie === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if parsed_url.image_url?
           [parsed_url.full_image_url]
diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb
index 628e31a94..0cffcccb8 100644
--- a/app/logical/sources/strategies/pixiv.rb
+++ b/app/logical/sources/strategies/pixiv.rb
@@ -36,10 +36,6 @@ module Sources
         Source::URL::Pixiv === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if is_ugoira?
           [api_ugoira[:originalSrc]]
diff --git a/app/logical/sources/strategies/pixiv_sketch.rb b/app/logical/sources/strategies/pixiv_sketch.rb
index 2c7077d51..5fe19eb51 100644
--- a/app/logical/sources/strategies/pixiv_sketch.rb
+++ b/app/logical/sources/strategies/pixiv_sketch.rb
@@ -8,10 +8,6 @@ module Sources
         Source::URL::PixivSketch === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if parsed_url.image_url?
           [parsed_url.full_image_url]
diff --git a/app/logical/sources/strategies/plurk.rb b/app/logical/sources/strategies/plurk.rb
index f5340ea61..2c3de7380 100644
--- a/app/logical/sources/strategies/plurk.rb
+++ b/app/logical/sources/strategies/plurk.rb
@@ -8,10 +8,6 @@ module Sources
         Source::URL::Plurk === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         # * Posts can have up to 10 images.
         # * Artists commonly post extra images by replying to their own post.
diff --git a/app/logical/sources/strategies/skeb.rb b/app/logical/sources/strategies/skeb.rb
index a5518803e..4c17b25e0 100644
--- a/app/logical/sources/strategies/skeb.rb
+++ b/app/logical/sources/strategies/skeb.rb
@@ -8,10 +8,6 @@ module Sources
         parsed_url&.site_name == "Skeb"
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if parsed_url.image_url?
           [url]
diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb
index dbd3c9841..55dfd0fdd 100644
--- a/app/logical/sources/strategies/tumblr.rb
+++ b/app/logical/sources/strategies/tumblr.rb
@@ -11,10 +11,6 @@ module Sources::Strategies
       Source::URL::Tumblr === parsed_url
     end
 
-    def site_name
-      parsed_url.site_name
-    end
-
     def image_urls
       return [find_largest(parsed_url)].compact if parsed_url.asset_url?
 
diff --git a/app/logical/sources/strategies/twit_pic.rb b/app/logical/sources/strategies/twit_pic.rb
index eef260b7d..1cfad1dea 100644
--- a/app/logical/sources/strategies/twit_pic.rb
+++ b/app/logical/sources/strategies/twit_pic.rb
@@ -7,10 +7,6 @@ module Sources::Strategies
       parsed_url&.site_name == "TwitPic"
     end
 
-    def site_name
-      parsed_url.site_name
-    end
-
     def normalize_for_source
       parsed_url.page_url || url
     end
diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb
index eac5e9841..374f860c7 100644
--- a/app/logical/sources/strategies/twitter.rb
+++ b/app/logical/sources/strategies/twitter.rb
@@ -27,10 +27,6 @@ module Sources::Strategies
       parsed_url&.site_name == "Twitter"
     end
 
-    def site_name
-      parsed_url.site_name
-    end
-
     def image_urls
       # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
       if parsed_url.image_url?
diff --git a/app/logical/sources/strategies/weibo.rb b/app/logical/sources/strategies/weibo.rb
index c40979a0f..e88858a74 100644
--- a/app/logical/sources/strategies/weibo.rb
+++ b/app/logical/sources/strategies/weibo.rb
@@ -9,10 +9,6 @@ module Sources
         Source::URL::Weibo === parsed_url
       end
 
-      def site_name
-        parsed_url.site_name
-      end
-
       def image_urls
         if parsed_url.image_url?
           [parsed_url.full_image_url]
diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb
index 6a14df295..dbad3882c 100644
--- a/app/models/artist_url.rb
+++ b/app/models/artist_url.rb
@@ -72,12 +72,11 @@ class ArtistURL < ApplicationRecord
   end
 
   def domain
-    Danbooru::URL.parse(normalized_url)&.domain.to_s
+    parsed_url&.domain.to_s
   end
 
   def site_name
-    source = Sources::Strategies.find(normalized_url)
-    source.site_name
+    parsed_url&.site_name.to_s
   end
 
   # A secondary URL is an artist URL that we don't normally want to display,
@@ -123,9 +122,19 @@ class ArtistURL < ApplicationRecord
 
   def url=(url)
     super(url)
+    # Parse the stored attribute (self.url), the same value the lazy reader below uses.
+    @parsed_url = Source::URL.parse(self.url)
     self.normalized_url = self.class.normalize_normalized_url(self.url)
   end
 
+  # This record's URL parsed as a Source::URL. The result is cached in @parsed_url, which
+  # `url=` refreshes whenever the URL is reassigned.
+  #
+  # @return [Source::URL, nil] presumably nil when the URL can't be parsed (callers use `&.`)
+  def parsed_url
+    @parsed_url ||= Source::URL.parse(url)
+  end
+
   def to_s
     if is_active?
       url
diff --git a/test/unit/sources/null_test.rb b/test/unit/sources/null_test.rb
index 94c0f2c60..1f9a75a94 100644
--- a/test/unit/sources/null_test.rb
+++ b/test/unit/sources/null_test.rb
@@ -12,7 +12,6 @@ module Sources
       end
 
       should "find the metadata" do
-        assert_equal("oremuhax.x0.com", @site.site_name)
         assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls)
         assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url)
         assert_nil(@site.artist_name)