sources: fix sources sometimes choosing wrong strategy (fix #3968)

Fix sources choosing the wrong strategy when the referer belongs to a
different site (for example, when uploading a Twitter post with a Pixiv
referer).

* Fix `match?` to only consider the main URL, not the referer.

* Change `match?` to match against a list of domains given by the `domains` method.

* Change `match?` to an instance method.
This commit is contained in:
evazion
2018-11-04 13:00:17 -06:00
parent 4219163042
commit 5cf6a43918
13 changed files with 58 additions and 37 deletions

View File

@@ -16,8 +16,8 @@ module Sources
end
def self.find(url, referer=nil, default: Strategies::Null)
strategy = all.detect { |strategy| strategy.match?(url, referer) } || default
strategy&.new(url, referer)
strategy = all.map { |strategy| strategy.new(url, referer) }.detect(&:match?)
strategy || default.new(url, referer)
end
def self.canonical(url, referer)

View File

@@ -9,10 +9,6 @@ module Sources::Strategies
attr_reader :json, :image_urls
def self.match?(*urls)
urls.compact.any? { |x| x.match?(PROJECT) || x.match?(ASSET) || x.match?(PROFILE)}
end
# https://www.artstation.com/artwork/04XA4
# https://www.artstation.com/artwork/cody-from-sf
# https://sa-dui.artstation.com/projects/DVERn
@@ -24,6 +20,10 @@ module Sources::Strategies
end
end
# Base domains whose urls are handled by this strategy.
def domains
  %w[artstation.com]
end
# Returns the name of the site this strategy handles.
def site_name
"ArtStation"
end

View File

@@ -14,14 +14,10 @@
module Sources
module Strategies
class Base
attr_reader :url, :referer_url
attr_reader :url, :referer_url, :urls, :parsed_url, :parsed_referer, :parsed_urls
extend Memoist
def self.match?(*urls)
false
end
# Should return true if all prerequisites for using the strategy are met.
# Return false if the strategy requires api keys that have not been configured.
def self.enabled?
@@ -41,10 +37,24 @@ module Sources
# Stores the main url and the optional referer, together with parsed
# versions of each. A url that fails to parse is stored as nil instead of
# raising; blank entries are left out of the `urls` / `parsed_urls` lists.
def initialize(url, referer_url = nil)
  @url = url
  @referer_url = referer_url
  @urls = [url, referer_url].select(&:present?)

  @parsed_url =
    begin
      Addressable::URI.heuristic_parse(url)
    rescue StandardError
      nil
    end

  @parsed_referer =
    begin
      Addressable::URI.heuristic_parse(referer_url)
    rescue StandardError
      nil
    end

  @parsed_urls = [parsed_url, parsed_referer].select(&:present?)
end
def urls
[url, referer_url].select(&:present?)
# Whether this strategy should be used. The default implementation looks
# only at the main url (never the referer): it is false when the url could
# not be parsed, otherwise true when the url's domain is listed in `domains`.
def match?
  !parsed_url.nil? && parsed_url.domain.in?(domains)
end
# The list of base domains belonging to this site. Subdomains are
# automatically included (e.g. "pixiv.net" matches "fanbox.pixiv.net").
# The base implementation returns an empty list.
def domains
[]
end
def site_name

View File

@@ -49,8 +49,8 @@ module Sources
PATH_PROFILE = %r{\Ahttps?://(www\.)?deviantart\.com/#{ARTIST}/?\z}i
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) }
# Base domains whose urls are handled by this strategy.
def domains
  %w[deviantart.net deviantart.com]
end
def site_name

View File

@@ -38,8 +38,8 @@ module Sources
delegate :artist_name, :profile_url, :unique_id, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true
def self.match?(*urls)
urls.compact.any? { |x| x.match?(BASE_URL) }
# Base domains whose urls are handled by this strategy.
def domains
  %w[yande.re konachan.com]
end
def site_name

View File

@@ -7,8 +7,8 @@ module Sources
PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
def self.match?(*urls)
urls.compact.any? { |x| x.match?(URL) }
# Base domains whose urls are handled by this strategy.
def domains
  %w[nicoseiga.jp nicovideo.jp]
end
def site_name

View File

@@ -53,8 +53,8 @@ module Sources
FILENAME = %r!(?:(?<illust_id>\d+)_(?<page>\d+_))?(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?!i
IMAGE_URL = %r!\Ahttps?://pic\d+\.nijie\.info/#{DIR}/#{FILENAME}\.\w+\z!i
def self.match?(*urls)
urls.compact.any? { |x| x.match?(BASE_URL) }
# Base domains whose urls are handled by this strategy.
def domains
  %w[nijie.info]
end
def site_name

View File

@@ -2,10 +2,8 @@ module Sources::Strategies
class Pawoo < Base
IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)!
def self.match?(*urls)
urls.compact.any? do |x|
x =~ IMAGE || PawooApiClient::Status.is_match?(x) || PawooApiClient::Account.is_match?(x)
end
# Base domains whose urls are handled by this strategy.
def domains
  %w[pawoo.net]
end
def site_name

View File

@@ -21,10 +21,6 @@ module Sources
FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))!
FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))!
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}|#{FANBOX_IMAGE}|#{FANBOX_ACCOUNT}/i) }
end
def self.to_dtext(text)
if text.nil?
return nil
@@ -47,6 +43,10 @@ module Sources
DText.from_html(text)
end
# Base domains whose urls are handled by this strategy.
def domains
  %w[pixiv.net pximg.net]
end
# Returns the name of the site this strategy handles.
def site_name
"Pixiv"
end

View File

@@ -15,8 +15,12 @@ module Sources
class Stash < DeviantArt
STASH = %r{\Ahttps?://sta\.sh/(?<post_id>[0-9a-zA-Z]+)}i
def self.match?(*urls)
urls.compact.any? { |x| x =~ STASH }
# Base domains whose urls are handled by this strategy.
def domains
  %w[deviantart.net sta.sh]
end
# True when any entry of `parsed_urls` has the domain "sta.sh".
def match?
  parsed_urls.any? { |uri| "sta.sh" == uri.domain }
end
def site_name

View File

@@ -15,8 +15,8 @@ module Sources::Strategies
Danbooru.config.tumblr_consumer_key.present?
end
def self.match?(*urls)
urls.compact.any? { |url| url.match?(BASE_URL) }
# Base domains whose urls are handled by this strategy.
def domains
  %w[tumblr.com]
end
def site_name

View File

@@ -9,10 +9,6 @@ module Sources::Strategies
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
RESERVED_USERNAMES = %w[home i intent search]
def self.match?(*urls)
urls.compact.any? { |x| x =~ PAGE || x =~ ASSET}
end
# Whether this strategy can be used; delegates to a freshly built
# TwitterService and returns whatever its `enabled?` reports.
def self.enabled?
TwitterService.new.enabled?
end
@@ -35,6 +31,10 @@ module Sources::Strategies
end
end
# Base domains whose urls are handled by this strategy.
def domains
  %w[twitter.com twimg.com]
end
# Returns the name of the site this strategy handles.
def site_name
"Twitter"
end

View File

@@ -229,5 +229,14 @@ module Sources
assert_equal(desc2, site.dtext_artist_commentary_desc)
end
end
# Regression test for #3968: the strategy must be picked from the main url
# even when the referer belongs to a different site.
context "A twitter post with a pixiv referer" do
  should "use the twitter strategy" do
    site = Sources::Strategies.find("https://twitter.com/Mityubi/status/849630665603665920", "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=56735489")

    # Expected value goes first so a failure message labels the values correctly.
    assert_equal("Twitter", site.site_name)
    assert_equal("https://pbs.twimg.com/media/C8p-gPhVoAMZupS.png:orig", site.image_url)
  end
end
end
end