diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index dabb3c6f2..f98a616fa 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -221,6 +221,8 @@ module Sources :profile_url => profile_url, :image_url => image_url, :image_urls => image_urls, + :page_url => page_url, + :canonical_url => canonical_url, :normalized_for_artist_finder_url => normalize_for_artist_finder, :tags => tags, :translated_tags => translated_tags, diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb index 24ec5b539..ae6498d55 100644 --- a/app/logical/sources/strategies/tumblr.rb +++ b/app/logical/sources/strategies/tumblr.rb @@ -1,5 +1,6 @@ module Sources::Strategies class Tumblr < Base + BASE_URL = %r!\Ahttps?://(?:[^/]+\.)*tumblr\.com!i DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com} MD5 = %r{(?[0-9a-f]{32})}i FILENAME = %r{(?(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i @@ -13,18 +14,7 @@ module Sources::Strategies end def self.match?(*urls) - urls.compact.any? do |url| - blog_name, post_id = parse_info_from_url(url) - url =~ IMAGE || blog_name.present? && post_id.present? - end - end - - def self.parse_info_from_url(url) - if url =~ POST - [$~[:blog_name], $~[:post_id]] - else - [] - end + urls.compact.any? { |url| url.match?(BASE_URL) } end def site_name @@ -42,22 +32,21 @@ module Sources::Strategies end def page_url - [url, referer_url].each do |x| - if x =~ POST - blog_name, post_id = self.class.parse_info_from_url(x) - return "https://#{blog_name}.tumblr.com/post/#{post_id}" - end - end + return nil unless blog_name.present? && post_id.present? + "https://#{blog_name}.tumblr.com/post/#{post_id}" + end - return super + def canonical_url + page_url end def profile_url - "https://#{artist_name}.tumblr.com/" + return nil if artist_name.blank? + "https://#{artist_name}.tumblr.com" end def artist_name - post[:blog_name] + post[:blog_name] || blog_name end def artist_commentary_title @@ -99,7 +88,6 @@ module Sources::Strategies [etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"] end.uniq end - memoize :tags def dtext_artist_commentary_desc DText.from_html(artist_commentary_desc).strip @@ -175,11 +163,18 @@ module Sources::Strategies html = Nokogiri::HTML.fragment(artist_commentary_desc) html.css("img").map { |node| node["src"] } end - memoize :inline_images + + def blog_name + urls.map { |url| url[POST, :blog_name] }.compact.first + end + + def post_id + urls.map { |url| url[POST, :post_id] }.compact.first + end def api_response return {} unless self.class.enabled? - blog_name, post_id = self.class.parse_info_from_url(page_url) + return {} unless blog_name.present? && post_id.present? body, code = HttpartyCache.get("/#{blog_name}/posts", params: { id: post_id, api_key: Danbooru.config.tumblr_consumer_key }, diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index 1008a9005..17d0fe097 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -16,7 +16,7 @@ module Sources end should "get the profile" do - assert_equal("https://noizave.tumblr.com/", @site.profile_url) + assert_equal("https://noizave.tumblr.com", @site.profile_url) end should "get the tags" do @@ -180,10 +180,20 @@ module Sources end context "A deleted tumblr post" do - should "work" do + should "extract the info from the url" do site = Sources::Strategies.find("http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy") + data = { + artist_name: "shimetsukage", + profile_url: "https://shimetsukage.tumblr.com", + page_url: "https://shimetsukage.tumblr.com/post/176805588268", + canonical_url: "https://shimetsukage.tumblr.com/post/176805588268", + image_url: nil, + image_urls: [], + tags: [], + } assert_nothing_raised { site.to_h } + assert_operator(data, :<, site.to_h) end end end