tumblr: extract info from url when api data is unavailable.
Derive the artist name, profile URL, and page URL from the source URLs when the API response is unavailable because the Tumblr post was deleted. This allows the artist finder to work on bad_tumblr_id posts.
This commit is contained in:
@@ -221,6 +221,8 @@ module Sources
|
|||||||
:profile_url => profile_url,
|
:profile_url => profile_url,
|
||||||
:image_url => image_url,
|
:image_url => image_url,
|
||||||
:image_urls => image_urls,
|
:image_urls => image_urls,
|
||||||
|
:page_url => page_url,
|
||||||
|
:canonical_url => canonical_url,
|
||||||
:normalized_for_artist_finder_url => normalize_for_artist_finder,
|
:normalized_for_artist_finder_url => normalize_for_artist_finder,
|
||||||
:tags => tags,
|
:tags => tags,
|
||||||
:translated_tags => translated_tags,
|
:translated_tags => translated_tags,
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
module Sources::Strategies
|
module Sources::Strategies
|
||||||
class Tumblr < Base
|
class Tumblr < Base
|
||||||
|
BASE_URL = %r!\Ahttps?://(?:[^/]+\.)*tumblr\.com!i
|
||||||
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
|
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
|
||||||
MD5 = %r{(?<md5>[0-9a-f]{32})}i
|
MD5 = %r{(?<md5>[0-9a-f]{32})}i
|
||||||
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
|
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
|
||||||
@@ -13,18 +14,7 @@ module Sources::Strategies
|
|||||||
end
|
end
|
||||||
|
|
||||||
def self.match?(*urls)
|
def self.match?(*urls)
|
||||||
urls.compact.any? do |url|
|
urls.compact.any? { |url| url.match?(BASE_URL) }
|
||||||
blog_name, post_id = parse_info_from_url(url)
|
|
||||||
url =~ IMAGE || blog_name.present? && post_id.present?
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
def self.parse_info_from_url(url)
|
|
||||||
if url =~ POST
|
|
||||||
[$~[:blog_name], $~[:post_id]]
|
|
||||||
else
|
|
||||||
[]
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def site_name
|
def site_name
|
||||||
@@ -42,22 +32,21 @@ module Sources::Strategies
|
|||||||
end
|
end
|
||||||
|
|
||||||
def page_url
|
def page_url
|
||||||
[url, referer_url].each do |x|
|
return nil unless blog_name.present? && post_id.present?
|
||||||
if x =~ POST
|
"https://#{blog_name}.tumblr.com/post/#{post_id}"
|
||||||
blog_name, post_id = self.class.parse_info_from_url(x)
|
end
|
||||||
return "https://#{blog_name}.tumblr.com/post/#{post_id}"
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
return super
|
def canonical_url
|
||||||
|
page_url
|
||||||
end
|
end
|
||||||
|
|
||||||
def profile_url
|
def profile_url
|
||||||
"https://#{artist_name}.tumblr.com/"
|
return nil if artist_name.blank?
|
||||||
|
"https://#{artist_name}.tumblr.com"
|
||||||
end
|
end
|
||||||
|
|
||||||
def artist_name
|
def artist_name
|
||||||
post[:blog_name]
|
post[:blog_name] || blog_name
|
||||||
end
|
end
|
||||||
|
|
||||||
def artist_commentary_title
|
def artist_commentary_title
|
||||||
@@ -99,7 +88,6 @@ module Sources::Strategies
|
|||||||
[etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
|
[etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
|
||||||
end.uniq
|
end.uniq
|
||||||
end
|
end
|
||||||
memoize :tags
|
|
||||||
|
|
||||||
def dtext_artist_commentary_desc
|
def dtext_artist_commentary_desc
|
||||||
DText.from_html(artist_commentary_desc).strip
|
DText.from_html(artist_commentary_desc).strip
|
||||||
@@ -175,11 +163,18 @@ module Sources::Strategies
|
|||||||
html = Nokogiri::HTML.fragment(artist_commentary_desc)
|
html = Nokogiri::HTML.fragment(artist_commentary_desc)
|
||||||
html.css("img").map { |node| node["src"] }
|
html.css("img").map { |node| node["src"] }
|
||||||
end
|
end
|
||||||
memoize :inline_images
|
|
||||||
|
def blog_name
|
||||||
|
urls.map { |url| url[POST, :blog_name] }.compact.first
|
||||||
|
end
|
||||||
|
|
||||||
|
def post_id
|
||||||
|
urls.map { |url| url[POST, :post_id] }.compact.first
|
||||||
|
end
|
||||||
|
|
||||||
def api_response
|
def api_response
|
||||||
return {} unless self.class.enabled?
|
return {} unless self.class.enabled?
|
||||||
blog_name, post_id = self.class.parse_info_from_url(page_url)
|
return {} unless blog_name.present? && post_id.present?
|
||||||
|
|
||||||
body, code = HttpartyCache.get("/#{blog_name}/posts",
|
body, code = HttpartyCache.get("/#{blog_name}/posts",
|
||||||
params: { id: post_id, api_key: Danbooru.config.tumblr_consumer_key },
|
params: { id: post_id, api_key: Danbooru.config.tumblr_consumer_key },
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
should "get the profile" do
|
should "get the profile" do
|
||||||
assert_equal("https://noizave.tumblr.com/", @site.profile_url)
|
assert_equal("https://noizave.tumblr.com", @site.profile_url)
|
||||||
end
|
end
|
||||||
|
|
||||||
should "get the tags" do
|
should "get the tags" do
|
||||||
@@ -180,10 +180,20 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
context "A deleted tumblr post" do
|
context "A deleted tumblr post" do
|
||||||
should "work" do
|
should "extract the info from the url" do
|
||||||
site = Sources::Strategies.find("http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy")
|
site = Sources::Strategies.find("http://shimetsukage.tumblr.com/post/176805588268/20180809-ssb-coolboy")
|
||||||
|
data = {
|
||||||
|
artist_name: "shimetsukage",
|
||||||
|
profile_url: "https://shimetsukage.tumblr.com",
|
||||||
|
page_url: "https://shimetsukage.tumblr.com/post/176805588268",
|
||||||
|
canonical_url: "https://shimetsukage.tumblr.com/post/176805588268",
|
||||||
|
image_url: nil,
|
||||||
|
image_urls: [],
|
||||||
|
tags: [],
|
||||||
|
}
|
||||||
|
|
||||||
assert_nothing_raised { site.to_h }
|
assert_nothing_raised { site.to_h }
|
||||||
|
assert_operator(data, :<, site.to_h)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user