Refactor sources
This commit is contained in:
@@ -1,28 +1,52 @@
|
||||
module Sources::Strategies
|
||||
class Tumblr < Base
|
||||
extend Memoist
|
||||
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
|
||||
MD5 = %r{(?<md5>[0-9a-f]{32})}i
|
||||
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
|
||||
SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i
|
||||
EXT = %r{(?<ext>\w+)}
|
||||
IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
|
||||
POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
|
||||
|
||||
def self.url_match?(url)
|
||||
blog_name, post_id = parse_info_from_url(url)
|
||||
blog_name.present? && post_id.present?
|
||||
def self.match?(*urls)
|
||||
urls.compact.any? do |url|
|
||||
blog_name, post_id = parse_info_from_url(url)
|
||||
url =~ IMAGE || blog_name.present? && post_id.present?
|
||||
end
|
||||
end
|
||||
|
||||
def referer_url
|
||||
blog_name, post_id = self.class.parse_info_from_url(normalized_url)
|
||||
"https://#{blog_name}.tumblr.com/post/#{post_id}"
|
||||
end
|
||||
|
||||
def tags
|
||||
post[:tags].map do |tag|
|
||||
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
|
||||
[tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI::escape(tag.tr(" _-", "-"))}"]
|
||||
end.uniq
|
||||
def self.parse_info_from_url(url)
|
||||
if url =~ POST
|
||||
[$~[:blog_name], $~[:post_id]]
|
||||
else
|
||||
[]
|
||||
end
|
||||
end
|
||||
|
||||
def site_name
|
||||
"Tumblr"
|
||||
end
|
||||
|
||||
def image_urls
|
||||
image_urls_sub
|
||||
.uniq
|
||||
.map {|x| normalize_cdn(x)}
|
||||
.map {|x| find_largest(x)}
|
||||
.compact
|
||||
.uniq
|
||||
end
|
||||
|
||||
def page_url
|
||||
[url, referer_url].each do |x|
|
||||
if x =~ POST
|
||||
blog_name, post_id = self.class.parse_info_from_url(x)
|
||||
return "https://#{blog_name}.tumblr.com/post/#{post_id}"
|
||||
end
|
||||
end
|
||||
|
||||
return super
|
||||
end
|
||||
|
||||
def profile_url
|
||||
"https://#{artist_name}.tumblr.com/"
|
||||
end
|
||||
@@ -35,8 +59,10 @@ module Sources::Strategies
|
||||
case post[:type]
|
||||
when "text", "link"
|
||||
post[:title]
|
||||
|
||||
when "answer"
|
||||
"#{post[:asking_name]} asked: #{post[:question]}"
|
||||
|
||||
else
|
||||
nil
|
||||
end
|
||||
@@ -46,94 +72,133 @@ module Sources::Strategies
|
||||
case post[:type]
|
||||
when "text"
|
||||
post[:body]
|
||||
|
||||
when "link"
|
||||
post[:description]
|
||||
|
||||
when "photo", "video"
|
||||
post[:caption]
|
||||
|
||||
when "answer"
|
||||
post[:answer]
|
||||
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
def tags
|
||||
post[:tags].map do |tag|
|
||||
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
|
||||
etag = tag.gsub(/[ _-]/, "_")
|
||||
[etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
|
||||
end.uniq
|
||||
end
|
||||
memoize :tags
|
||||
|
||||
def dtext_artist_commentary_desc
|
||||
DText.from_html(artist_commentary_desc).strip
|
||||
end
|
||||
|
||||
def image_url
|
||||
image_urls.first
|
||||
end
|
||||
public
|
||||
|
||||
def image_urls
|
||||
urls = case post[:type]
|
||||
when "photo"
|
||||
post[:photos].map do |photo|
|
||||
self.class.normalize_image_url(photo[:original_size][:url])
|
||||
end
|
||||
when "video"
|
||||
[post[:video_url]]
|
||||
else
|
||||
[]
|
||||
def image_urls_sub
|
||||
list = []
|
||||
|
||||
if url =~ IMAGE
|
||||
list << url
|
||||
end
|
||||
|
||||
urls += self.class.parse_inline_images(artist_commentary_desc)
|
||||
urls
|
||||
end
|
||||
if page_url !~ POST
|
||||
return list
|
||||
end
|
||||
|
||||
def get
|
||||
end
|
||||
|
||||
module HelperMethods
|
||||
extend ActiveSupport::Concern
|
||||
|
||||
module ClassMethods
|
||||
def parse_info_from_url(url)
|
||||
url =~ %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
|
||||
[$1, $2]
|
||||
end
|
||||
|
||||
def parse_inline_images(text)
|
||||
html = Nokogiri::HTML.fragment(text)
|
||||
image_urls = html.css("img").map { |node| node["src"] }
|
||||
image_urls = image_urls.map(&method(:normalize_image_url))
|
||||
image_urls
|
||||
end
|
||||
|
||||
def normalize_image_url(url)
|
||||
url, _, _ = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {})
|
||||
url
|
||||
if post[:type] == "photo"
|
||||
list += post[:photos].map do |photo|
|
||||
photo[:original_size][:url]
|
||||
end
|
||||
end
|
||||
|
||||
def normalized_url
|
||||
if self.class.url_match?(@referer_url)
|
||||
@referer_url
|
||||
elsif self.class.url_match?(@url)
|
||||
@url
|
||||
if post[:type] == "video"
|
||||
list << post[:video_url]
|
||||
end
|
||||
|
||||
if inline_images.any?
|
||||
list += inline_images.to_a
|
||||
end
|
||||
|
||||
if list.any?
|
||||
return list
|
||||
end
|
||||
|
||||
raise "image url not found for (#{url}, #{referer_url})"
|
||||
end
|
||||
|
||||
# Normalize cdn subdomains.
|
||||
#
|
||||
# https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
|
||||
# => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
|
||||
def normalize_cdn(x)
|
||||
# does this work?
|
||||
x.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/media\.tumblr\.com!i, "http://media.tumblr.com")
|
||||
end
|
||||
|
||||
# Look for the biggest available version on media.tumblr.com. A bigger
|
||||
# version may or may not exist.
|
||||
#
|
||||
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
|
||||
# => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
|
||||
#
|
||||
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
|
||||
# => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
|
||||
#
|
||||
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
|
||||
# => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
|
||||
#
|
||||
# http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
|
||||
# => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
|
||||
#
|
||||
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
|
||||
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
|
||||
def find_largest(x)
|
||||
if x =~ IMAGE
|
||||
sizes = [1280, 640, 540, "500h", 500, 400, 250]
|
||||
candidates = sizes.map do |size|
|
||||
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
|
||||
end
|
||||
|
||||
return candidates.find do |candidate|
|
||||
http_exists?(candidate, headers)
|
||||
end
|
||||
end
|
||||
|
||||
return x
|
||||
end
|
||||
|
||||
module ApiMethods
|
||||
def client
|
||||
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
|
||||
::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
|
||||
end
|
||||
|
||||
def api_response
|
||||
blog_name, post_id = self.class.parse_info_from_url(normalized_url)
|
||||
client.posts(blog_name, post_id)
|
||||
end
|
||||
|
||||
def post
|
||||
api_response[:posts].first
|
||||
end
|
||||
def inline_images
|
||||
html = Nokogiri::HTML.fragment(artist_commentary_desc)
|
||||
html.css("img").map { |node| node["src"] }
|
||||
end
|
||||
memoize :inline_images
|
||||
|
||||
include ApiMethods
|
||||
include HelperMethods
|
||||
def client
|
||||
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
|
||||
|
||||
memoize :client, :api_response
|
||||
TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
|
||||
end
|
||||
memoize :client
|
||||
|
||||
def api_response
|
||||
blog_name, post_id = self.class.parse_info_from_url(page_url)
|
||||
|
||||
raise "Page url not found for (#{url}, #{referer_url})" if blog_name.nil?
|
||||
|
||||
client.posts(blog_name, post_id)
|
||||
end
|
||||
memoize :api_response
|
||||
|
||||
def post
|
||||
api_response[:posts].first
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user