Files
danbooru/app/logical/sources/strategies/tumblr.rb
evazion c700ea4b5f Fix #4016: Translated tags failing to find some tags.
* Normalize spaces to underscores when saving other names. Preserve case
  since case can be significant.

* Fix WikiPage#other_names_include to search case-insensitively (note:
  this prevents using the index).

* Fix sources to return the raw tags in `#tags` and the normalized tags
  in `#normalized_tags`. The normalized tags are the tags that will be
  matched against other names.
2018-12-16 11:37:57 -06:00

188 lines
4.8 KiB
Ruby

module Sources::Strategies
class Tumblr < Base
SIZES = %w[1280 640 540 500h 500 400 250 100]
BASE_URL = %r!\Ahttps?://(?:[^/]+\.)*tumblr\.com!i
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
MD5 = %r{(?<md5>[0-9a-f]{32})}i
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
EXT = %r{(?<ext>\w+)}
IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_(?<size>\w+)\.#{EXT}\z!i
VIDEO = %r!\Ahttps?://(?:vtt|ve\.media)\.tumblr\.com/!i
POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
def self.enabled?
Danbooru.config.tumblr_consumer_key.present?
end
def domains
["tumblr.com"]
end
def site_name
"Tumblr"
end
def image_url
return image_urls.first unless url.match?(IMAGE) || url.match?(VIDEO)
find_largest(url)
end
def image_urls
list = []
case post[:type]
when "photo"
list += post[:photos].map { |photo| photo[:original_size][:url] }
when "video"
list += [post[:video_url]]
# api response is blank (work is deleted or we were given a direct image with no referer url)
when nil
list += [url] if url.match?(IMAGE) || url.match?(VIDEO)
end
list += inline_images
list.map { |url| find_largest(url) }
end
def preview_urls
image_urls.map do |x|
x.sub(%r!_1280\.(jpg|png|gif|jpeg)\z!, '_250.\1')
end
end
def page_url
return nil unless blog_name.present? && post_id.present?
"https://#{blog_name}.tumblr.com/post/#{post_id}"
end
def canonical_url
page_url
end
def profile_url
return nil if artist_name.blank?
"https://#{artist_name}.tumblr.com"
end
def artist_name
post[:blog_name] || blog_name
end
def artist_commentary_title
case post[:type]
when "text", "link"
post[:title]
when "answer"
"#{post[:asking_name]} asked: #{post[:question]}"
else
nil
end
end
def artist_commentary_desc
case post[:type]
when "text"
post[:body]
when "link"
post[:description]
when "photo", "video"
post[:caption]
when "answer"
post[:answer]
else
nil
end
end
def tags
post[:tags].to_a.map do |tag|
[tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"]
end.uniq
end
def normalize_tag(tag)
tag = tag.tr("-", "_")
super(tag)
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip
end
public
# Look for the biggest available version on media.tumblr.com. A bigger
# version may or may not exist.
#
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
# => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
#
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
# => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
#
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
#
# http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
# => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
#
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
def find_largest(url, sizes: SIZES)
return url unless url =~ IMAGE
candidates = sizes.map do |size|
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
end
candidates.find do |candidate|
http_exists?(candidate, headers)
end
end
def inline_images
html = Nokogiri::HTML.fragment(artist_commentary_desc)
html.css("img").map { |node| node["src"] }
end
def blog_name
urls.map { |url| url[POST, :blog_name] }.compact.first
end
def post_id
urls.map { |url| url[POST, :post_id] }.compact.first
end
def api_response
return {} unless self.class.enabled?
return {} unless blog_name.present? && post_id.present?
body, code = HttpartyCache.get("/#{blog_name}/posts",
params: { id: post_id, api_key: Danbooru.config.tumblr_consumer_key },
base_uri: "https://api.tumblr.com/v2/blog/"
)
if code == 200
return JSON.parse(body, symbolize_names: true)
else
Rails.logger.debug("TumblrApiClient call failed (code=#{code}, body=#{body}, blog_name=#{blog_name}, post_id=#{post_id})")
return {}
end
end
memoize :api_response
def post
api_response.dig(:response, :posts)&.first || {}
end
end
end