Merge pull request #3190 from evazion/feat-tumblr-support
Add Tumblr integration (#3184)
This commit is contained in:
1
Gemfile
1
Gemfile
@@ -45,6 +45,7 @@ gem 'daemons'
|
|||||||
gem 'oauth2'
|
gem 'oauth2'
|
||||||
gem 'bootsnap'
|
gem 'bootsnap'
|
||||||
gem 'addressable'
|
gem 'addressable'
|
||||||
|
gem 'httparty'
|
||||||
|
|
||||||
# needed for looser jpeg header compat
|
# needed for looser jpeg header compat
|
||||||
gem 'ruby-imagespec', :require => "image_spec", :git => "https://github.com/r888888888/ruby-imagespec.git", :branch => "exif-fixes"
|
gem 'ruby-imagespec', :require => "image_spec", :git => "https://github.com/r888888888/ruby-imagespec.git", :branch => "exif-fixes"
|
||||||
|
|||||||
@@ -166,6 +166,8 @@ GEM
|
|||||||
domain_name (~> 0.5)
|
domain_name (~> 0.5)
|
||||||
http-form_data (1.0.2)
|
http-form_data (1.0.2)
|
||||||
http_parser.rb (0.6.0)
|
http_parser.rb (0.6.0)
|
||||||
|
httparty (0.15.5)
|
||||||
|
multi_xml (>= 0.5.2)
|
||||||
httpclient (2.8.0)
|
httpclient (2.8.0)
|
||||||
hurley (0.2)
|
hurley (0.2)
|
||||||
i18n (0.8.1)
|
i18n (0.8.1)
|
||||||
@@ -412,6 +414,7 @@ DEPENDENCIES
|
|||||||
gctools
|
gctools
|
||||||
google-api-client
|
google-api-client
|
||||||
highline
|
highline
|
||||||
|
httparty
|
||||||
mechanize
|
mechanize
|
||||||
memcache-client
|
memcache-client
|
||||||
memcache_mock
|
memcache_mock
|
||||||
|
|||||||
@@ -140,8 +140,8 @@ module Downloads
|
|||||||
def set_source_to_referer(src, referer)
|
def set_source_to_referer(src, referer)
|
||||||
if Sources::Strategies::Nijie.url_match?(src) ||
|
if Sources::Strategies::Nijie.url_match?(src) ||
|
||||||
Sources::Strategies::Twitter.url_match?(src) ||
|
Sources::Strategies::Twitter.url_match?(src) ||
|
||||||
Sources::Strategies::Tumblr.url_match?(src) ||
|
|
||||||
Sources::Strategies::Pawoo.url_match?(src) ||
|
Sources::Strategies::Pawoo.url_match?(src) ||
|
||||||
|
Sources::Strategies::Tumblr.url_match?(src) || Sources::Strategies::Tumblr.url_match?(referer)
|
||||||
Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer)
|
Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer)
|
||||||
strategy = Sources::Site.new(src, :referer_url => referer)
|
strategy = Sources::Site.new(src, :referer_url => referer)
|
||||||
strategy.referer_url
|
strategy.referer_url
|
||||||
|
|||||||
@@ -10,6 +10,7 @@ module Downloads
|
|||||||
# Runs +url+ through the rewrite steps of this strategy, in order:
# rewrite_cdn, then rewrite_samples (which also receives +headers+), then
# rewrite_html_pages.
#
# url     - the URL to rewrite.
# headers - request headers; handed to rewrite_samples and returned as given.
# data    - extra data, returned as given (defaults to an empty hash).
#
# Returns a three-element array: [rewritten url, headers, data].
def rewrite(url, headers, data = {})
  after_cdn = rewrite_cdn(url)
  after_samples = rewrite_samples(after_cdn, headers)
  final_url = rewrite_html_pages(after_samples)

  [final_url, headers, data]
end
|
||||||
@@ -56,6 +57,14 @@ module Downloads
|
|||||||
url.sub!(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com")
|
url.sub!(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com")
|
||||||
url
|
url
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Replaces the URL of a Tumblr post/image HTML page with the URL of the
# post's first image.
#
# url - the candidate URL.
#
# Returns the value of Sources::Strategies::Tumblr#image_url when
# Sources::Strategies::Tumblr.url_match? recognises +url+. Returns +url+
# unchanged when it is not recognised, and also when the strategy reports no
# image (image_url is nil), so the caller never gets a nil URL back.
def rewrite_html_pages(url)
  return url unless Sources::Strategies::Tumblr.url_match?(url)

  # image_url is nil for posts without any image; keep the original URL then.
  Sources::Strategies::Tumblr.new(url).image_url || url
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -1,28 +1,130 @@
|
|||||||
module Sources::Strategies
  # Source strategy for Tumblr posts.
  #
  # The strategy works from a post or image *page* URL
  # (https://<blog>.tumblr.com/post/<id> or .../image/<id>), taken from the
  # referer URL if that matches and from the URL itself otherwise. Post data
  # is looked up through TumblrApiClient and exposed as tags, artist
  # information, commentary and image URLs.
  class Tumblr < Base
    extend Memoist

    # Returns true when both a blog name and a post id can be parsed out of
    # +url+ (see HelperMethods::ClassMethods#parse_info_from_url).
    def self.url_match?(url)
      blog_name, post_id = parse_info_from_url(url)
      blog_name.present? && post_id.present?
    end

    # Returns the canonical post page URL,
    # "https://<blog>.tumblr.com/post/<id>", built from normalized_url.
    # Falls back to @url when neither the referer nor the URL is a post/image
    # page, instead of building a malformed "https://.tumblr.com/post/".
    def referer_url
      blog_name, post_id = self.class.parse_info_from_url(normalized_url)
      return @url if blog_name.blank? || post_id.blank?

      "https://#{blog_name}.tumblr.com/post/#{post_id}"
    end

    # Returns a de-duplicated array of [tag_name, tag_url] pairs for the post.
    # Returns an empty array when the post carries no tags.
    def tags
      Array(post[:tags]).map do |tag|
        # normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
        [tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI.escape(tag.tr(" _-", "-"))}"]
      end.uniq
    end

    def site_name
      "Tumblr"
    end

    # Returns the blog's root URL, built from artist_name.
    def profile_url
      "https://#{artist_name}.tumblr.com/"
    end

    # Returns the post's blog name as reported by the API.
    def artist_name
      post[:blog_name]
    end

    # Returns the post title for "text" and "link" posts, nil for other types.
    def artist_commentary_title
      case post[:type]
      when "text", "link"
        post[:title]
      end
    end

    # Returns the HTML field that holds the commentary for the post type:
    # body ("text"), description ("link") or caption ("photo", "video").
    # Returns nil for any other type.
    def artist_commentary_desc
      case post[:type]
      when "text"
        post[:body]
      when "link"
        post[:description]
      when "photo", "video"
        post[:caption]
      end
    end

    # Returns the first entry of image_urls, or nil when there is none.
    def image_url
      image_urls.first
    end

    # Returns every media URL of the post: the photos (normalized) or the
    # video URL first, followed by images embedded in the commentary HTML.
    # nil entries are dropped.
    def image_urls
      urls =
        case post[:type]
        when "photo"
          Array(post[:photos]).map do |photo|
            self.class.normalize_image_url(photo[:original_size][:url])
          end
        when "video"
          # NOTE(review): video_url is presumably absent for externally hosted
          # videos; a nil here is removed by the compact below.
          [post[:video_url]]
        else
          []
        end

      (urls + self.class.parse_inline_images(artist_commentary_desc)).compact
    end

    # Intentionally empty: the API is queried on first use through the
    # memoized api_response.
    def get
    end

    module HelperMethods
      extend ActiveSupport::Concern

      module ClassMethods
        # Extracts the blog name and post id from a post/image page URL.
        #
        # The blog name must be a single hostname label (word characters and
        # hyphens), so a URL such as "https://evil/x.tumblr.com/post/1" is not
        # mistaken for a Tumblr page.
        #
        # Returns [blog_name, post_id] as strings, or [nil, nil] when +url+
        # is nil or does not match.
        def parse_info_from_url(url)
          match = %r!\Ahttps?://(?<blog_name>[\w-]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i.match(url.to_s)
          match ? [match[:blog_name], match[:post_id]] : [nil, nil]
        end

        # Returns the normalized src of every <img> element found in the HTML
        # fragment +text+. Accepts nil (treated as empty) and skips <img>
        # elements that have no src attribute.
        def parse_inline_images(text)
          html = Nokogiri::HTML.fragment(text.to_s)
          sources = html.css("img").map { |node| node["src"] }.compact
          sources.map { |src| normalize_image_url(src) }
        end

        # Returns +url+ after it has been passed through
        # Downloads::RewriteStrategies::Tumblr#rewrite.
        def normalize_image_url(url)
          rewritten, = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {})
          rewritten
        end
      end

      # Returns whichever of @referer_url / @url is a post/image page URL,
      # preferring the referer; nil when neither matches.
      def normalized_url
        if self.class.url_match?(@referer_url)
          @referer_url
        elsif self.class.url_match?(@url)
          @url
        end
      end
    end

    module ApiMethods
      # Returns a TumblrApiClient configured with
      # Danbooru.config.tumblr_consumer_key.
      def client
        ::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
      end

      # Returns the API answer for the post identified by normalized_url.
      # Returns an empty hash, without calling the API, when no blog name and
      # post id can be determined.
      def api_response
        blog_name, post_id = self.class.parse_info_from_url(normalized_url)
        return {} if blog_name.blank? || post_id.blank?

        client.posts(blog_name, post_id)
      end

      # Returns the first post of the API answer. Returns an empty hash when
      # the answer contains no posts, so the accessors above yield nil or
      # empty results instead of raising.
      # NOTE(review): a rejected request (for example a missing consumer key)
      # is presumably answered without a :posts list — confirm against the API.
      def post
        response = api_response
        posts = response.is_a?(Hash) ? response[:posts] : nil
        (posts.is_a?(Array) && posts.first) || {}
      end
    end

    include ApiMethods
    include HelperMethods

    memoize :client, :api_response
  end
end
|
||||||
|
|||||||
9
app/logical/tumblr_api_client.rb
Normal file
9
app/logical/tumblr_api_client.rb
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
# Small HTTParty-based client for the Tumblr v2 blog API.
#
# api_key - value sent as the +api_key+ query parameter of every request.
class TumblrApiClient < Struct.new(:api_key)
  include HTTParty
  base_uri "https://api.tumblr.com/v2/blog/"

  # Requests GET /<blog_name>/posts with the query parameters +id+ and
  # +api_key+.
  #
  # blog_name - blog identifier placed in the request path.
  # post_id   - id of the post to fetch.
  #
  # Returns the "response" member of the parsed body as a hash with
  # indifferent access. Returns an empty hash (also with indifferent access)
  # when the parsed body is not a hash, or when its "response" member is not
  # a hash, so callers can always index the result by key.
  def posts(blog_name, post_id)
    response = self.class.get("/#{blog_name}/posts", query: { id: post_id, api_key: api_key })
    body = response.parsed_response
    return {}.with_indifferent_access unless body.is_a?(Hash)

    payload = body.with_indifferent_access[:response]
    payload.is_a?(Hash) ? payload : {}.with_indifferent_access
  end
end
|
||||||
@@ -382,6 +382,12 @@ module Danbooru
|
|||||||
nil
|
nil
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Consumer key used for Tumblr API requests. To obtain one:
# 1. Register app at https://www.tumblr.com/oauth/register.
|
||||||
|
# 2. Copy "OAuth Consumer Key" from https://www.tumblr.com/oauth/apps.
|
||||||
|
def tumblr_consumer_key
|
||||||
|
# Default: no key configured.
nil
|
||||||
|
end
|
||||||
|
|
||||||
# Configuration flag; this default returns true.
# NOTE(review): presumably toggles automatic tagging based on image dimensions — confirm against callers.
def enable_dimension_autotagging
|
def enable_dimension_autotagging
|
||||||
true
|
true
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -74,5 +74,15 @@ module Downloads
|
|||||||
assert_rewritten(@rewrite, @source)
|
assert_rewritten(@rewrite, @source)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# A Tumblr post *page* URL (not a direct image URL) should be rewritten to
# the image URL below and then downloaded.
context "a download for a *.tumblr.com/post/* html page" do
  setup do
    @source = "https://noizave.tumblr.com/post/162206271767"
    @rewrite = "http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png"
  end

  should "download the best available version" do
    assert_downloaded(3_620, @source)
    assert_rewritten(@rewrite, @source)
  end
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
136
test/unit/sources/tumblr_test.rb
Normal file
136
test/unit/sources/tumblr_test.rb
Normal file
@@ -0,0 +1,136 @@
|
|||||||
|
require 'test_helper'
|
||||||
|
|
||||||
|
module Sources
  # Tests for the Tumblr source strategy, driven through Sources::Site.
  # NOTE(review): the expectations are tied to specific posts of the
  # "noizave" blog and presumably need the live Tumblr API — confirm.
  class TumblrTest < ActiveSupport::TestCase
    context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do
      setup do
        @site = Sources::Site.new("https://noizave.tumblr.com/post/162206271767")
        @site.get
      end

      should "get the artist name" do
        assert_equal("noizave", @site.artist_name)
      end

      should "get the profile" do
        assert_equal("https://noizave.tumblr.com/", @site.profile_url)
      end

      should "get the tags" do
        expected_tags = [
          ["tag", "https://tumblr.com/tagged/tag"],
          ["red_hair", "https://tumblr.com/tagged/red-hair"]
        ]

        assert_equal(expected_tags, @site.tags)
      end

      should "get the commentary" do
        # The caption is compared verbatim, including its blank lines.
        expected_desc = <<-EOS.strip_heredoc.chomp
          <h2>header</h2>

          <hr><p>plain <b>bold</b> <i>italics</i> <strike>strike</strike></p>

          <!-- more -->

          <ol><li>one</li>
          <li>two</li>
          </ol><ul><li>one</li>
          <ul><li>two</li>
          </ul></ul><blockquote><p>quote</p></blockquote>

          <p><a href=\"http://www.google.com\">link</a></p>
        EOS

        assert_nil(@site.artist_commentary_title)
        assert_equal(expected_desc, @site.artist_commentary_desc)
      end

      should "get the image url" do
        assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url)
      end
    end

    context "The source for a 'http://*.tumblr.com/image/*' image page" do
      setup do
        @site = Sources::Site.new("https://noizave.tumblr.com/image/162206271767")
        @site.get
      end

      should "get the image url" do
        assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url)
      end

      should "get the tags" do
        expected_tags = [
          ["tag", "https://tumblr.com/tagged/tag"],
          ["red_hair", "https://tumblr.com/tagged/red-hair"]
        ]

        assert_equal(expected_tags, @site.tags)
      end
    end

    context "The source for a 'http://*.media.tumblr.com/$hash/tumblr_$id_1280.jpg' image with a referer" do
      setup do
        # A direct image URL; the post is identified through the referer.
        @url = "https://68.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"
        @ref = "https://noizave.tumblr.com/post/162094447052"
        @site = Sources::Site.new(@url, referer_url: @ref)
        @site.get
      end

      should "get the image urls" do
        expected_urls = [
          "http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_raw.png",
          "http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_raw.jpg",
          "http://data.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_raw.gif",
          "http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_raw.png",
          "http://data.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_raw.gif"
        ]

        assert_equal(expected_urls, @site.image_urls)
      end

      should "get the tags" do
        expected_tags = [
          ["tag1", "https://tumblr.com/tagged/tag1"],
          ["tag2", "https://tumblr.com/tagged/tag2"]
        ]

        assert_equal(expected_tags, @site.tags)
      end

      should "get the commentary" do
        expected_desc = '<p>description</p><figure data-orig-width="1152" data-orig-height="648" class="tmblr-full"><img src="https://68.media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_540.png" data-orig-width="1152" data-orig-height="648"/></figure><figure class="tmblr-full" data-orig-height="273" data-orig-width="300" data-tumblr-attribution="skeleton-war-draft:nYQhsQFR8-n3brTTGanKzA:Ze6nYj1umLk8W"><img src="https://68.media.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_400.gif" data-orig-height="273" data-orig-width="300"/></figure>'

        assert_equal(expected_desc, @site.artist_commentary_desc)
      end
    end

    context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do
      setup do
        @site = Sources::Site.new("https://noizave.tumblr.com/post/162221502947")
        @site.get
      end

      should "get the image urls" do
        expected_urls = [
          "http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_raw.png",
          "http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_raw.jpg"
        ]

        assert_equal(expected_urls, @site.image_urls)
      end

      should "get the commentary" do
        expected_desc = '<p>description</p><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://68.media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_540.png" data-orig-height="3000" data-orig-width="3000"/></figure><figure class="tmblr-full" data-orig-height="3000" data-orig-width="3000"><img src="https://68.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_540.jpg" data-orig-height="3000" data-orig-width="3000"/></figure>'

        assert_equal("test post", @site.artist_commentary_title)
        assert_equal(expected_desc, @site.artist_commentary_desc)
      end
    end

    context "The source for a 'http://*.tumblr.com/post/*' video post with inline images" do
      setup do
        @site = Sources::Site.new("https://noizave.tumblr.com/post/162222617101")
        @site.get
      end

      should "get the image urls" do
        # The video URL comes first, followed by the inline caption image.
        expected_urls = [
          "https://vtt.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4",
          "http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_raw.png"
        ]

        assert_equal(expected_urls, @site.image_urls)
      end
    end
  end
end
|
||||||
Reference in New Issue
Block a user