Refactor sources
This commit is contained in:
@@ -3,16 +3,33 @@ module Downloads
|
||||
# Raised when an HTTP request made while downloading returns a non-success
# status. Derives from StandardError (not Exception) so that a plain `rescue`
# catches it and it is not grouped with SignalException/SystemExit.
class Error < StandardError; end
|
||||
|
||||
attr_reader :data, :options
|
||||
attr_accessor :source, :original_source, :downloaded_source
|
||||
attr_accessor :source, :referer
|
||||
|
||||
def initialize(source, options = {})
|
||||
# Prevent Cloudflare from potentially mangling the image. See issue #3528.
#
# Parses +url+ with Addressable and, when is_cloudflare? reports true for it,
# adds a random `danbooru_no_cache` query parameter while keeping any query
# values already present. Returns the parsed URI.
def self.uncached_url(url, headers = {})
  uri = Addressable::URI.parse(url)
  return uri unless is_cloudflare?(uri, headers)

  existing_params = uri.query_values || {}
  uri.query_values = existing_params.merge(danbooru_no_cache: SecureRandom.uuid)
  uri
end
|
||||
|
||||
# Reports whether a HEAD request to +url+ comes back with a "CF-Ray" header.
# The answer is looked up through Cache.get under a key derived from the
# URL's origin, with a 4 hour expiry argument.
#
# Raises Error when the HEAD response is not successful.
def self.is_cloudflare?(url, headers = {})
  cache_key = "is_cloudflare:#{url.origin}"

  Cache.get(cache_key, 4.hours) do
    request_options = { headers: headers }.deep_merge(Danbooru.config.httparty_options)
    response = HTTParty.head(url, request_options)

    unless response.success?
      raise Error.new("HTTP error code: #{response.code} #{response.message}")
    end

    response.key?("CF-Ray")
  end
end
|
||||
|
||||
def initialize(source, referer=nil, options = {})
|
||||
# source can potentially get rewritten in the course
|
||||
# of downloading a file, so check it again
|
||||
@source = source
|
||||
@original_source = source
|
||||
|
||||
# the URL actually downloaded after rewriting the original source.
|
||||
@downloaded_source = nil
|
||||
@referer = referer
|
||||
|
||||
# we sometimes need to capture data from the source page
|
||||
@data = {}
|
||||
@@ -22,48 +39,31 @@ module Downloads
|
||||
@data[:get_thumbnail] = options[:get_thumbnail]
|
||||
end
|
||||
|
||||
# Returns the URL that before_download produces for the current source,
# discarding the headers and data it also returns.
def rewrite_url
  rewritten_url, = before_download(@source, @data)
  rewritten_url
end
|
||||
|
||||
def size
|
||||
url, headers, _ = before_download(@source, @data)
|
||||
options = { timeout: 3, headers: headers }.deep_merge(Danbooru.config.httparty_options)
|
||||
res = HTTParty.head(url, options)
|
||||
res.content_length
|
||||
strategy = Sources::Strategies.find(source, referer)
|
||||
options = { timeout: 3, headers: strategy.headers }.deep_merge(Danbooru.config.httparty_options)
|
||||
|
||||
res = HTTParty.head(strategy.file_url, options)
|
||||
|
||||
if res.success?
|
||||
res.content_length
|
||||
else
|
||||
raise HTTParty::ResponseError.new(res)
|
||||
end
|
||||
end
|
||||
|
||||
def download!
|
||||
url, headers, @data = before_download(@source, @data)
|
||||
|
||||
strategy = Sources::Strategies.find(source, referer)
|
||||
output_file = Tempfile.new(binmode: true)
|
||||
http_get_streaming(uncached_url(url, headers), output_file, headers)
|
||||
@data = strategy.data
|
||||
|
||||
@downloaded_source = url
|
||||
@source = after_download(url)
|
||||
http_get_streaming(
|
||||
self.class.uncached_url(strategy.file_url, strategy.headers),
|
||||
output_file,
|
||||
strategy.headers
|
||||
)
|
||||
|
||||
output_file
|
||||
end
|
||||
|
||||
# Runs +url+ through every rewrite strategy in order, threading the url,
# headers and data returned by one strategy into the next. Headers start out
# as Danbooru.config.http_headers. Whenever a strategy hands back a nil url,
# the url originally passed to this method is substituted.
#
# Returns [url, headers, datums].
def before_download(url, datums)
  fallback_url = url
  initial_state = [url, Danbooru.config.http_headers, datums]

  RewriteStrategies::Base.strategies.reduce(initial_state) do |(current_url, headers, data), strategy_class|
    next_url, next_headers, next_data = strategy_class.new(current_url).rewrite(current_url, headers, data)
    [next_url.nil? ? fallback_url : next_url, next_headers, next_data]
  end
end
|
||||
|
||||
def after_download(src)
|
||||
src = fix_twitter_sources(src)
|
||||
if options[:referer_url].present?
|
||||
src = set_source_to_referer(src, options[:referer_url])
|
||||
end
|
||||
src
|
||||
[output_file, strategy]
|
||||
end
|
||||
|
||||
def validate_local_hosts(url)
|
||||
@@ -111,50 +111,5 @@ module Downloads
|
||||
end
|
||||
end # while
|
||||
end # def
|
||||
|
||||
# Returns original_source instead of +src+ when +src+ is a twimg.com media URL
# and the original source is a twitter.com URL, or when +src+ is an
# img.pawoo.net URL and the original source is a pawoo.net URL. Otherwise
# returns +src+ unchanged.
def fix_twitter_sources(src)
  return original_source if src =~ %r!^https?://(?:video|pbs)\.twimg\.com/! && original_source =~ %r!^https?://twitter\.com/!
  return original_source if src =~ %r!^https?://img\.pawoo\.net/! && original_source =~ %r!^https?://pawoo\.net/!

  src
end
|
||||
|
||||
# Decides whether the referer should replace +src+ as the source.
#
# When +src+ matches the Nijie, Twitter, Pawoo, Tumblr or ArtStation strategy,
# or +referer+ matches the Twitter, Tumblr or ArtStation strategy, a
# Sources::Site is built for +src+ with the referer and its referer_url is
# returned. Otherwise +src+ is returned unchanged.
def set_source_to_referer(src, referer)
  prefer_referer =
    Sources::Strategies::Nijie.url_match?(src) ||
    Sources::Strategies::Twitter.url_match?(src) || Sources::Strategies::Twitter.url_match?(referer) ||
    Sources::Strategies::Pawoo.url_match?(src) ||
    Sources::Strategies::Tumblr.url_match?(src) || Sources::Strategies::Tumblr.url_match?(referer) ||
    Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer)

  return src unless prefer_referer

  Sources::Site.new(src, :referer_url => referer).referer_url
end
|
||||
|
||||
private
|
||||
|
||||
# Prevent Cloudflare from potentially mangling the image. See issue #3528.
#
# Returns +url+ parsed by Addressable; when is_cloudflare? is true for it, a
# random `danbooru_no_cache` query value is merged into its query first.
def uncached_url(url, headers = {})
  Addressable::URI.parse(url).tap do |parsed|
    if is_cloudflare?(parsed, headers)
      parsed.query_values = (parsed.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid)
    end
  end
end
|
||||
|
||||
# True when a HEAD request to +url+ returns a response carrying a "CF-Ray"
# header. The lookup goes through Cache.get, keyed on the URL's origin and
# given a 4 hour expiry argument.
#
# Raises Error if the HEAD response is unsuccessful.
def is_cloudflare?(url, headers = {})
  Cache.get("is_cloudflare:#{url.origin}", 4.hours) do
    head_options = { headers: headers }.deep_merge(Danbooru.config.httparty_options)
    head_response = HTTParty.head(url, head_options)

    if head_response.success?
      head_response.key?("CF-Ray")
    else
      raise Error.new("HTTP error code: #{head_response.code} #{head_response.message}")
    end
  end
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -1,33 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for ArtStation. Sized CDN image URLs are pointed at the
    # "original" rendition when a HEAD request for it succeeds; any other URL
    # matching Sources::Strategies::ArtStation is resolved to an image URL
    # through Sources::Site.
    class ArtStation < Base
      # Returns [url, headers, data] with the url possibly rewritten.
      def rewrite(url, headers, data = {})
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
        sized_cdn_image = url =~ %r!^https?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!

        unless sized_cdn_image
          url, headers = rewrite_html_url(url, headers)
          return [url, headers, data]
        end

        candidate, headers = rewrite_large_url(url, headers)
        # Only switch to the "original" rendition if it actually exists.
        url = candidate if http_exists?(candidate, headers)

        [url, headers, data]
      end

      protected

      # Resolves an ArtStation page URL to its image URL; URLs that do not
      # match the ArtStation source strategy pass through untouched.
      def rewrite_html_url(url, headers)
        if Sources::Strategies::ArtStation.url_match?(url)
          site = Sources::Site.new(url)
          site.get
          [site.image_url, headers]
        else
          [url, headers]
        end
      end

      # Swaps the first /medium/, /small/ or /large/ path segment for /original/.
      def rewrite_large_url(url, headers)
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/original/aoi-ogata-hate-city.jpg?1476754974
        [url.sub(%r!/(?:medium|small|large)/!, "/original/"), headers]
      end
    end
  end
end
|
||||
@@ -1,29 +0,0 @@
|
||||
# This is a collection of strategies for normalizing URLs. Most strategies
# typically work by parsing and rewriting the URL itself, but some strategies
# may delegate to Sources::Strategies to obtain a more canonical URL.

module Downloads
  module RewriteStrategies
    # Common parent of the rewrite strategies. On its own it is a no-op:
    # #rewrite hands its arguments straight back.
    class Base
      attr_reader :url

      class << self
        # The strategy classes, in the order listed here.
        def strategies
          [
            Downloads::RewriteStrategies::Pixiv,
            Downloads::RewriteStrategies::NicoSeiga,
            Downloads::RewriteStrategies::ArtStation,
            Downloads::RewriteStrategies::Twitpic,
            Downloads::RewriteStrategies::DeviantArt,
            Downloads::RewriteStrategies::Tumblr,
            Downloads::RewriteStrategies::Moebooru,
            Downloads::RewriteStrategies::Twitter,
            Downloads::RewriteStrategies::Nijie,
            Downloads::RewriteStrategies::Pawoo
          ]
        end
      end

      def initialize(url = nil)
        @url = url
      end

      # Default implementation: returns [url, headers, data] unchanged.
      def rewrite(url, headers, data = {})
        [url, headers, data]
      end

      protected

      # True when a HEAD request to +url+, sent with +headers+ merged into the
      # configured HTTParty options, is successful.
      def http_exists?(url, headers)
        request_options = Danbooru.config.httparty_options.deep_merge(headers: headers)
        HTTParty.head(url, request_options).success?
      end
    end
  end
end
|
||||
@@ -1,53 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for DeviantArt page URLs and deviantart.net image URLs.
    class DeviantArt < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]. The url is only touched when it looks
      # like a DeviantArt art page or a deviantart.net image file.
      def rewrite(url, headers, data = {})
        if url =~ %r{deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} || url =~ %r{deviantart\.net/.+/[a-z0-9_]+(_by_[a-z0-9_]+)?-d([a-z0-9]+)\.}i
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
        end

        [url, headers, data]
      end

      protected

      # Art page URLs are replaced by the image URL reported by the source;
      # everything else passes through.
      def rewrite_html_pages(url, headers)
        if url =~ %r{^https?://.+?\.deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
          [source.image_url, headers]
        else
          [url, headers]
        end
      end

      # Strips the "200H/" or "PRE/" thumbnail path segment from fc/th image
      # URLs, and resolves pre/img URLs through the source.
      #
      # Uses the non-destructive String#sub so the caller's string is never
      # modified in place (and frozen strings are accepted).
      def rewrite_thumbnails(url, headers)
        if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
          prefix = Regexp.last_match(1)
          url = url.sub(prefix + "200H/", prefix)
        elsif url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
          prefix = Regexp.last_match(1)
          url = url.sub(prefix + "PRE/", prefix)
        elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/}
          return [source.image_url, headers]
        end

        [url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::DeviantArt.new(url)
          source.get

          source
        end
      end
    end
  end
end
|
||||
@@ -1,26 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for yande.re / konachan.com: turns /jpeg/ URLs into the
    # corresponding /image/ URL with a .png extension.
    class Moebooru < Base
      DOMAINS = '(?:[^.]+\.)?yande\.re|konachan\.com'

      # Returns [url, headers, data]; only URLs on DOMAINS are considered.
      def rewrite(url, headers, data = {})
        on_moebooru_site = url =~ %r{https?://(?:#{DOMAINS})}
        url, headers = rewrite_jpeg_versions(url, headers) if on_moebooru_site

        [url, headers, data]
      end

      protected

      # Maps <site>/jpeg/<hash>[/name].jpg to <site>/image/<hash>[/name].png.
      def rewrite_jpeg_versions(url, headers)
        # example: https://yande.re/jpeg/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.jpg

        if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}
          site = Regexp.last_match(1)
          path = Regexp.last_match(2)
          url = "#{site}/image/#{path}.png"
        end

        [url, headers]
      end
    end
  end
end
|
||||
@@ -1,66 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for lohas.nicoseiga.jp and seiga.nicovideo.jp URLs.
    class NicoSeiga < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]. For Nico Seiga URLs the Referer header is
      # set and the url is passed through each rewrite step in turn.
      def rewrite(url, headers, data = {})
        nico_seiga_url = url =~ %r{https?://lohas\.nicoseiga\.jp} || url =~ %r{https?://seiga\.nicovideo\.jp}

        if nico_seiga_url
          url, headers = rewrite_headers(url, headers)
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
          url, headers = rewrite_view_big_pages(url, headers)
        end

        [url, headers, data]
      end

      protected

      # Sets the Referer on the given headers hash (modified in place).
      def rewrite_headers(url, headers)
        headers["Referer"] = "http://seiga.nicovideo.jp"
        [url, headers]
      end

      # Illustration pages are replaced by the source's image URL.
      def rewrite_html_pages(url, headers)
        # example: http://seiga.nicovideo.jp/seiga/im1389842
        illust_page = url =~ %r{https?://seiga\.nicovideo\.jp/seiga/im\d+}
        [illust_page ? source.image_url : url, headers]
      end

      # URLs containing /thumb/<digits> are replaced by the source's image URL.
      def rewrite_thumbnails(url, headers)
        return [source.image_url, headers] if url =~ %r{/thumb/\d+}

        [url, headers]
      end

      # lohas.nicoseiga.jp/o/ URLs are replaced by the source's image URL.
      def rewrite_view_big_pages(url, headers)
        # example: http://lohas.nicoseiga.jp/o/40aeedd2848a7780b6046747e75b3566b423a10c/1436307639/5026559
        view_big_page = url =~ %r{http://lohas\.nicoseiga\.jp/o/}
        [view_big_page ? source.image_url : url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= ::Sources::Strategies::NicoSeiga.new(url).tap do |fetched|
          fetched.get
        end
      end
    end
  end
end
|
||||
@@ -1,40 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for nijie.info view pages.
    class Nijie < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]; view.php URLs carrying an id are
      # replaced by the image URL reported by the source.
      def rewrite(url, headers, data = {})
        url, headers = rewrite_html_pages(url, headers) if url =~ %r{https?://nijie\.info\/view\.php.+id=\d+}

        [url, headers, data]
      end

      protected

      def rewrite_html_pages(url, headers)
        # example: http://nijie.info/view.php?id=151126
        view_page = url =~ %r{https?://nijie\.info\/view\.php.+id=\d+}
        [view_page ? source.image_url : url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= ::Sources::Strategies::Nijie.new(url).tap do |fetched|
          fetched.get
        end
      end
    end
  end
end
|
||||
@@ -1,17 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for Pawoo. URLs matching Sources::Strategies::Pawoo are
    # resolved to the source's image URL; "small" img.pawoo.net attachment
    # URLs are pointed at the "original" file.
    class Pawoo < Base
      # Returns [url, headers, data] with the url possibly rewritten.
      def rewrite(url, headers, data = {})
        if Sources::Strategies::Pawoo.url_match?(url)
          pawoo_source = Sources::Strategies::Pawoo.new(url)
          pawoo_source.get
          return [pawoo_source.image_url, headers, data]
        end

        if url =~ %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)/small/([a-z0-9]+\.\w+)\z!i
          directory = Regexp.last_match(1)
          filename = Regexp.last_match(2)
          url = "https://img.pawoo.net/media_attachments/files/#{directory}/original/#{filename}"
        end

        [url, headers, data]
      end
    end
  end
end
|
||||
@@ -1,127 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for pixiv.net and i.pximg.net URLs.
    class Pixiv < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data].
      #
      # For Pixiv URLs the Referer header is set and edgesuite CDN hosts are
      # normalized. When the source reports an illust id, the url is further
      # rewritten (html pages, thumbnails, old manga pages, and optionally a
      # thumbnail when data[:get_thumbnail] is set; that key is removed from
      # data). For ugoira zip URLs, ugoira details from the source are stored
      # in data.
      #
      # If PixivApiClient::BadIDError or Sources::Site::NoStrategyError is
      # raised along the way, the values as rewritten up to that point are
      # returned.
      def rewrite(url, headers, data = {})
        if url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/
          url, headers = rewrite_headers(url, headers)
          url, headers = rewrite_cdn(url, headers)
        end

        if (url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/) && source.illust_id_from_url
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
          url, headers = rewrite_old_small_manga_pages(url, headers)
          url, headers = rewrite_to_thumbnails(url, headers) if data.delete(:get_thumbnail)
        end

        # http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip
        if url =~ %r!\Ahttps?://(i\d+\.pixiv|i\.pximg)\.net/img-zip-ugoira/img/\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}/\d+_ugoira\d+x\d+\.zip\z!i
          data[:is_ugoira] = true
          data[:ugoira_frame_data] = source.ugoira_frame_data
          data[:ugoira_content_type] = source.ugoira_content_type
        end

        [url, headers, data]
      rescue PixivApiClient::BadIDError, Sources::Site::NoStrategyError
        [url, headers, data]
      end

      protected

      # Maps a full-size or ugoira zip URL to a small preview URL on the same
      # host. URLs that match none of the known layouts pass through.
      def rewrite_to_thumbnails(url, headers)
        if url =~ %r!https?://(i\d+)\.pixiv\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip!
          url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg"

        elsif url =~ %r!https?://i\.pximg\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip!
          url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg"

        elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.!
          url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg"

        elsif url =~ %r!https?://i\.pximg\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.!
          url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg"

        elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img(\d+)/img/(.+?)/(\d+)\.!
          url = "http://#{$1}.pixiv.net/img#{$2}/img/#{$3}/mobile/#{$4}_240mw.jpg"

        elsif url =~ %r!https?://i\.pximg\.net/img(\d+)/img/(.+?)/(\d+)\.!
          # This pattern has three capture groups (img number, directory, id).
          # Keep the i.pximg.net host, as the other i.pximg.net branches do.
          url = "http://i.pximg.net/img#{$1}/img/#{$2}/mobile/#{$3}_240mw.jpg"
        end

        [url, headers]
      end

      # Sets the Referer on the given headers hash (modified in place).
      def rewrite_headers(url, headers)
        headers["Referer"] = "http://www.pixiv.net"
        [url, headers]
      end

      # Rewrite these:
      #   http://www.pixiv.net/i/18557054
      #   http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
      #   http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
      #   http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
      #   http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
      # Plus this:
      #   i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg
      def rewrite_html_pages(url, headers)
        if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
          [source.file_url, headers]
        else
          [url, headers]
        end
      end

      # Rewrite these:
      #   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
      #   http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
      def rewrite_thumbnails(url, headers)
        [source.rewrite_thumbnails(url), headers]
      end

      # Rewrite these:
      #   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
      #   http://img04.pixiv.net/img/syounen_no_uta/46170939_p0.jpg
      # but not these:
      #   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg
      #   http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
      #   http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
      def rewrite_old_small_manga_pages(url, headers)
        if url !~ %r!/img-(?:original|master)/img/!i && url =~ %r!/(\d+_p\d+)\.!i
          page_name = $1
          big_page_name = page_name.sub(/_p/, "_big_p")
          big_url = url.sub(page_name, big_page_name)
          # Only use the "_big_" variant when it actually exists.
          url = big_url if http_exists?(big_url, headers)
        end

        [url, headers]
      end

      # Drops the ".edgesuite.net" suffix from pixiv.net.edgesuite.net hosts.
      def rewrite_cdn(url, headers)
        if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
          url = url.sub(".edgesuite.net", "")
        end

        [url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Site.new(url)
          source.get

          source
        end
      end
    end
  end
end
|
||||
@@ -1,70 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Regex fragments used to recognise Tumblr media URLs.
    DOMAIN = '(data|(\d+\.)?media)\.tumblr\.com'
    MD5 = '(?<md5>[0-9a-f]{32})'
    FILENAME = '(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)'
    SIZES = '(250|400|500|500h|540|1280|raw)'
    EXT = '(?<ext>\w+)'

    # Rewrite strategy for Tumblr media and page URLs.
    class Tumblr < Base
      # Returns [url, headers, data] after normalizing the CDN host, looking
      # for the biggest available sample, and resolving page URLs.
      def rewrite(url, headers, data = {})
        url = rewrite_cdn(url)
        url = rewrite_samples(url, headers)
        url = rewrite_html_pages(url)

        [url, headers, data]
      end

      protected

      # Look for the biggest available version on data.tumblr.com. A bigger
      # version may or may not exist.
      #
      # http://40.media.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_500h.png
      # => http://data.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_raw.png
      #
      # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
      # => http://data.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
      #
      # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
      # => http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
      #
      # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
      #
      # http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
      # => http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
      #
      # http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      # => http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      #
      # When none of the candidates exists, the url is returned as given
      # instead of nil, so later steps always receive a url.
      def rewrite_samples(url, headers)
        sample = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i.match(url)
        return url unless sample

        # Largest first; the first candidate that exists wins.
        sizes = ["raw", 1280, 640, 540, "500h", 500, 400, 250]
        candidates = sizes.map do |size|
          "http://data.tumblr.com/#{sample[:dir]}#{sample[:filename]}_#{size}.#{sample[:ext]}"
        end

        candidates.find { |candidate| http_exists?(candidate, headers) } || url
      end

      # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      #
      # Returns a new string; the caller's string is left untouched.
      def rewrite_cdn(url)
        url.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com")
      end

      # URLs matching Sources::Strategies::Tumblr are replaced by the image
      # URL reported by that strategy.
      def rewrite_html_pages(url)
        if Sources::Strategies::Tumblr.url_match?(url)
          url = Sources::Strategies::Tumblr.new(url).image_url
        end

        url
      end
    end
  end
end
|
||||
@@ -1,36 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for twitpic.com pages and Twitpic's cloudfront hosts.
    class Twitpic < Base
      # Returns [url, headers, data]; only Twitpic URLs are rewritten.
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://twitpic\.com} || url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net}
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
        end

        [url, headers, data]
      end

      protected

      # Turns a twitpic.com/<id> page URL into its /show/full/<id> URL.
      def rewrite_html_pages(url, headers)
        # example: http://twitpic.com/cpprns

        if url =~ %r{https?://twitpic\.com/([a-z0-9]+)$}
          id = Regexp.last_match(1)
          url = "http://twitpic.com/show/full/#{id}"
        end

        [url, headers]
      end

      # Points /photos/thumb/ URLs at /photos/large/.
      #
      # Uses the non-destructive String#sub so the caller's string is never
      # modified in place (and frozen strings are accepted).
      def rewrite_thumbnails(url, headers)
        if url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net/photos/thumb/(\d+\..+)$}
          file = Regexp.last_match(1)
          url = url.sub("/thumb/" + file, "/large/" + file)
        end

        [url, headers]
      end
    end
  end
end
|
||||
@@ -1,40 +0,0 @@
|
||||
module Downloads
  module RewriteStrategies
    # Rewrite strategy for twitter.com status pages and pbs.twimg.com images.
    class Twitter < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]. twitter.com (and mobile.twitter.com)
      # URLs are replaced by the source's image URL; pbs.twimg.com URLs go
      # through rewrite_thumbnails; anything else passes through.
      def rewrite(url, headers, data = {})
        case url
        when %r!^https?://(?:mobile\.)?twitter\.com!
          [source.image_url, headers, data]
        when %r{^https?://pbs\.twimg\.com}
          rewritten, headers = rewrite_thumbnails(url, headers, data)
          [rewritten, headers, data]
        else
          [url, headers, data]
        end
      end

      protected

      # For pbs.twimg.com/media/ URLs, keeps everything up to the first ":"
      # and appends ":orig".
      def rewrite_thumbnails(url, headers, data)
        media_base = url[%r{^(https?://pbs\.twimg\.com/media/[^:]+)}, 1]
        [media_base ? media_base + ":orig" : url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= ::Sources::Strategies::Twitter.new(url).tap do |fetched|
          fetched.get
        end
      end
    end
  end
end
|
||||
Reference in New Issue
Block a user