Merge pull request #3805 from r888888888/refactor-sources

Refactor sources
This commit is contained in:
Albert Yi
2018-08-28 12:13:15 -07:00
committed by GitHub
71 changed files with 2340 additions and 2430 deletions

View File

@@ -1,20 +1,12 @@
class SourcesController < ApplicationController class SourcesController < ApplicationController
respond_to :json, :xml respond_to :json, :xml
rescue_from Sources::Site::NoStrategyError, :with => :no_strategy
def show def show
@source = Sources::Site.new(params[:url], :referer_url => params[:ref]) @source = Sources::Strategies.find(params[:url], params[:ref])
@source.get
respond_with(@source.to_h) do |format| respond_with(@source.to_h) do |format|
format.xml { render xml: @source.to_h.to_xml(root: "source") } format.xml { render xml: @source.to_h.to_xml(root: "source") }
format.json { render json: @source.to_h.to_json } format.json { render json: @source.to_h.to_json }
end end
end end
protected
def no_strategy
render json: {message: "Unsupported site"}.to_json, status: 400
end
end end

View File

@@ -5,7 +5,7 @@ class UploadsController < ApplicationController
def new def new
@upload_notice_wiki = WikiPage.titled(Danbooru.config.upload_notice_wiki_page).first @upload_notice_wiki = WikiPage.titled(Danbooru.config.upload_notice_wiki_page).first
@upload, @post, @source, @normalized_url, @remote_size = UploadService::ControllerHelper.prepare( @upload, @post, @source, @remote_size = UploadService::ControllerHelper.prepare(
url: params[:url], ref: params[:ref] url: params[:url], ref: params[:ref]
) )
respond_with(@upload) respond_with(@upload)
@@ -43,7 +43,7 @@ class UploadsController < ApplicationController
end end
def preprocess def preprocess
@upload, @post, @source, @normalized_url, @remote_size = UploadService::ControllerHelper.prepare( @upload, @post, @source, @remote_size = UploadService::ControllerHelper.prepare(
url: params[:url], file: params[:file], ref: params[:ref] url: params[:url], file: params[:file], ref: params[:ref]
) )
render body: nil render body: nil

View File

@@ -3,16 +3,33 @@ module Downloads
class Error < Exception ; end class Error < Exception ; end
attr_reader :data, :options attr_reader :data, :options
attr_accessor :source, :original_source, :downloaded_source attr_accessor :source, :referer
def initialize(source, options = {}) # Prevent Cloudflare from potentially mangling the image. See issue #3528.
# Parses +url+ and, when the host is detected as being served through
# Cloudflare, appends a random +danbooru_no_cache+ query parameter so the
# request is not answered from a cached (and possibly altered) copy.
#
# url     - the URL to download (anything Addressable::URI.parse accepts).
# headers - request headers forwarded to the Cloudflare probe.
#
# Returns the parsed URI object, with or without the extra parameter.
def self.uncached_url(url, headers = {})
  uri = Addressable::URI.parse(url)
  return uri unless is_cloudflare?(uri, headers)

  existing_params = uri.query_values || {}
  uri.query_values = existing_params.merge(danbooru_no_cache: SecureRandom.uuid)
  uri
end
# Reports whether a HEAD request to +url+ comes back with a "CF-Ray" header.
# The answer is stored through Cache.get under a key built from the URL's
# origin, with a 4 hour expiry argument.
#
# Raises Error when the HEAD request is not successful.
def self.is_cloudflare?(url, headers = {})
  Cache.get("is_cloudflare:#{url.origin}", 4.hours) do
    request_options = { headers: headers }.deep_merge(Danbooru.config.httparty_options)
    response = HTTParty.head(url, request_options)
    raise Error, "HTTP error code: #{response.code} #{response.message}" unless response.success?

    response.key?("CF-Ray")
  end
end
def initialize(source, referer=nil, options = {})
# source can potentially get rewritten in the course # source can potentially get rewritten in the course
# of downloading a file, so check it again # of downloading a file, so check it again
@source = source @source = source
@original_source = source @referer = referer
# the URL actually downloaded after rewriting the original source.
@downloaded_source = nil
# we sometimes need to capture data from the source page # we sometimes need to capture data from the source page
@data = {} @data = {}
@@ -22,48 +39,31 @@ module Downloads
@data[:get_thumbnail] = options[:get_thumbnail] @data[:get_thumbnail] = options[:get_thumbnail]
end end
# Returns only the rewritten URL that before_download computes for the
# current source, discarding the headers and data it also returns.
def rewrite_url
  before_download(@source, @data).first
end
def size def size
url, headers, _ = before_download(@source, @data) strategy = Sources::Strategies.find(source, referer)
options = { timeout: 3, headers: headers }.deep_merge(Danbooru.config.httparty_options) options = { timeout: 3, headers: strategy.headers }.deep_merge(Danbooru.config.httparty_options)
res = HTTParty.head(url, options)
res = HTTParty.head(strategy.file_url, options)
if res.success?
res.content_length res.content_length
else
raise HTTParty::ResponseError.new(res)
end
end end
def download! def download!
url, headers, @data = before_download(@source, @data) strategy = Sources::Strategies.find(source, referer)
output_file = Tempfile.new(binmode: true) output_file = Tempfile.new(binmode: true)
http_get_streaming(uncached_url(url, headers), output_file, headers) @data = strategy.data
@downloaded_source = url http_get_streaming(
@source = after_download(url) self.class.uncached_url(strategy.file_url, strategy.headers),
output_file,
strategy.headers
)
output_file [output_file, strategy]
end
# Threads the URL, request headers and data hash through every rewrite
# strategy in order. Whenever a strategy yields a nil URL, the URL that was
# originally passed in is substituted before the next strategy runs.
#
# Returns [url, headers, datums] as left by the last strategy.
def before_download(url, datums)
  fallback_url = url
  initial_state = [url, Danbooru.config.http_headers, datums]

  RewriteStrategies::Base.strategies.reduce(initial_state) do |(current_url, headers, data), strategy_class|
    next_url, next_headers, next_data = strategy_class.new(current_url).rewrite(current_url, headers, data)
    [next_url.nil? ? fallback_url : next_url, next_headers, next_data]
  end
end
def after_download(src)
src = fix_twitter_sources(src)
if options[:referer_url].present?
src = set_source_to_referer(src, options[:referer_url])
end
src
end end
def validate_local_hosts(url) def validate_local_hosts(url)
@@ -111,50 +111,5 @@ module Downloads
end end
end # while end # while
end # def end # def
# When +src+ is a twimg.com (Twitter) or img.pawoo.net (Pawoo) media URL and
# the original source was the matching twitter.com / pawoo.net page, the
# original source is returned instead; otherwise +src+ is returned as is.
def fix_twitter_sources(src)
  media_to_page_patterns = [
    [%r!^https?://(?:video|pbs)\.twimg\.com/!, %r!^https?://twitter\.com/!],
    [%r!^https?://img\.pawoo\.net/!, %r!^https?://pawoo\.net/!]
  ]

  media_to_page_patterns.each do |media_pattern, page_pattern|
    return original_source if src =~ media_pattern && original_source =~ page_pattern
  end

  src
end
# Replaces +src+ with the referer URL reported by Sources::Site when the
# source (Nijie, Twitter, Pawoo, Tumblr, ArtStation) or the referer
# (Twitter, Tumblr, ArtStation) matches one of those strategies.
# Any other combination returns +src+ untouched.
def set_source_to_referer(src, referer)
  referer_applies =
    Sources::Strategies::Nijie.url_match?(src) ||
    Sources::Strategies::Twitter.url_match?(src) || Sources::Strategies::Twitter.url_match?(referer) ||
    Sources::Strategies::Pawoo.url_match?(src) ||
    Sources::Strategies::Tumblr.url_match?(src) || Sources::Strategies::Tumblr.url_match?(referer) ||
    Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer)
  return src unless referer_applies

  Sources::Site.new(src, referer_url: referer).referer_url
end
private
# Prevent Cloudflare from potentially mangling the image. See issue #3528.
# Parses +url+ and adds a random +danbooru_no_cache+ query parameter when
# is_cloudflare? reports the host as Cloudflare-backed.
#
# Returns the parsed URI object.
def uncached_url(url, headers = {})
  uri = Addressable::URI.parse(url)
  return uri unless is_cloudflare?(uri, headers)

  existing_params = uri.query_values || {}
  uri.query_values = existing_params.merge(danbooru_no_cache: SecureRandom.uuid)
  uri
end
# Reports whether a HEAD request to +url+ returns a "CF-Ray" header. The
# result goes through Cache.get, keyed by the URL's origin, with a 4 hour
# expiry argument.
#
# Raises Error when the HEAD request is not successful.
def is_cloudflare?(url, headers = {})
  Cache.get("is_cloudflare:#{url.origin}", 4.hours) do
    request_options = { headers: headers }.deep_merge(Danbooru.config.httparty_options)
    response = HTTParty.head(url, request_options)
    raise Error, "HTTP error code: #{response.code} #{response.message}" unless response.success?

    response.key?("CF-Ray")
  end
end
end end
end end

View File

@@ -1,33 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites ArtStation URLs. Sample CDN URLs (medium/small/large) are
    # pointed at the "original" variant when that variant answers a HEAD
    # request; every other URL is resolved through Sources::Site when the
    # ArtStation source strategy recognises it.
    class ArtStation < Base
      # Returns [url, headers, data] with the URL possibly rewritten.
      def rewrite(url, headers, data = {})
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
        unless url =~ %r!^https?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!
          page_url, page_headers = rewrite_html_url(url, headers)
          return [page_url, page_headers, data]
        end

        full_size_url, full_size_headers = rewrite_large_url(url, headers)
        # Keep the sample URL when the full-size file does not exist.
        chosen_url = http_exists?(full_size_url, full_size_headers) ? full_size_url : url
        [chosen_url, full_size_headers, data]
      end

      protected

      # Resolves a page URL to its image URL via Sources::Site; URLs the
      # ArtStation strategy does not match are passed through unchanged.
      def rewrite_html_url(url, headers)
        if Sources::Strategies::ArtStation.url_match?(url)
          site = Sources::Site.new(url)
          site.get
          [site.image_url, headers]
        else
          [url, headers]
        end
      end

      # Swaps the size segment of a sample URL for "original".
      def rewrite_large_url(url, headers)
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/original/aoi-ogata-hate-city.jpg?1476754974
        [url.sub(%r!/(?:medium|small|large)/!, "/original/"), headers]
      end
    end
  end
end

View File

@@ -1,29 +0,0 @@
# This is a collection of strategies for normalizing URLs. Most strategies
# typically work by parsing and rewriting the URL itself, but some strategies
# may delegate to Sources::Strategies to obtain a more canonical URL.
module Downloads
  module RewriteStrategies
    # Common superclass of the rewrite strategies. On its own it is a no-op:
    # #rewrite hands back exactly what it was given.
    class Base
      attr_reader :url

      class << self
        # Ordered list of strategy classes to run.
        def strategies
          [
            Downloads::RewriteStrategies::Pixiv,
            Downloads::RewriteStrategies::NicoSeiga,
            Downloads::RewriteStrategies::ArtStation,
            Downloads::RewriteStrategies::Twitpic,
            Downloads::RewriteStrategies::DeviantArt,
            Downloads::RewriteStrategies::Tumblr,
            Downloads::RewriteStrategies::Moebooru,
            Downloads::RewriteStrategies::Twitter,
            Downloads::RewriteStrategies::Nijie,
            Downloads::RewriteStrategies::Pawoo
          ]
        end
      end

      def initialize(url = nil)
        @url = url
      end

      # Default rewrite: returns [url, headers, data] unchanged.
      def rewrite(url, headers, data = {})
        [url, headers, data]
      end

      protected

      # True when a HEAD request for +url+ (sent with +headers+ merged into
      # the configured HTTParty options) is successful.
      def http_exists?(url, headers)
        request_options = Danbooru.config.httparty_options.deep_merge(headers: headers)
        HTTParty.head(url, request_options).success?
      end
    end
  end
end

View File

@@ -1,53 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites DeviantArt page, thumbnail and preview URLs to a full image
    # URL, consulting the DeviantArt source strategy where the URL alone is
    # not enough.
    class DeviantArt < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]; the URL is rewritten only when it looks
      # like a DeviantArt page or a deviantart.net image.
      def rewrite(url, headers, data = {})
        if url =~ %r{deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} || url =~ %r{deviantart\.net/.+/[a-z0-9_]+(_by_[a-z0-9_]+)?-d([a-z0-9]+)\.}i
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
        end

        [url, headers, data]
      end

      protected

      # Art page URLs are replaced by the image URL reported by the source.
      def rewrite_html_pages(url, headers)
        if url =~ %r{^https?://.+?\.deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
          [source.image_url, headers]
        else
          [url, headers]
        end
      end

      # Strips the "200H/" or "PRE/" thumbnail segment, or asks the source for
      # the image URL for pre/img hosts.
      #
      # Uses non-mutating String#sub: the previous sub! edited the caller's
      # string in place, which also altered whatever the caller kept as its
      # "original" URL and raised on frozen strings.
      def rewrite_thumbnails(url, headers)
        if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
          prefix = $1
          url = url.sub(prefix + "200H/", prefix)
        elsif url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
          prefix = $1
          url = url.sub(prefix + "PRE/", prefix)
        elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/}
          return [source.image_url, headers]
        end

        [url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::DeviantArt.new(url)
          source.get
          source
        end
      end
    end
  end
end

View File

@@ -1,26 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites yande.re / konachan.com "jpeg" sample URLs to the matching
    # "image" PNG URL.
    class Moebooru < Base
      DOMAINS = '(?:[^.]+\.)?yande\.re|konachan\.com'

      # Returns [url, headers, data]; only URLs on the Moebooru domains are
      # considered for rewriting.
      def rewrite(url, headers, data = {})
        return [url, headers, data] unless url =~ %r{https?://(?:#{DOMAINS})}

        rewritten_url, rewritten_headers = rewrite_jpeg_versions(url, headers)
        [rewritten_url, rewritten_headers, data]
      end

      protected

      # Turns <site>/jpeg/<hash...>.jpg into <site>/image/<hash...>.png.
      def rewrite_jpeg_versions(url, headers)
        # example: https://yande.re/jpeg/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.jpg
        if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}
          site = Regexp.last_match(1)
          file_path = Regexp.last_match(2)
          url = "#{site}/image/#{file_path}.png"
        end

        [url, headers]
      end
    end
  end
end

View File

@@ -1,66 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites Nico Seiga page, thumbnail and "view big" URLs to the image
    # URL reported by the NicoSeiga source strategy, and adds the Referer
    # header to the request headers.
    class NicoSeiga < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]; only lohas.nicoseiga.jp and
      # seiga.nicovideo.jp URLs are touched.
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://lohas\.nicoseiga\.jp} || url =~ %r{https?://seiga\.nicovideo\.jp}
          url, headers = rewrite_headers(url, headers)
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
          url, headers = rewrite_view_big_pages(url, headers)
        end

        [url, headers, data]
      end

      protected

      # Returns a copy of +headers+ with the Referer set. The caller's hash is
      # deliberately left untouched: it may be a shared object (for example
      # the configured default headers), and writing into it would leak this
      # Referer into unrelated requests.
      def rewrite_headers(url, headers)
        [url, headers.merge("Referer" => "http://seiga.nicovideo.jp")]
      end

      def rewrite_html_pages(url, headers)
        # example: http://seiga.nicovideo.jp/seiga/im1389842
        if url =~ %r{https?://seiga\.nicovideo\.jp/seiga/im\d+}
          [source.image_url, headers]
        else
          [url, headers]
        end
      end

      def rewrite_thumbnails(url, headers)
        return [source.image_url, headers] if url =~ %r{/thumb/\d+}

        [url, headers]
      end

      def rewrite_view_big_pages(url, headers)
        # example: http://lohas.nicoseiga.jp/o/40aeedd2848a7780b6046747e75b3566b423a10c/1436307639/5026559
        # NOTE(review): this pattern only matches http://, unlike the
        # https?:// patterns used elsewhere in this class - confirm intent.
        if url =~ %r{http://lohas\.nicoseiga\.jp/o/}
          [source.image_url, headers]
        else
          [url, headers]
        end
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::NicoSeiga.new(url)
          source.get
          source
        end
      end
    end
  end
end

View File

@@ -1,40 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites nijie.info view pages to the image URL reported by the Nijie
    # source strategy.
    class Nijie < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data]; only view.php?...id=N pages are rewritten.
      def rewrite(url, headers, data = {})
        return [url, headers, data] unless url =~ %r{https?://nijie\.info\/view\.php.+id=\d+}

        image_url, image_headers = rewrite_html_pages(url, headers)
        [image_url, image_headers, data]
      end

      protected

      def rewrite_html_pages(url, headers)
        # example: http://nijie.info/view.php?id=151126
        return [url, headers] unless url =~ %r{https?://nijie\.info\/view\.php.+id=\d+}

        [source.image_url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          fetched = ::Sources::Strategies::Nijie.new(url)
          fetched.get
          fetched
        end
      end
    end
  end
end

View File

@@ -1,17 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites Pawoo URLs: URLs recognised by the Pawoo source strategy are
    # replaced by the strategy's image URL, and "small" media attachment URLs
    # on img.pawoo.net are pointed at the "original" file.
    class Pawoo < Base
      # Returns [url, headers, data] with the URL possibly rewritten.
      def rewrite(url, headers, data = {})
        if Sources::Strategies::Pawoo.url_match?(url)
          status = Sources::Strategies::Pawoo.new(url)
          status.get
          return [status.image_url, headers, data]
        end

        if url =~ %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)/small/([a-z0-9]+\.\w+)\z!i
          directory = Regexp.last_match(1)
          filename = Regexp.last_match(2)
          url = "https://img.pawoo.net/media_attachments/files/#{directory}/original/#{filename}"
        end

        [url, headers, data]
      end
    end
  end
end

View File

@@ -1,127 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites Pixiv page, thumbnail and CDN URLs to the full-size file URL,
    # adds the Referer header, and records ugoira metadata in the data hash.
    class Pixiv < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data].
      #
      # data[:get_thumbnail], when present, is removed from +data+ and makes
      # the result point at a thumbnail instead of the full-size file. For
      # ugoira zip URLs the frame data and content type from the source are
      # stored in +data+.
      #
      # PixivApiClient::BadIDError and Sources::Site::NoStrategyError are
      # rescued; whatever had been rewritten up to that point is returned.
      def rewrite(url, headers, data = {})
        if url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/
          url, headers = rewrite_headers(url, headers)
          url, headers = rewrite_cdn(url, headers)
        end

        if (url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/) && source.illust_id_from_url
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
          url, headers = rewrite_old_small_manga_pages(url, headers)
          url, headers = rewrite_to_thumbnails(url, headers) if data.delete(:get_thumbnail)
        end

        # http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip
        if url =~ %r!\Ahttps?://(i\d+\.pixiv|i\.pximg)\.net/img-zip-ugoira/img/\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}/\d+_ugoira\d+x\d+\.zip\z!i
          data[:is_ugoira] = true
          data[:ugoira_frame_data] = source.ugoira_frame_data
          data[:ugoira_content_type] = source.ugoira_content_type
        end

        [url, headers, data]
      rescue PixivApiClient::BadIDError, Sources::Site::NoStrategyError
        [url, headers, data]
      end

      protected

      # Maps a full-size (or ugoira zip) URL to a small thumbnail URL.
      def rewrite_to_thumbnails(url, headers)
        if url =~ %r!https?://(i\d+)\.pixiv\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip!
          url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg"
        elsif url =~ %r!https?://i\.pximg\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip!
          url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg"
        elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.!
          url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg"
        elsif url =~ %r!https?://i\.pximg\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.!
          url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg"
        elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img(\d+)/img/(.+?)/(\d+)\.!
          url = "http://#{$1}.pixiv.net/img#{$2}/img/#{$3}/mobile/#{$4}_240mw.jpg"
        elsif url =~ %r!https?://i\.pximg\.net/img(\d+)/img/(.+?)/(\d+)\.!
          # This pattern has three groups (image server number, path, id).
          # The previous code reused the four-group template of the branch
          # above, which put the server number in the host name and left the
          # id empty.
          url = "http://i.pximg.net/img#{$1}/img/#{$2}/mobile/#{$3}_240mw.jpg"
        end

        [url, headers]
      end

      # Returns a copy of +headers+ with the Pixiv Referer set, leaving the
      # caller's hash (which may be a shared default) unmodified.
      def rewrite_headers(url, headers)
        [url, headers.merge("Referer" => "http://www.pixiv.net")]
      end

      # Rewrite these:
      # http://www.pixiv.net/i/18557054
      # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
      # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
      # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
      # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
      # Plus this:
      # i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg
      def rewrite_html_pages(url, headers)
        if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
          [source.file_url, headers]
        else
          [url, headers]
        end
      end

      # Rewrite these:
      # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
      # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
      def rewrite_thumbnails(url, headers)
        [source.rewrite_thumbnails(url), headers]
      end

      # Rewrite these:
      # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
      # http://img04.pixiv.net/img/syounen_no_uta/46170939_p0.jpg
      # but not these:
      # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg
      # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
      # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
      def rewrite_old_small_manga_pages(url, headers)
        if url !~ %r!/img-(?:original|master)/img/!i && url =~ %r!/(\d+_p\d+)\.!i
          page_name = $1
          big_url = url.sub(page_name, page_name.sub(/_p/, "_big_p"))
          # Only switch to the "_big_" variant when it actually exists.
          url = big_url if http_exists?(big_url, headers)
        end

        [url, headers]
      end

      # Drops the ".edgesuite.net" suffix from CDN host names.
      def rewrite_cdn(url, headers)
        if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
          url = url.sub(".edgesuite.net", "")
        end

        [url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Site.new(url)
          source.get
          source
        end
      end
    end
  end
end

View File

@@ -1,70 +0,0 @@
module Downloads
  module RewriteStrategies
    DOMAIN = '(data|(\d+\.)?media)\.tumblr\.com'
    MD5 = '(?<md5>[0-9a-f]{32})'
    FILENAME = '(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)'
    SIZES = '(250|400|500|500h|540|1280|raw)'
    EXT = '(?<ext>\w+)'

    # Rewrites Tumblr CDN, sample and post page URLs to the largest image
    # that can be found.
    class Tumblr < Base
      # Returns [url, headers, data] with the URL possibly rewritten.
      def rewrite(url, headers, data = {})
        url = rewrite_cdn(url)
        url = rewrite_samples(url, headers)
        url = rewrite_html_pages(url)

        [url, headers, data]
      end

      protected

      # Look for the biggest available version on data.tumblr.com. A bigger
      # version may or may not exist.
      #
      # http://40.media.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_500h.png
      # => http://data.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_raw.png
      #
      # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
      # => http://data.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
      #
      # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
      # => http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
      #
      # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
      #
      # http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
      # => http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
      #
      # http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      # => http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      #
      # When none of the candidates exists, the input URL is returned (the
      # previous code returned nil in that case).
      def rewrite_samples(url, headers)
        sample = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i.match(url)
        return url unless sample

        sizes = ["raw", 1280, 640, 540, "500h", 500, 400, 250]
        candidates = sizes.map do |size|
          "http://data.tumblr.com/#{sample[:dir]}#{sample[:filename]}_#{size}.#{sample[:ext]}"
        end

        candidates.find { |candidate| http_exists?(candidate, headers) } || url
      end

      # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      #
      # Non-mutating: the caller's string is left as it was.
      def rewrite_cdn(url)
        url.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com")
      end

      # Post pages recognised by the Tumblr source strategy are replaced by
      # the strategy's image URL.
      def rewrite_html_pages(url)
        if Sources::Strategies::Tumblr.url_match?(url)
          url = Sources::Strategies::Tumblr.new(url).image_url
        end

        url
      end
    end
  end
end

View File

@@ -1,36 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites Twitpic page URLs and CloudFront thumbnail URLs to their
    # full-size counterparts.
    class Twitpic < Base
      # Returns [url, headers, data]; only twitpic.com and the two known
      # CloudFront hosts are touched.
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://twitpic\.com} || url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net}
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
        end

        [url, headers, data]
      end

      protected

      def rewrite_html_pages(url, headers)
        # example: http://twitpic.com/cpprns
        if url =~ %r{https?://twitpic\.com/([a-z0-9]+)$}
          id = $1
          url = "http://twitpic.com/show/full/#{id}"
        end

        [url, headers]
      end

      # Swaps /thumb/ for /large/ in CloudFront photo URLs. Uses String#sub
      # rather than sub! so the string passed in by the caller is not edited
      # in place (and frozen strings are accepted).
      def rewrite_thumbnails(url, headers)
        if url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net/photos/thumb/(\d+\..+)$}
          filename = $1
          url = url.sub("/thumb/" + filename, "/large/" + filename)
        end

        [url, headers]
      end
    end
  end
end

View File

@@ -1,40 +0,0 @@
module Downloads
  module RewriteStrategies
    # Rewrites Twitter status pages to the image URL reported by the Twitter
    # source strategy, and pbs.twimg.com media URLs to their ":orig" variant.
    class Twitter < Base
      attr_accessor :url, :source

      def initialize(url)
        @url = url
      end

      # Returns [url, headers, data] with the URL possibly rewritten.
      def rewrite(url, headers, data = {})
        if url =~ %r!^https?://(?:mobile\.)?twitter\.com!
          return [source.image_url, headers, data]
        end

        if url =~ %r{^https?://pbs\.twimg\.com}
          url, headers = rewrite_thumbnails(url, headers, data)
        end

        [url, headers, data]
      end

      protected

      # Replaces any size suffix on a pbs.twimg.com/media URL with ":orig".
      def rewrite_thumbnails(url, headers, data)
        if url =~ %r{^(https?://pbs\.twimg\.com/media/[^:]+)}
          media_url = Regexp.last_match(1)
          url = "#{media_url}:orig"
        end

        [url, headers]
      end

      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          fetched = ::Sources::Strategies::Twitter.new(url)
          fetched.get
          fetched
        end
      end
    end
  end
end

View File

@@ -1,12 +1,10 @@
class ImageProxy class ImageProxy
def self.needs_proxy?(url) def self.needs_proxy?(url)
fake_referer_for(url).present? fake_referer_for(url).present?
rescue Sources::Site::NoStrategyError
false
end end
def self.fake_referer_for(url) def self.fake_referer_for(url)
Sources::Site.new(url).strategy.try(:fake_referer) Sources::Strategies.find(url).headers["Referer"]
end end
def self.get_image(url) def self.get_image(url)

View File

@@ -10,11 +10,9 @@ module Iqdb
headers = {} headers = {}
datums = {} datums = {}
Downloads::RewriteStrategies::Base.strategies.each do |strategy| strategy = Sources::Strategies.find(url)
url, headers, datums = strategy.new(url).rewrite(url, headers, datums)
end
[url, headers["Referer"]] [strategy.image_url, strategy.headers["Referer"]]
end end
def self.find_similar(source) def self.find_similar(source)

View File

@@ -12,6 +12,8 @@ class NicoSeigaApiClient
resp = HTTParty.get(uri, Danbooru.config.httparty_options) resp = HTTParty.get(uri, Danbooru.config.httparty_options)
if resp.success? if resp.success?
parse_illust_xml_response(resp.body) parse_illust_xml_response(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end end
end end
@@ -20,6 +22,8 @@ class NicoSeigaApiClient
resp = HTTParty.get(uri, Danbooru.config.httparty_options) resp = HTTParty.get(uri, Danbooru.config.httparty_options)
if resp.success? if resp.success?
parse_artist_xml_response(resp.body) parse_artist_xml_response(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end end
end end
@@ -34,6 +38,6 @@ class NicoSeigaApiClient
@image_id = image["id"].to_i @image_id = image["id"].to_i
@user_id = image["user_id"].to_i @user_id = image["user_id"].to_i
@title = image["title"] @title = image["title"]
@desc = image["description"] @desc = image["description"] || image["summary"]
end end
end end

View File

@@ -1,14 +1,26 @@
class PawooApiClient class PawooApiClient
extend Memoist extend Memoist
PROFILE1 = %r!\Ahttps?://pawoo\.net/web/accounts/(\d+)!
PROFILE2 = %r!\Ahttps?://pawoo\.net/@([^/]+)!
STATUS1 = %r!\Ahttps?://pawoo\.net/web/statuses/(\d+)!
STATUS2 = %r!\Ahttps?://pawoo\.net/@.+?/([^/]+)!
class MissingConfigurationError < Exception ; end class MissingConfigurationError < Exception ; end
class Account class Account
attr_reader :json attr_reader :json
def self.is_match?(url) def self.is_match?(url)
url =~ %r!https?://pawoo.net/web/accounts/(\d+)! if url =~ PROFILE1
$1 return $1
end
if url =~ PROFILE2
return $1
end
false
end end
def initialize(json) def initialize(json)
@@ -44,8 +56,15 @@ class PawooApiClient
attr_reader :json attr_reader :json
def self.is_match?(url) def self.is_match?(url)
url =~ %r!https?://pawoo.net/web/statuses/(\d+)! || url =~ %r!https?://pawoo.net/@.+?/(\d+)! if url =~ STATUS1
$1 return $1
end
if url =~ STATUS2
return $1
end
false
end end
def initialize(json) def initialize(json)
@@ -82,11 +101,11 @@ class PawooApiClient
def get(url) def get(url)
if id = Status.is_match?(url) if id = Status.is_match?(url)
Status.new(JSON.parse(access_token.get("/api/v1/statuses/#{id}").body)) return Status.new(JSON.parse(access_token.get("/api/v1/statuses/#{id}").body))
elsif id = Account.is_match?(url) end
Account.new(JSON.parse(access_token.get("/api/v1/accounts/#{id}").body))
else if id = Account.is_match?(url)
nil return Account.new(JSON.parse(access_token.get("/api/v1/accounts/#{id}").body))
end end
end end

View File

@@ -1,6 +1,8 @@
require 'resolv-replace' require 'resolv-replace'
class PixivApiClient class PixivApiClient
extend Memoist
API_VERSION = "1" API_VERSION = "1"
CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s" CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s"
CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK" CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK"
@@ -23,90 +25,11 @@ class PixivApiClient
class Error < Exception ; end class Error < Exception ; end
class BadIDError < Error ; end class BadIDError < Error ; end
class WorksResponse class WorkResponse
attr_reader :json, :pages, :name, :moniker, :user_id, :page_count, :tags attr_reader :json, :pages, :name, :moniker, :user_id, :page_count, :tags
attr_reader :artist_commentary_title, :artist_commentary_desc attr_reader :artist_commentary_title, :artist_commentary_desc
def initialize(json) def initialize(json)
# Sample response:
# {
# "status": "success",
# "response": [
# {
# "id": 49270482,
# "title": "ツイログ",
# "caption": null,
# "tags": [
# "神崎蘭子",
# "双葉杏",
# "アイドルマスターシンデレラガールズ",
# "Star!!",
# "アイマス5000users入り"
# ],
# "tools": [
# "CLIP STUDIO PAINT"
# ],
# "image_urls": {
# "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg"
# },
# "width": 1200,
# "height": 951,
# "stats": {
# "scored_count": 8247,
# "score": 81697,
# "views_count": 191630,
# "favorited_count": {
# "public": 7804,
# "private": 745
# },
# "commented_count": 182
# },
# "publicity": 0,
# "age_limit": "all-age",
# "created_time": "2015-03-14 17:53:32",
# "reuploaded_time": "2015-03-14 17:53:32",
# "user": {
# "id": 341433,
# "account": "nardack",
# "name": "Nardack",
# "is_following": false,
# "is_follower": false,
# "is_friend": false,
# "is_premium": null,
# "profile_image_urls": {
# "px_50x50": "http://i1.pixiv.net/img19/profile/nardack/846482_s.jpg"
# },
# "stats": null,
# "profile": null
# },
# "is_manga": true,
# "is_liked": false,
# "favorite_id": 0,
# "page_count": 2,
# "book_style": "none",
# "type": "illustration",
# "metadata": {
# "pages": [
# {
# "image_urls": {
# "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg",
# "medium": "http://i3.pixiv.net/c/1200x1200/img-master/img/2015/03/14/17/53/32/49270482_p0_master1200.jpg"
# }
# },
# {
# "image_urls": {
# "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p1.jpg",
# "medium": "http://i3.pixiv.net/c/1200x1200/img-master/img/2015/03/14/17/53/32/49270482_p1_master1200.jpg"
# }
# }
# ]
# },
# "content_type": null
# }
# ],
# "count": 1
# }
@json = json @json = json
@name = json["user"]["name"] @name = json["user"]["name"]
@user_id = json["user"]["id"] @user_id = json["user"]["id"]
@@ -131,7 +54,105 @@ class PixivApiClient
end end
end end
def works(illust_id) class NovelResponse
extend Memoist
attr_reader :json
def initialize(json)
@json = json
end
def name
json["user"]["name"]
end
def user_id
json["user"]["id"]
end
def moniker
json["user"]["account"]
end
def page_count
json["page_count"].to_i
end
def artist_commentary_title
json["title"]
end
def artist_commentary_desc
json["caption"]
end
def tags
json["tags"]
end
def pages
# ex:
# https://i.pximg.net/c/150x150_80/novel-cover-master/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b_master1200.jpg (6096b)
# =>
# https://i.pximg.net/novel-cover-original/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b.jpg (532129b)
[find_original(json["image_urls"]["small"])]
end
memoize :pages
public
PXIMG = %r!\Ahttps?://i\.pximg\.net/c/\d+x\d+_\d+/novel-cover-master/img/(?<timestamp>\d+/\d+/\d+/\d+/\d+/\d+)/(?<filename>\d+_[a-f0-9]+)_master\d+\.(?<ext>jpg|jpeg|png|gif)!i

# Maps a resized novel cover URL (matching PXIMG) to the corresponding
# "novel-cover-original" URL. Anything that does not match PXIMG is returned
# unchanged.
def find_original(x)
  return x unless x =~ PXIMG

  cover = Regexp.last_match
  "https://i.pximg.net/novel-cover-original/img/#{cover[:timestamp]}/#{cover[:filename]}.#{cover[:ext]}"
end
end
# Read-only view over the JSON document returned for a fanbox post. All
# values are looked up under the top-level "body" key of that document.
class FanboxResponse
  attr_reader :json

  def initialize(json)
    @json = json
  end

  def name
    json["body"]["user"]["name"]
  end

  def user_id
    json["body"]["user"]["userId"]
  end

  # Not available from this response; always raises NotImplementedError.
  def moniker
    raise NotImplementedError
  end

  # Number of images attached to the post (0 when there are none).
  def page_count
    images.size
  end

  def artist_commentary_title
    json["body"]["title"]
  end

  def artist_commentary_desc
    json["body"]["body"]["text"]
  end

  def tags
    []
  end

  # The "originalUrl" of every attached image, in document order.
  def pages
    images.map { |image| image["originalUrl"] }
  end

  private

  # The post's image list, or an empty array when the document has no
  # "images" entry, so page_count and pages do not fail on nil.
  def images
    json.dig("body", "body", "images") || []
  end
end
def work(illust_id)
headers = Danbooru.config.http_headers.merge( headers = Danbooru.config.http_headers.merge(
"Referer" => "http://www.pixiv.net", "Referer" => "http://www.pixiv.net",
"Content-Type" => "application/x-www-form-urlencoded", "Content-Type" => "application/x-www-form-urlencoded",
@@ -148,7 +169,7 @@ class PixivApiClient
json = JSON.parse(body) json = JSON.parse(body)
if resp.success? if resp.success?
WorksResponse.new(json["response"][0]) WorkResponse.new(json["response"][0])
elsif json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/ elsif json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/
raise BadIDError.new("Pixiv ##{illust_id} not found: work was deleted, made private, or ID is invalid.") raise BadIDError.new("Pixiv ##{illust_id} not found: work was deleted, made private, or ID is invalid.")
else else
@@ -158,6 +179,40 @@ class PixivApiClient
raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})") raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})")
end end
# Fetches the fanbox post with the given id through the web agent and wraps
# the parsed JSON in a FanboxResponse.
#
# Returns a FanboxResponse when the response code is "200". Returns nil when
# the code is anything else and the JSON status is not "failure".
#
# Raises Error when the JSON status is "failure" or the body is not valid
# JSON. The messages use resp.body: the previous code interpolated a local
# named +body+ that is never assigned in this method, so both error paths
# failed with NameError instead of raising Error.
def fanbox(fanbox_id)
  url = "https://www.pixiv.net/ajax/fanbox/post?postId=#{fanbox_id.to_i}"
  resp = agent.get(url)
  json = JSON.parse(resp.body)

  if resp.code == "200"
    FanboxResponse.new(json)
  elsif json["status"] == "failure"
    raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{resp.body})")
  end
rescue JSON::ParserError
  raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{resp.body})")
end
# Requests the novel with the given id from the public API, authenticating
# with the bearer access token, and wraps the first entry of the "response"
# array in a NovelResponse.
#
# Returns a NovelResponse on a successful response, nil when the response is
# unsuccessful but is not the specific "failure" message checked below.
# Raises Error for that failure message and for bodies that are not JSON.
def novel(novel_id)
  request_headers = Danbooru.config.http_headers.merge(
    "Referer" => "http://www.pixiv.net",
    "Content-Type" => "application/x-www-form-urlencoded",
    "Authorization" => "Bearer #{access_token}"
  )
  endpoint = "https://public-api.secure.pixiv.net/v#{API_VERSION}/novels/#{novel_id.to_i}.json"

  resp = HTTParty.get(endpoint, Danbooru.config.httparty_options.deep_merge(headers: request_headers))
  body = resp.body.force_encoding("utf-8")
  json = JSON.parse(body)

  return NovelResponse.new(json["response"][0]) if resp.success?

  not_found = json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/
  raise Error, "Pixiv API call failed (status=#{resp.code} body=#{body})" if not_found
rescue JSON::ParserError
  raise Error, "Pixiv API call failed (status=#{resp.code} body=#{body})"
end
def access_token def access_token
Cache.get("pixiv-papi-access-token", 3000) do Cache.get("pixiv-papi-access-token", 3000) do
access_token = nil access_token = nil
@@ -186,4 +241,9 @@ class PixivApiClient
access_token access_token
end end
end end
# Returns the object produced by PixivWebAgent.build. The fanbox method in
# this file issues its request through it (agent.get).
def agent
PixivWebAgent.build
end
memoize :agent
end end

View File

@@ -1,78 +0,0 @@
# encoding: UTF-8
module Sources
  # Facade over the per-site source strategies: picks the first strategy
  # whose url_match? accepts the URL or the referer URL and forwards the
  # source accessors to it.
  class Site
    # Raised by #initialize when no strategy matches.
    class NoStrategyError < RuntimeError ; end

    attr_reader :strategy
    delegate :url, :get, :get_size, :site_name, :artist_name,
      :profile_url, :image_url, :tags, :artists, :unique_id,
      :file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls,
      :artist_commentary_title, :artist_commentary_desc,
      :dtext_artist_commentary_title, :dtext_artist_commentary_desc,
      :rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy

    # Strategy classes, in the order they are tried.
    def self.strategies
      [Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo]
    end

    # url         - the source URL.
    # referer_url - optional referer; a strategy is also selected when it
    #               matches this URL.
    #
    # Raises NoStrategyError when no strategy matches either URL.
    def initialize(url, referer_url: nil)
      @url = url

      Site.strategies.each do |strategy|
        if strategy.url_match?(url) || strategy.url_match?(referer_url)
          @strategy = strategy.new(url, referer_url)
          return
        end
      end

      raise NoStrategyError.new
    end

    def referer_url
      strategy.try(:referer_url)
    end

    def normalized_for_artist_finder?
      available? && strategy.normalized_for_artist_finder?
    end

    # Returns the strategy's normalized URL when it can produce one, and the
    # plain URL otherwise. Any StandardError raised along the way is
    # swallowed and the plain URL is returned (best effort).
    def normalize_for_artist_finder!
      if available? && strategy.normalizable_for_artist_finder?
        strategy.normalize_for_artist_finder!
      else
        url
      end
    rescue
      url
    end

    # Hash form of the source data used for serialization.
    def to_h
      return {
        :artist_name => artist_name,
        :artists => artists.as_json(include: :sorted_urls),
        :profile_url => profile_url,
        :image_url => image_url,
        :image_urls => image_urls,
        :normalized_for_artist_finder_url => normalize_for_artist_finder!,
        :tags => tags,
        :translated_tags => translated_tags,
        :unique_id => unique_id,
        :artist_commentary => {
          :title => artist_commentary_title,
          :description => artist_commentary_desc,
          :dtext_title => dtext_artist_commentary_title,
          :dtext_description => dtext_artist_commentary_desc,
        }
      }
    end

    # Serializes #to_h. Accepts and forwards any arguments: serializers call
    # to_json with an options hash or generator state, which the previous
    # zero-argument definition rejected with ArgumentError.
    def to_json(*args)
      to_h.to_json(*args)
    end

    def available?
      strategy.present?
    end
  end
end

View File

@@ -0,0 +1,29 @@
module Sources
  # Registry of the site-specific source strategies, plus helpers for picking
  # the one that handles a given url.
  module Strategies
    # Strategy classes in lookup order; .find uses the first one whose match?
    # accepts the url or the referer.
    def self.all
      [
        Strategies::Pixiv,
        Strategies::NicoSeiga,
        Strategies::Twitter,
        Strategies::DeviantArt,
        Strategies::Tumblr,
        Strategies::ArtStation,
        Strategies::Nijie,
        Strategies::Pawoo,
        Strategies::Moebooru,
        Strategies::Null # MUST BE LAST!
      ]
    end

    # Instantiates the first strategy in .all that matches url or referer.
    #
    # NOTE(review): this calls .new on the result of detect, so it relies on
    # some entry always matching (presumably Strategies::Null, hence the
    # "MUST BE LAST" marker above) -- confirm against that class.
    def self.find(url, referer = nil)
      all
        .detect { |strategy| strategy.match?(url, referer) }
        .new(url, referer)
    end

    # Returns canonical_url of the strategy chosen for url. referer is
    # optional, mirroring .find.
    def self.canonical(url, referer = nil)
      find(url, referer).canonical_url
    end
  end
end

View File

@@ -1,68 +1,165 @@
module Sources::Strategies module Sources::Strategies
class ArtStation < Base class ArtStation < Base
PROJECT = %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)/?\z!i
ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i
PROFILE1 = %r!\Ahttps?://(\w+)\.artstation\.com!i
PROFILE2 = %r!\Ahttps?://www.artstation.com/artist/(\w+)!i
PROFILE3 = %r!\Ahttps?://www.artstation.com/(\w+)!i
PROFILE = %r!#{PROFILE2}|#{PROFILE3}|#{PROFILE1}!
attr_reader :json, :image_urls attr_reader :json, :image_urls
def self.url_match?(url) def self.match?(*urls)
self.project_id(url).present? urls.compact.any? { |x| x.match?(PROJECT) || x.match?(ASSET) || x.match?(PROFILE)}
end end
# https://www.artstation.com/artwork/04XA4 # https://www.artstation.com/artwork/04XA4
# https://www.artstation.com/artwork/cody-from-sf # https://www.artstation.com/artwork/cody-from-sf
# https://sa-dui.artstation.com/projects/DVERn # https://sa-dui.artstation.com/projects/DVERn
def self.project_id(url) def self.project_id(url)
if url =~ %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)\z!i if url =~ PROJECT
$~[:project_id] $~[:project_id]
else else
nil nil
end end
end end
def referer_url
if self.class.url_match?(@referer_url)
@referer_url
else
@url
end
end
def site_name def site_name
"ArtStation" "ArtStation"
end end
def project_id def image_urls
self.class.project_id(referer_url) image_urls_sub
.map { |asset| original_asset_url(asset) }
end end
memoize :image_urls
def page_url def page_url
"https://www.artstation.com/artwork/#{project_id}" [url, referer_url].each do |x|
if x =~ PROJECT
return "https://www.artstation.com/artwork/#{$~[:project_id]}"
end end
end
return super
end
# Resolves the artist's profile page.
#
# Tried in order: an artist subdomain (anything but "www"), an
# /artist/<name> url, a bare www.artstation.com/<name> url that is not a
# project page (returned unchanged), and finally the permalink from the API.
def profile_url
  subdomain = PROFILE1.match(url)
  if subdomain && subdomain[1] != "www"
    return "https://www.artstation.com/#{subdomain[1]}"
  end

  artist_path = PROFILE2.match(url)
  return "https://www.artstation.com/#{artist_path[1]}" if artist_path

  return url if PROFILE3.match?(url) && !PROJECT.match?(url)

  api_json["user"]["permalink"]
end
def artist_name
api_json["user"]["username"]
end
def artist_commentary_title
api_json["title"]
end
def artist_commentary_desc
ActionView::Base.full_sanitizer.sanitize(api_json["description"])
end
memoize :artist_commentary_desc
def tags
return nil if !api_json.has_key?("tags")
api_json["tags"].
map { |tag| [tag.downcase.tr(" ", "_"), tag_url(tag)]}
end
memoize :tags
def normalized_for_artist_finder?
url =~ PROFILE3 && url !~ PROFILE2 && url !~ PROJECT
end
def normalizable_for_artist_finder?
url =~ PROFILE || url =~ PROJECT
end
def normalize_for_artist_finder
profile_url
end
public
def image_urls_sub
if url.match?(ASSET)
return [url]
end
api_json["assets"]
.select { |asset| asset["asset_type"] == "image" }
.map { |asset| asset["image_url"] }
end
# these are de facto private methods but are public for testing
# purposes
def project_id
self.class.project_id(url) || self.class.project_id(referer_url)
end
memoize :project_id
def api_url def api_url
"https://www.artstation.com/projects/#{project_id}.json" "https://www.artstation.com/projects/#{project_id}.json"
end end
def image_url def api_json
image_urls.first if project_id.nil?
raise ::Sources::Error.new("Project id could not be determined from (#{url}, #{referer_url})")
end end
def get
resp = HTTParty.get(api_url, Danbooru.config.httparty_options) resp = HTTParty.get(api_url, Danbooru.config.httparty_options)
image_url_rewriter = Downloads::RewriteStrategies::ArtStation.new
if resp.success? if resp.success?
@json = JSON.parse(resp.body) json = JSON.parse(resp.body)
@artist_name = json["user"]["username"]
@profile_url = json["user"]["permalink"]
images = json["assets"].select { |asset| asset["asset_type"] == "image" }
@image_urls = images.map do |x|
y, _, _ = image_url_rewriter.rewrite(x["image_url"], nil)
y
end
@tags = json["tags"].map {|x| [x.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(x)]} if json["tags"]
@artist_commentary_title = json["title"]
@artist_commentary_desc = ActionView::Base.full_sanitizer.sanitize(json["description"])
else else
raise "HTTP error code: #{resp.code} #{resp.message}" raise HTTParty::ResponseError.new(resp)
end
return json
end
memoize :api_json
# Returns the original representation of the asset, if it exists. Otherwise
# return the url.
def original_asset_url(x)
if x =~ ASSET
# example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
original_url = x.sub(%r!/(?:medium|small|large)/!, "/original/")
if http_exists?(original_url, headers)
return original_url
end
if x =~ /medium|small/
large_url = x.sub(%r!/(?:medium|small)/!, "/large/")
if http_exists?(large_url, headers)
return large_url
end end
end end
end end
return x
end
def tag_url(name)
"https://www.artstation.com/search?q=" + CGI.escape(name)
end
end
end end

View File

@@ -1,41 +1,108 @@
# This is a collection of strategies for extracting information about a # This is a collection of strategies for extracting information about a
# resource. At a minimum it tries to extract the artist name and a canonical # resource. At a minimum it tries to extract the artist name and a canonical
# URL to download the image from. But it can also be used to normalize a URL # URL to download the image from. But it can also be used to normalize a URL
# for use with the artist finder. It differs from Downloads::RewriteStrategies # for use with the artist finder.
# in that the latter is more for normalizing and rewriting a URL until it is #
# suitable for downloading, whereas Sources::Strategies is more for meta-data # Design Principles
# that can only be obtained by downloading and parsing the resource. #
# In general you should minimize state. You can safely assume that <tt>url</tt>
# and <tt>referer_url</tt> will not change over the lifetime of an instance,
# so you can safely memoize methods and their results. A common pattern is
# conditionally making an external API call and parsing its response. You should
# make this call on demand and memoize the response.
module Sources module Sources
module Strategies module Strategies
class Base class Base
attr_reader :url, :referer_url attr_reader :url, :referer_url
attr_reader :artist_name, :profile_url, :image_url, :tags
attr_reader :artist_commentary_title, :artist_commentary_desc
def self.url_match?(url) extend Memoist
def self.match?(*urls)
false false
end end
# * <tt>url</tt> - Should point to a resource suitable for
# downloading. This may sometimes point to the binary file.
# It may also point to the artist's profile page, in cases
# where this class is being used to normalize artist urls.
# Implementations should be smart enough to detect this and
# behave accordingly.
# * <tt>referer_url</tt> - Sometimes the HTML page cannot be
# determined from <tt>url</tt>. You should generally pass in a
# <tt>referer_url</tt> so the strategy can discover the HTML
# page and other information.
def initialize(url, referer_url = nil) def initialize(url, referer_url = nil)
@url = url @url = url
@referer_url = referer_url @referer_url = referer_url
end end
# No remote calls are made until this method is called. def site_name
def get
raise NotImplementedError raise NotImplementedError
end end
def get_size # Whatever <tt>url</tt> is, this method should return the direct links
@get_size ||= Downloads::File.new(@image_url).size # to the canonical binary files. It should not be an HTML page. It should
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
# downloader will fetch and save to disk.
def image_urls
raise NotImplementedError
end end
def image_url
image_urls.first
end
# Whatever <tt>url</tt> is, this method should return a link to the HTML
# page containing the resource. It should not be a binary file. It will
# eventually be assigned as the source for the post, but it does not
# represent what the downloader will fetch.
def page_url
Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found"
return nil
end
# This will be the url stored in posts. Typically this is the page
# url, but on some sites it may be preferable to store the image url.
def canonical_url
page_url
end
# A link to the artist's profile page on the site.
def profile_url
nil
end
def artist_name
raise NotImplementedError
end
def artist_commentary_title
nil
end
def artist_commentary_desc
nil
end
# Subclasses should merge in any required headers needed to access resources
# on the site.
def headers
return Danbooru.config.http_headers
end
# Returns the size of the image resource without actually downloading the file.
def size
Downloads::File.new(image_url).size
end
memoize :size
# Subclasses should return true only if the URL is in its final normalized form. # Subclasses should return true only if the URL is in its final normalized form.
# #
# Sources::Site.new("http://img.pixiv.net/img/evazion").normalized_for_artist_finder? # Sources::Strategies.find("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
# => true # => true
# Sources::Site.new("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder? # Sources::Strategies.find("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
# => false # => false
def normalized_for_artist_finder? def normalized_for_artist_finder?
false false
@@ -44,32 +111,33 @@ module Sources
# Subclasses should return true only if the URL is a valid URL that could # Subclasses should return true only if the URL is a valid URL that could
# be converted into normalized form. # be converted into normalized form.
# #
# Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder? # Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
# => true # => true
# Sources::Site.new("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder? # Sources::Strategies.find("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
# => false # => false
def normalizable_for_artist_finder? def normalizable_for_artist_finder?
false false
end end
def normalize_for_artist_finder! def normalize_for_artist_finder
url profile_url || url
end
def site_name
raise NotImplementedError
end end
# A unique identifier for the artist. This is used for artist creation.
def unique_id def unique_id
artist_name artist_name
end end
def artists def artists
Artist.find_artists(url, referer_url) Artist.find_artists(profile_url)
end end
def image_urls def file_url
[image_url] image_url
end
def data
{}
end end
def tags def tags
@@ -97,11 +165,6 @@ module Sources
translated_tags translated_tags
end end
# Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
def fake_referer
nil
end
def dtext_artist_commentary_title def dtext_artist_commentary_title
self.class.to_dtext(artist_commentary_title) self.class.to_dtext(artist_commentary_title)
end end
@@ -110,9 +173,40 @@ module Sources
self.class.to_dtext(artist_commentary_desc) self.class.to_dtext(artist_commentary_desc)
end end
# A strategy may return extra data unrelated to the file
def data
return {}
end
def to_h
return {
:artist_name => artist_name,
:artists => artists.as_json(include: :sorted_urls),
:profile_url => profile_url,
:image_url => image_url,
:image_urls => image_urls,
:normalized_for_artist_finder_url => normalize_for_artist_finder,
:tags => tags,
:translated_tags => translated_tags,
:unique_id => unique_id,
:artist_commentary => {
:title => artist_commentary_title,
:description => artist_commentary_desc,
:dtext_title => dtext_artist_commentary_title,
:dtext_description => dtext_artist_commentary_desc,
}
}
end
def to_json
to_h.to_json
end
protected protected
def agent
raise NotImplementedError def http_exists?(url, headers)
res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
res.success?
end end
# Convert commentary to dtext by stripping html tags. Sites can override # Convert commentary to dtext by stripping html tags. Sites can override

View File

@@ -1,44 +1,127 @@
module Sources module Sources
module Strategies module Strategies
class DeviantArt < Base class DeviantArt < Base
extend Memoist ATTRIBUTED_ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
PATH_ART = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
SUBDOMAIN_ART = %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
PROFILE = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/?\z}
def self.url_match?(url) def self.match?(*urls)
url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/ urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) }
end
def self.normalize(url)
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
url
elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
"http://www.deviantart.com/#{$1}#{$2}"
else
url
end
end
def referer_url
if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
@referer_url
else
@url
end
end end
def site_name def site_name
"Deviant Art" "Deviant Art"
end end
def unique_id def image_urls
artist_name # normalize thumbnails
if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
match = $1
return [url.sub(match + "200H/", match)]
end end
def get if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
# no-op match = $1
return [url.sub(match + "PRE/", match)]
end
# return direct links
if url =~ ATTRIBUTED_ASSET || url =~ ASSET
return [url]
end
# work is deleted, use image url as given by user.
if uuid.nil?
return [url]
end
# work is downloadable
if api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
return [src]
end
# work isn't downloadable, or download size is same as regular size.
if api_deviation.present?
return [api_deviation.dig(:content, :src)]
end
raise "Couldn't find image url"
end
# Derives the HTML page for the work from the url, then from the referer.
#
# Direct asset links map to a fav.me short link built from the captured id,
# www.deviantart.com/<user>/art/ links are returned unchanged, and artist
# subdomain links are rewritten onto www.deviantart.com. Falls back to the
# parent implementation when neither candidate is recognized.
def page_url
  [url, referer_url].each do |candidate|
    asset = ATTRIBUTED_ASSET.match(candidate) || ASSET.match(candidate)
    return "http://fav.me/d#{asset[1]}" if asset

    return candidate if PATH_ART.match?(candidate)

    next if RESERVED_SUBDOMAINS.match?(candidate)

    subdomain = SUBDOMAIN_ART.match(candidate)
    return "http://www.deviantart.com/#{subdomain[1]}#{subdomain[2]}" if subdomain
  end

  super
end
def profile_url
if url =~ PROFILE
return url
end
if artist_name.blank?
return nil
end
return "https://www.deviantart.com/#{artist_name}"
end
def artist_name
api_metadata.dig(:author, :username).try(&:downcase)
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
url =~ PATH_ART || url =~ SUBDOMAIN_ART
end
def normalize_for_artist_finder
profile_url
end
def tags
if api_metadata.blank?
return []
end
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end end
def dtext_artist_commentary_desc def dtext_artist_commentary_desc
@@ -71,75 +154,24 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end end
def artist_name public
api_metadata.dig(:author, :username).try(&:downcase)
end
def profile_url
return "" if artist_name.blank?
"https://www.deviantart.com/#{artist_name}"
end
def image_url
# work is deleted, use image url as given by user.
if uuid.nil?
url
# work is downloadable
elsif api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
src
# work isn't downloadable, or download size is same as regular size.
elsif api_deviation.present?
api_deviation.dig(:content, :src)
else
raise "couldn't find image url"
end
end
def tags
return [] if api_metadata.blank?
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalizable_for_artist_finder?
url !~ %r!^https?://www.deviantart.com/!
end
def normalized_for_artist_finder?
url =~ %r!^https?://www.deviantart.com/!
end
def normalize_for_artist_finder!
profile_url
end
protected
def normalized_url
@normalized_url ||= self.class.normalize(url)
end
def page def page
options = Danbooru.config.httparty_options.deep_merge(format: :plain, headers: { "Accept-Encoding" => "gzip" }) options = Danbooru.config.httparty_options.deep_merge(
resp = HTTParty.get(normalized_url, **options) format: :plain,
headers: { "Accept-Encoding" => "gzip" }
)
resp = HTTParty.get(page_url, **options)
if resp.success?
body = Zlib.gunzip(resp.body) body = Zlib.gunzip(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end
Nokogiri::HTML(body) Nokogiri::HTML(body)
end end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B"> # Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For private works the UUID will be nil. # For private works the UUID will be nil.
@@ -151,29 +183,39 @@ module Sources
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
uuid uuid
end end
memoize :uuid
def api_client def api_client
api_client = DeviantArtApiClient.new(Danbooru.config.deviantart_client_id, Danbooru.config.deviantart_client_secret, Danbooru.config.httparty_options) api_client = DeviantArtApiClient.new(
api_client.access_token = Cache.get("da-access-token", 55.minutes) { api_client.access_token.to_hash } Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret,
Danbooru.config.httparty_options
)
api_client.access_token = Cache.get("da-access-token", 55.minutes) do
api_client.access_token.to_hash
end
api_client api_client
end end
memoize :api_client
def api_deviation def api_deviation
return {} if uuid.nil? return {} if uuid.nil?
api_client.deviation(uuid) api_client.deviation(uuid)
end end
memoize :api_deviation
def api_metadata def api_metadata
return {} if uuid.nil? return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first api_client.metadata(uuid)[:metadata].first
end end
memoize :api_metadata
def api_download def api_download
return {} if uuid.nil? return {} if uuid.nil?
api_client.download(uuid) api_client.download(uuid)
end end
memoize :api_download
memoize :page, :uuid, :api_client, :api_deviation, :api_metadata, :api_download
end end
end end
end end

View File

@@ -0,0 +1,35 @@
module Sources
  module Strategies
    # Strategy for urls on yande.re (including subdomains) and konachan.com.
    class Moebooru < Base
      DOMAINS = /(?:[^.]+\.)?yande\.re|konachan\.com/

      # True when any of the given urls (nils ignored) mentions a known domain.
      def self.match?(*urls)
        urls.compact.any? { |x| x.match?(DOMAINS) }
      end

      # The host of the url doubles as the site name.
      def site_name
        URI.parse(url).host
      end

      # Base#image_urls raises NotImplementedError and Base#to_h reads
      # image_urls, so expose the single image url as a one-element list.
      def image_urls
        [image_url]
      end

      # Rewrites a /jpeg/<hash>...jpg link to the matching /image/<hash>...png
      # link; any other url is returned unchanged.
      def image_url
        if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}
          return $1 + "/image/" + $2 + ".png"
        end

        url
      end

      # The url is used as-is for the page.
      def page_url
        url
      end

      # The url is used as-is for the profile.
      def profile_url
        url
      end

      # No artist name is extracted for these sites.
      def artist_name
        ""
      end
    end
  end
end

View File

@@ -1,145 +1,153 @@
module Sources module Sources
module Strategies module Strategies
class NicoSeiga < Base class NicoSeiga < Base
extend Memoist URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp!
DIRECT = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+!
PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
def self.url_match?(url) def self.match?(*urls)
url =~ /^https?:\/\/(?:\w+\.)?nico(?:seiga|video)\.jp/ urls.compact.any? { |x| x.match?(URL) }
end
def referer_url
if @referer_url =~ /seiga\.nicovideo\.jp\/seiga\/im\d+/ && @url =~ /http:\/\/lohas\.nicoseiga\.jp\/(?:priv|o)\//
@referer_url
else
@url
end
end end
def site_name def site_name
"Nico Seiga" "Nico Seiga"
end end
def unique_id def image_urls
profile_url =~ /\/illust\/(\d+)/ if url =~ DIRECT
"nicoseiga" + $1 return [url]
end end
def get
page = load_page
@artist_name, @profile_url = get_profile_from_api
@image_url = get_image_url_from_page(page)
@artist_commentary_title, @artist_commentary_desc = get_artist_commentary_from_api
# Log out before getting the tags.
# The reason for this is that if you're logged in and viewing a non-adult-rated work, the tags will be added with javascript after the page has loaded meaning we can't extract them easily.
# This does not apply if you're logged out (or if you're viewing an adult-rated work).
agent.cookie_jar.clear!
agent.get(normalized_url) do |page|
@tags = get_tags_from_page(page)
end
end
def normalized_for_artist_finder?
url =~ %r!https?://seiga\.nicovideo\.jp/user/illust/\d+/!i
end
def normalizable_for_artist_finder?
url =~ %r!https?://seiga\.nicovideo\.jp/seiga/im\d+!i
end
def normalize_for_artist_finder!
page = load_page
@illust_id = get_illust_id_from_url
@artist_name, @profile_url = get_profile_from_api
@profile_url + "/"
end
protected
def api_client
NicoSeigaApiClient.new(get_illust_id_from_url)
end
def get_illust_id_from_url
if normalized_url =~ %r!http://seiga.nicovideo.jp/seiga/im(\d+)!
$1.to_i
else
nil
end
end
def load_page
page = agent.get(normalized_url)
if page.search("a#link_btn_login").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nico-seiga-session")
@agent = nil
page = agent.get(normalized_url)
end
page
end
def get_profile_from_api
return [api_client.moniker, "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"]
end
def get_image_url_from_page(page)
link = page.search("a#illust_link") link = page.search("a#illust_link")
if link.any? if link.any?
image_url = "http://seiga.nicovideo.jp" + link[0]["href"] image_url = "http://seiga.nicovideo.jp" + link[0]["href"]
page = agent.get(image_url) # need to follow this redirect while logged in or it won't work page = agent.get(image_url) # need to follow this redirect while logged in or it won't work
if page.is_a?(Mechanize::Image) if page.is_a?(Mechanize::Image)
return page.uri.to_s return [page.uri.to_s]
end end
images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//} images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//}
if images.any? if images.any?
image_url = "http://lohas.nicoseiga.jp" + images[0]["data-src"] return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]]
end
else
image_url = nil
end
return image_url
end
def get_tags_from_page(page)
links = page.search("a.tag")
links.map do |node|
[node.text, "http://seiga.nicovideo.jp" + node.attr("href")]
end end
end end
def get_artist_commentary_from_api raise "image url not found for (#{url}, #{referer_url})"
[api_client.title, api_client.desc]
end end
def normalized_url def page_url
@normalized_url ||= begin [url, referer_url].each do |x|
if url =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)! if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
"http://seiga.nicovideo.jp/seiga/im#{$1}" return "http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i end
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
"http://seiga.nicovideo.jp/seiga/im#{$1}" return "http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i end
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
"http://seiga.nicovideo.jp/seiga/im#{$1}" return "http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{/seiga/im\d+} end
url
else if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
nil return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{/seiga/im\d+}
return x
end end
end end
return super
end end
def profile_url
if url =~ PROFILE
return url
end
"http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
end
def artist_name
api_client.moniker
end
def artist_commentary_title
api_client.title
end
def artist_commentary_desc
api_client.desc
end
def headers
super.merge(
"Referer" => "https://seiga.nicovideo.jp"
)
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
url =~ PAGE || url =~ PROFILE
end
def normalize_for_artist_finder
"#{profile_url}/"
end
def unique_id
"nicoseiga#{api_client.user_id}"
end
def tags
string = page.at("meta[name=keywords]").try(:[], "content") || ""
string.split(/,/).map do |name|
[name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"]
end
end
memoize :tags
public
def api_client
NicoSeigaApiClient.new(illust_id)
end
memoize :api_client
def illust_id
if page_url =~ PAGE
return $1.to_i
end
return nil
end
# Fetches the work's HTML page through the logged-in agent.
#
# When the response contains the login button, the cached session is treated
# as stale: the session cache entry is deleted and the page is fetched again
# with a freshly built agent. The result is memoized.
def page
  doc = agent.get(page_url)

  if doc.search("a#link_btn_login").any?
    # Session cache is invalid, clear it and log in normally.
    Cache.delete("nico-seiga-session")
    # agent is memoized, so a plain call would hand back the same stale
    # agent; passing true asks Memoist to rebuild it.
    doc = agent(true).get(page_url)
  end

  doc
end
memoize :page
def agent def agent
@agent ||= begin
mech = Mechanize.new mech = Mechanize.new
mech.redirect_ok = false mech.redirect_ok = false
mech.keep_alive = false mech.keep_alive = false
@@ -174,9 +182,7 @@ module Sources
mech.redirect_ok = true mech.redirect_ok = true
mech mech
end end
end memoize :agent
memoize :api_client
end end
end end
end end

View File

@@ -1,127 +1,122 @@
module Sources module Sources
module Strategies module Strategies
class Nijie < Base class Nijie < Base
attr_reader :image_urls PICTURE = %r{pic\d+\.nijie.info/nijie_picture/}
PAGE = %r{\Ahttps?://nijie\.info/view\.php.+id=\d+}
DIFF = %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
def self.url_match?(url) def self.match?(*urls)
url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/ urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?nijie\.info/) }
end
def initialize(url, referer_url=nil)
super(normalize_url(url), normalize_url(referer_url))
end
def referer_url
if @referer_url =~ /nijie\.info\/view\.php.+id=\d+/ && @url =~ /pic\d+\.nijie.info\/nijie_picture\//
@referer_url
else
@url
end
end end
def site_name def site_name
"Nijie" "Nijie"
end end
def image_urls
if url =~ PICTURE
return [url]
end
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
if url =~ DIFF
return [normalize_thumbnails(url)]
end
page.search("div#gallery a > img").map do |img|
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
normalize_thumbnails("https:" + img.attr("src"))
end.uniq
end
def page_url
[url, referer_url].each do |x|
if x =~ PAGE
return x
end
if x =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
return "https://nijie.info/view.php?id=#{$1}"
end
end
return super
end
def profile_url
links = page.search("a.name")
if links.any?
return "https://nijie.info/" + links[0]["href"]
end
return nil
end
def artist_name
links = page.search("a.name")
if links.any?
return links[0].text
end
return nil
end
def artist_commentary_title
page.search("h2.illust_title").text
end
def artist_commentary_desc
page.search('meta[property="og:description"]').attr("content").value
end
def tags
links = page.search("div#view-tag a").find_all do |node|
node["href"] =~ /search\.php/
end
if links.any?
return links.map do |node|
[node.inner_text, "https://nijie.info" + node.attr("href")]
end
end
return []
end
def unique_id def unique_id
profile_url =~ /nijie\.info\/members.php\?id=(\d+)/ profile_url =~ /nijie\.info\/members.php\?id=(\d+)/
"nijie" + $1.to_s "nijie" + $1.to_s
end end
def image_url public
image_urls.first
end
def get
page = agent.get(referer_url)
if page.search("div#header-login-container").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nijie-session")
@agent = nil
page = agent.get(referer_url)
end
@artist_name, @profile_url = get_profile_from_page(page)
@image_urls = get_image_urls_from_page(page)
@tags = get_tags_from_page(page)
@artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page)
end
protected
def self.to_dtext(text) def self.to_dtext(text)
text = text.gsub(/\r\n|\r/, "<br>") text = text.gsub(/\r\n|\r/, "<br>")
DText.from_html(text).strip DText.from_html(text).strip
end end
def get_commentary_from_page(page) def normalize_thumbnails(x)
title = page.search("h2.illust_title").text x.gsub(%r!__rs_l120x120/!i, "")
desc = page.search('meta[property="og:description"]').attr("content").value
[title, desc]
end end
def get_profile_from_page(page) def page
links = page.search("a.name") doc = agent.get(page_url)
if links.any? if doc.search("div#header-login-container").any?
profile_url = "http://nijie.info/" + links[0]["href"] # Session cache is invalid, clear it and log in normally.
artist_name = links[0].text Cache.delete("nijie-session")
else doc = agent.get(page_url)
profile_url = nil
artist_name = nil
end end
return [artist_name, profile_url].compact return doc
end
def get_image_urls_from_page(page)
page.search("div#gallery a > img").map do |img|
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
url = "https:" + img.attr("src")
normalize_image_url(url)
end
end
def get_tags_from_page(page)
# puts page.root.to_xhtml
links = page.search("div#view-tag a").find_all do |node|
node["href"] =~ /search\.php/
end
if links.any?
links.map do |node|
[node.inner_text, "http://nijie.info" + node.attr("href")]
end
else
[]
end
end
def normalize_url(url)
if url =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
return "http://nijie.info/view.php?id=#{$1}"
else
return url
end
end
def normalize_image_url(image_url)
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
image_url = image_url.gsub(%r!__rs_l120x120/!i, "")
end
image_url = image_url.gsub(%r!\Ahttp:!i, "https:")
image_url
end end
memoize :page
def agent def agent
@agent ||= begin
mech = Mechanize.new mech = Mechanize.new
session = Cache.get("nijie-session") session = Cache.get("nijie-session")
@@ -131,14 +126,14 @@ module Sources
cookie.path = "/" cookie.path = "/"
mech.cookie_jar.add(cookie) mech.cookie_jar.add(cookie)
else else
mech.get("http://nijie.info/login.php") do |page| mech.get("https://nijie.info/login.php") do |page|
page.form_with(:action => "/login_int.php") do |form| page.form_with(:action => "/login_int.php") do |form|
form['email'] = Danbooru.config.nijie_login form['email'] = Danbooru.config.nijie_login
form['password'] = Danbooru.config.nijie_password form['password'] = Danbooru.config.nijie_password
end.click_button end.click_button
end end
session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first
Cache.put("nijie-session", session.value, 1.month) if session Cache.put("nijie-session", session.value, 1.day) if session
end end
# This cookie needs to be set to allow viewing of adult works while anonymous # This cookie needs to be set to allow viewing of adult works while anonymous
@@ -148,8 +143,16 @@ module Sources
mech.cookie_jar.add(cookie) mech.cookie_jar.add(cookie)
mech mech
end
end rescue Mechanize::ResponseCodeError => x
if x.response_code.to_i == 429
sleep(5)
retry
else
raise
end
end
memoize :agent
end end
end end
end end

View File

@@ -0,0 +1,43 @@
module Sources
  module Strategies
    # Catch-all strategy: it accepts every URL and performs no site-specific
    # processing, echoing the given url wherever a url is expected.
    class Null < Base
      # Accepts any combination of urls.
      def self.match?(*urls)
        true
      end

      # The given url is treated as the only image.
      def image_urls
        [url]
      end

      # The given url doubles as the page url.
      def page_url
        url
      end

      def normalized_for_artist_finder?
        true
      end

      def normalizable_for_artist_finder?
        false
      end

      def normalize_for_artist_finder
        url
      end

      # Hostname of the url, or "N/A" when the url has no host or cannot be
      # parsed.
      def site_name
        host = URI.parse(url).hostname
        host || "N/A"
      rescue
        "N/A"
      end

      def unique_id
        url
      end

      # Download rewrite hook: passes all three values through unchanged.
      def rewrite(url, headers, data)
        [url, headers, data]
      end
    end
  end
end

View File

@@ -1,62 +1,80 @@
# html page urls:
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
#
# image urls:
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
#
# artist urls:
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
module Sources::Strategies module Sources::Strategies
class Pawoo < Base class Pawoo < Base
attr_reader :image_urls IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)!
def self.url_match?(url) def self.match?(*urls)
PawooApiClient::Status.is_match?(url) || PawooApiClient::Account.is_match?(url) urls.compact.any? do |x|
x =~ IMAGE || PawooApiClient::Status.is_match?(x) || PawooApiClient::Account.is_match?(x)
end end
def referer_url
normalized_url
end end
def site_name def site_name
"Pawoo" "Pawoo"
end end
def api_response def image_url
@response ||= PawooApiClient.new.get(normalized_url) image_urls.first
end end
def get # https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
response = api_response # https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
@artist_name = response.account_name # https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
@profile_url = response.profile_url def image_urls
@image_url = response.image_urls.first if url =~ %r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i
@image_urls = response.image_urls return ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"]
@tags = response.tags
@artist_commentary_title = nil
@artist_commentary_desc = response.commentary
end end
def normalized_url if url =~ %r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i
if self.class.url_match?(@url) return [url]
@url
elsif self.class.url_match?(@referer_url)
@referer_url
end end
return api_response.image_urls
end
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
# Returns the first of (url, referer_url) that PawooApiClient::Status
# recognises as a status page; otherwise defers to the superclass.
def page_url
  [url, referer_url].each do |candidate|
    return candidate if PawooApiClient::Status.is_match?(candidate)
  end

  super
end
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
# Profile URL of the account. When `url` matches PawooApiClient::PROFILE2 the
# profile is built from its first capture; otherwise the API response supplies it.
def profile_url
  return "https://pawoo.net/@#{$1}" if url =~ PawooApiClient::PROFILE2

  api_response.profile_url
end
# Account name reported by the API response.
def artist_name
api_response.account_name
end
# This strategy never provides a commentary title; always nil.
def artist_commentary_title
nil
end
# Commentary text reported by the API response.
def artist_commentary_desc
api_response.commentary
end
# Tags reported by the API response, passed through unchanged.
def tags
api_response.tags
end
def normalizable_for_artist_finder? def normalizable_for_artist_finder?
true true
end end
def normalize_for_artist_finder! def normalize_for_artist_finder
get profile_url
@profile_url || @url
end end
def dtext_artist_commentary_desc def dtext_artist_commentary_desc
@@ -68,5 +86,18 @@ module Sources::Strategies
end end
end.strip end.strip
end end
public
# Asks the Pawoo API client about `url` and then `referer_url`, returning the
# first truthy result, or nil when neither yields one.
def api_response
  [url, referer_url].each do |candidate|
    response = PawooApiClient.new.get(candidate)
    return response if response
  end

  nil
end
memoize :api_response
end end
end end

View File

@@ -1,122 +1,23 @@
# encoding: UTF-8
require 'csv' require 'csv'
module Sources module Sources
module Strategies module Strategies
class Pixiv < Base class Pixiv < Base
attr_reader :zip_url, :ugoira_frame_data, :ugoira_content_type MONIKER = %r!(?:[a-zA-Z0-9_-]+)!
PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z!
EXT = %r!(?:jpg|jpeg|png|gif)!i
MONIKER = '(?:[a-zA-Z0-9_-]+)' WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)!
TIMESTAMP = '(?:[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2})' I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)!
EXT = "(?:jpg|jpeg|png|gif)" IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)!
PXIMG = %r!(?:\A(?:https?://)?i\.pximg\.net)!
TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)!
NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))!
FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))!
FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))!
WEB = '(?:\A(?:https?://)?www\.pixiv\.net)' def self.match?(*urls)
I12 = '(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)' urls.compact.any? { |x| x.match?(/#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}|#{FANBOX_IMAGE}/i) }
IMG = '(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)'
PXIMG = '(?:\A(?:https?://)?i\.pximg\.net)'
TOUCH = '(?:\A(?:https?://)?touch\.pixiv\.net)'
def self.url_match?(url)
url =~ /#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}/i
end
def referer_url
if @referer_url =~ /pixiv\.net\/member_illust.+mode=medium/ && @url =~ /#{IMG}|#{I12}/
@referer_url
else
@url
end
end
def site_name
"Pixiv"
end
def unique_id
@pixiv_moniker
end
def fake_referer
"http://www.pixiv.net"
end
def normalized_for_artist_finder?
url =~ %r!\Ahttp://www\.pixiv\.net/member\.php\?id=[0-9]+\z/!
end
def normalizable_for_artist_finder?
has_moniker? || sample_image? || full_image? || work_page?
end
def normalize_for_artist_finder!
@illust_id = illust_id_from_url!
@metadata = get_metadata_from_papi(@illust_id)
"http://www.pixiv.net/member.php?id=#{@metadata.user_id}/"
end
def translate_tag(tag)
normalized_tag = tag.gsub(/\d+users入り\z/i, "")
translated_tags = super(normalized_tag)
if translated_tags.empty? && normalized_tag.include?("/")
translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
end
translated_tags
end
def get
return unless illust_id_from_url
@illust_id = illust_id_from_url
@metadata = get_metadata_from_papi(@illust_id)
page = agent.get(URI.parse(normalized_url))
if page.search("body.not-logged-in").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("pixiv-phpsessid")
@agent = nil
page = agent.get(URI.parse(normalized_url))
end
@artist_name = @metadata.name
@profile_url = "http://www.pixiv.net/member.php?id=#{@metadata.user_id}"
@pixiv_moniker = @metadata.moniker
@zip_url, @ugoira_frame_data, @ugoira_content_type = get_zip_url_from_api
@tags = @metadata.tags.map do |tag|
[tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
end
@page_count = @metadata.page_count
@artist_commentary_title = @metadata.artist_commentary_title
@artist_commentary_desc = @metadata.artist_commentary_desc
is_manga = @page_count > 1
if !@zip_url
page = manga_page_from_url(@url).to_i
@image_url = image_urls[page]
end
end
def rewrite_thumbnails(thumbnail_url, is_manga=nil)
thumbnail_url = rewrite_new_medium_images(thumbnail_url)
thumbnail_url = rewrite_medium_ugoiras(thumbnail_url)
thumbnail_url = rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
return thumbnail_url
end
def agent
@agent ||= PixivWebAgent.build
end
def file_url
image_url || zip_url
end
def image_urls
@metadata.pages
end end
def self.to_dtext(text) def self.to_dtext(text)
@@ -137,18 +38,147 @@ module Sources
DText.from_html(text) DText.from_html(text)
end end
def illust_id_from_url def site_name
if sample_image? || full_image? || work_page? "Pixiv"
illust_id_from_url!
else
nil
end end
rescue Sources::Error
raise if Rails.env.test? def image_urls
image_urls_sub.
map {|x| rewrite_cdn(x)}
rescue PixivApiClient::BadIDError
[url]
end
def page_url
if novel_id.present?
return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
end
if fanbox_id.present?
return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}"
end
if illust_id.present?
return "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}"
end
return url
rescue PixivApiClient::BadIDError
nil nil
end end
def illust_id_from_url! def canonical_url
return image_url
end
# Profile URL of the artist.
#
# If `url` or `referer_url` already matches PROFILE it is returned as-is;
# otherwise the URL is built from the user id in the API metadata. Returns nil
# when the API reports a bad id.
def profile_url
  explicit = [url, referer_url].find { |candidate| candidate =~ PROFILE }
  explicit || "https://www.pixiv.net/member.php?id=#{metadata.user_id}"
rescue PixivApiClient::BadIDError
  nil
end
# Artist name from the API metadata; nil when the API reports a bad id.
def artist_name
metadata.name
rescue PixivApiClient::BadIDError
nil
end
# Commentary title from the API metadata; nil when the API reports a bad id.
def artist_commentary_title
metadata.artist_commentary_title
rescue PixivApiClient::BadIDError
nil
end
# Commentary description from the API metadata; nil when the API reports a bad id.
def artist_commentary_desc
metadata.artist_commentary_desc
rescue PixivApiClient::BadIDError
nil
end
# HTTP headers to send when downloading.
#
# Fanbox downloads need the logged-in session, so they carry the agent's
# cookies plus a fanbox referer; everything else only gets the plain referer.
def headers
  return { "Referer" => "https://www.pixiv.net" } unless fanbox_id.present?

  {
    "Referer" => "https://www.pixiv.net/fanbox",
    "Cookie" => HTTP::Cookie.cookie_value(agent.cookies)
  }
end
# Truthy when `url` already matches PROFILE (the result of `=~`, not a strict boolean).
def normalized_for_artist_finder?
url =~ PROFILE
end
# True when an illust, novel or fanbox id can be derived for this source.
# NOTE(review): illust_id appears to raise Sources::Error when no id can be
# extracted, in which case this predicate would raise rather than return
# false -- confirm against callers.
def normalizable_for_artist_finder?
illust_id.present? || novel_id.present? || fanbox_id.present?
end
# The unique id for a Pixiv source is the artist's moniker.
def unique_id
moniker
end
# Tags from the API metadata as [tag, full-tag search url] pairs.
# Returns an empty list when the API reports a bad id.
def tags
  metadata.tags.map do |name|
    query = { word: name }.to_param
    [name, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{query}"]
  end
rescue PixivApiClient::BadIDError
  []
end
memoize :tags
# Translate a Pixiv tag via the superclass, after stripping a trailing
# "<N>users入り" popularity suffix.
#
# If the whole tag yields no translation and it contains "/", each
# slash-separated part is translated on its own and the results concatenated.
def translate_tag(tag)
  base_tag = tag.gsub(/\d+users入り\z/i, "")
  direct = super(base_tag)
  return direct unless direct.empty? && base_tag.include?("/")

  base_tag.split("/").flat_map { |part| super(part) }
end
public
# Raw image urls for this source, always taken from the API metadata rather
# than derived from the given url.
#
# * a specific manga page was requested -> just that page
# * the pages value is a Hash (ugoira)   -> the ugoira zip url
# * otherwise                            -> every page
def image_urls_sub
  return [metadata.pages[manga_page]] if manga_page.present?

  pages = metadata.pages
  pages.is_a?(Hash) ? [ugoira_zip_url] : pages
end
# Drop the ".edgesuite.net" CDN suffix from a pixiv.net host name.
# Urls that do not start with such a host are returned unchanged.
def rewrite_cdn(x)
  edgesuite_host = %r{\Ahttps?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
  x =~ edgesuite_host ? x.sub(".edgesuite.net", "") : x
end
# in order to prevent recursive loops, this method should not make any
# api calls and only try to extract the illust_id from the url. therefore,
# even though it makes sense to reference page_url here, it will only look
# at (url, referer_url).
def illust_id
# http://img18.pixiv.net/img/evazion/14901720.png # http://img18.pixiv.net/img/evazion/14901720.png
# #
# http://i2.pixiv.net/img18/img/evazion/14901720.png # http://i2.pixiv.net/img18/img/evazion/14901720.png
@@ -165,228 +195,166 @@ module Sources
# #
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip # http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
$1 return $1
end
[url, referer_url].each do |x|
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054 # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054 # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054 # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1 # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
elsif url =~ /illust_id=(\d+)/i if x =~ /illust_id=(\d+)/i
$1 return $1
end
# http://www.pixiv.net/i/18557054 # http://www.pixiv.net/i/18557054
elsif url =~ %r!pixiv\.net/i/(\d+)!i if x =~ %r!pixiv\.net/i/(\d+)!i
$1 return $1
else
raise Sources::Error.new("Couldn't get illust ID from URL: #{url}")
end end
end end
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg raise Sources::Error.new("Couldn't get illust ID from URL (#{url}, #{referer_url})")
# => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
#
# http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
# => http://i.pximg.net/img-original/img/2014/05/15/23/53/59/43521009_p1.jpg
def rewrite_new_medium_images(thumbnail_url)
if thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i ||
thumbnail_url =~ %r!/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i
page = manga_page_from_url(@url).to_i
thumbnail_url = @metadata.pages[page]
end end
memoize :illust_id
thumbnail_url def novel_id
end [url, referer_url].each do |x|
if x =~ NOVEL_PAGE
# http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira600x600.zip return $1
# => http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira1920x1080.zip
def rewrite_medium_ugoiras(thumbnail_url)
if thumbnail_url =~ %r!/img-zip-ugoira/img/.*/\d+_ugoira600x600.zip!i
thumbnail_url = thumbnail_url.sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end
thumbnail_url
end
# If the thumbnail is for a manga gallery, it needs to be rewritten like this:
#
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
#
# Otherwise, it needs to be rewritten like this:
#
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# => http://i2.pixiv.net/img18/img/evazion/14901720.png
#
def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
if thumbnail_url =~ %r!/img/#{MONIKER}/\d+_[ms]\.#{EXT}!i
if is_manga.nil?
page_count = @metadata.page_count
is_manga = page_count > 1
end
if is_manga
page = manga_page_from_url(@url)
return thumbnail_url.sub(/_[ms]\./, "_big_p#{page}.")
else
return thumbnail_url.sub(/_[ms]\./, ".")
end end
end end
return thumbnail_url return nil
end
memoize :novel_id
def fanbox_id
[url, referer_url].each do |x|
if x =~ FANBOX_PAGE
return $1
end end
def manga_page_from_url(url) if x =~ FANBOX_IMAGE
return $1
end
end
return nil
end
memoize :fanbox_id
# Web agent built by PixivWebAgent.build, used for page fetches and cookies.
def agent
PixivWebAgent.build
end
memoize :agent
# Fetch the HTML page at `page_url` through the web agent.
#
# If the response carries the `body.not-logged-in` marker, the cached session
# id is dropped and the page is fetched a second time.
#
# The fetched document is kept in a local named `doc`: a bare `page` inside
# this method is a recursive call to the method itself until a local of that
# name has been assigned, which made the previous version recurse endlessly.
def page
  doc = agent.get(URI.parse(page_url))

  if doc.search("body.not-logged-in").any?
    # Session cache is invalid, clear it and log in normally.
    Cache.delete("pixiv-phpsessid")
    # NOTE(review): `agent` is wrapped by `memoize`, so clearing @agent may not
    # discard the cached agent -- confirm a fresh login really happens here.
    @agent = nil
    doc = agent.get(URI.parse(page_url))
  end

  doc
end
memoize :page
# API metadata for this source. The lookup used depends on which id is
# available: novel first, then fanbox, otherwise the illustration.
def metadata
  return PixivApiClient.new.novel(novel_id) if novel_id.present?
  return PixivApiClient.new.fanbox(fanbox_id) if fanbox_id.present?

  PixivApiClient.new.work(illust_id)
end
memoize :metadata
# The artist's moniker. It is read straight from `url` when the url has one of
# the three known shapes that embed it; otherwise it comes from the API metadata.
def moniker
  embedded_patterns = [
    %r!#{IMG}/img/(#{MONIKER})!i,
    %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i,
    %r!#{WEB}/stacc/(#{MONIKER})/?$!i
  ]

  embedded_patterns.each do |pattern|
    found = pattern.match(url)
    return found[1] if found
  end

  metadata.moniker
end
memoize :moniker
# Number of pages reported by the API metadata.
def page_count
metadata.page_count
end
# Extra data for this source: a hash holding the ugoira frame data under
# :ugoira_frame_data.
def data
return {
ugoira_frame_data: ugoira_frame_data
}
end
# Url of the 1920x1080 ugoira zip, derived from the "ugoira600x600" entry of
# the metadata pages. Returns nil when pages is not a Hash or lacks that entry.
def ugoira_zip_url
  pages = metadata.pages
  return nil unless pages.is_a?(Hash)

  small_zip = pages["ugoira600x600"]
  small_zip ? small_zip.sub("_ugoira600x600.zip", "_ugoira1920x1080.zip") : nil
end
memoize :ugoira_zip_url
# The value at ["metadata"]["frames"] of the API metadata json (via `dig`, so
# nil when that path is absent).
def ugoira_frame_data
return metadata.json.dig("metadata", "frames")
end
memoize :ugoira_frame_data
# MIME type of the ugoira frames, inferred from the first file extension found
# (checked in the order jpg, png, gif) in the stringified "image_urls" entry of
# the metadata json.
#
# Raises Sources::Error when none of the extensions is present.
def ugoira_content_type
  listing = metadata.json["image_urls"].to_s

  return "image/jpeg" if listing =~ /\.jpg/
  return "image/png" if listing =~ /\.png/
  return "image/gif" if listing =~ /\.gif/

  raise Sources::Error.new("content type not found for (#{url}, #{referer_url})")
end
memoize :ugoira_content_type
# True when the work has more than one page.
def is_manga?
page_count > 1
end
# Returns the current page number of the manga. This will not
# make any api calls and only looks at (url, referer_url).
def manga_page
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
# http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i
$1 return $1.to_i
end
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0 # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
elsif url =~ /page=(\d+)/i [url, referer_url].each do |x|
$1 if x =~ /page=(\d+)/i
return $1.to_i
else
0
end end
end end
def get_moniker_from_url return nil
case url
when %r!#{IMG}/img/(#{MONIKER})!i
$1
when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
$1
when %r!#{WEB}/stacc/(#{MONIKER})/?$!i
$1
else
false
end
end
def has_moniker?
get_moniker_from_url != false
end
def get_image_url_from_page(page, is_manga)
if is_manga
elements = page.search("div.works_display a img").find_all do |node|
node["src"] !~ /source\.pixiv\.net/
end
else
elements = page.search("div.works_display div img.big")
elements = page.search("div.works_display div img") if elements.empty?
end
if elements.any?
element = elements.first
thumbnail_url = element.attr("src") || element.attr("data-src")
return rewrite_thumbnails(thumbnail_url, is_manga)
end
if page.body =~ /"original":"(https:.+?)"/
return $1.gsub(/\\\//, '/')
end
end
def get_zip_url_from_api
if @metadata.pages.is_a?(Hash) && @metadata.pages["ugoira600x600"]
zip_url = @metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
frame_data = @metadata.json["metadata"]["frames"]
content_type = nil
case @metadata.json["image_urls"].to_s
when /\.jpg/
content_type = "image/jpeg"
when /\.png/
content_type = "image/png"
when /\.gif/
content_type = "image/gif"
end
return [zip_url, frame_data, content_type]
end
end
def get_zip_url_from_page(page)
scripts = page.search("body script").find_all do |node|
node.text =~ /_ugoira600x600\.zip/
end
if scripts.any?
javascript = scripts.first.text
json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1]
data = JSON.parse(json)
zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
frame_data = data["frames"]
content_type = data["mime_type"]
return [zip_url, frame_data, content_type]
end
end
def normalized_url
"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
end
def get_metadata_from_papi(illust_id)
@metadata ||= PixivApiClient.new.works(illust_id)
end
def work_page?
return true if url =~ %r!(?:#{WEB}|#{TOUCH})/member_illust\.php! && url =~ %r!mode=(?:medium|big|manga|manga_big)! && url =~ %r!illust_id=\d+!
return true if url =~ %r!(?:#{WEB}|#{TOUCH})/i/\d+$!i
return false
end
def full_image?
# http://img18.pixiv.net/img/evazion/14901720.png?1234
return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
# http://i2.pixiv.net/img18/img/evazion/14901720.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
# http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
return true if url =~ %r!#{I12}/img-original/img/#{TIMESTAMP}/\d+_p\d+\.#{EXT}$!i
# http://i.pximg.net/img-original/img/2017/03/22/17/40/51/62041488_p0.jpg
return true if url =~ %r!#{PXIMG}/img-original/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
return true if url =~ %r!(#{I12}|#{PXIMG})/img-zip-ugoira/img/#{TIMESTAMP}/\d+_ugoira\d+x\d+\.zip$!i
return false
end
def sample_image?
# http://img18.pixiv.net/img/evazion/14901720_m.png
return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
# http://i2.pixiv.net/c/64x64/img-master/img/2014/10/09/12/59/50/46441917_square1200.jpg
return true if url =~ %r!#{I12}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}$!i
# http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
return true if url =~ %r!#{PXIMG}/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i.pximg.net/c/600x600/img-master/img/2017/03/22/17/40/51/62041488_p0_master1200.jpg
return true if url =~ %r!#{PXIMG}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
# http://i2.pixiv.net/img-inf/img/2010/11/30/08/54/06/14901765_64x64.jpg
return true if url =~ %r!#{I12}/img-inf/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
return false
end end
memoize :manga_page
end end
end end
end end

View File

@@ -1,28 +1,52 @@
module Sources::Strategies module Sources::Strategies
class Tumblr < Base class Tumblr < Base
extend Memoist DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
MD5 = %r{(?<md5>[0-9a-f]{32})}i
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i
EXT = %r{(?<ext>\w+)}
IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
def self.url_match?(url) def self.match?(*urls)
urls.compact.any? do |url|
blog_name, post_id = parse_info_from_url(url) blog_name, post_id = parse_info_from_url(url)
blog_name.present? && post_id.present? url =~ IMAGE || blog_name.present? && post_id.present?
end
end end
def referer_url def self.parse_info_from_url(url)
blog_name, post_id = self.class.parse_info_from_url(normalized_url) if url =~ POST
"https://#{blog_name}.tumblr.com/post/#{post_id}" [$~[:blog_name], $~[:post_id]]
else
[]
end end
def tags
post[:tags].map do |tag|
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
[tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI::escape(tag.tr(" _-", "-"))}"]
end.uniq
end end
def site_name def site_name
"Tumblr" "Tumblr"
end end
# Final list of image urls: the raw candidates are de-duplicated, passed
# through normalize_cdn and then find_largest, and finally stripped of nils
# and duplicates.
def image_urls
  candidates = image_urls_sub.uniq
  normalized = candidates.map { |candidate| normalize_cdn(candidate) }
  largest = normalized.map { |candidate| find_largest(candidate) }
  largest.compact.uniq
end
# Canonical post url. The first of (url, referer_url) matching POST is
# rebuilt as https://<blog>.tumblr.com/post/<id>; with no match the
# superclass decides.
def page_url
  [url, referer_url].each do |candidate|
    next unless candidate =~ POST

    blog, id = self.class.parse_info_from_url(candidate)
    return "https://#{blog}.tumblr.com/post/#{id}"
  end

  super
end
def profile_url def profile_url
"https://#{artist_name}.tumblr.com/" "https://#{artist_name}.tumblr.com/"
end end
@@ -35,8 +59,10 @@ module Sources::Strategies
case post[:type] case post[:type]
when "text", "link" when "text", "link"
post[:title] post[:title]
when "answer" when "answer"
"#{post[:asking_name]} asked: #{post[:question]}" "#{post[:asking_name]} asked: #{post[:question]}"
else else
nil nil
end end
@@ -46,94 +72,133 @@ module Sources::Strategies
case post[:type] case post[:type]
when "text" when "text"
post[:body] post[:body]
when "link" when "link"
post[:description] post[:description]
when "photo", "video" when "photo", "video"
post[:caption] post[:caption]
when "answer" when "answer"
post[:answer] post[:answer]
else else
nil nil
end end
end end
# Tags of the post as [tag, tagged-page url] pairs, without duplicates.
def tags
  pairs = post[:tags].map do |raw|
    # Space, underscore and hyphen are equivalent in Tumblr tags, so all
    # three collapse to an underscore.
    unified = raw.tr(" _-", "_")
    [unified, "https://tumblr.com/tagged/#{CGI.escape(unified)}"]
  end

  pairs.uniq
end
memoize :tags
def dtext_artist_commentary_desc def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip DText.from_html(artist_commentary_desc).strip
end end
def image_url public
image_urls.first
def image_urls_sub
list = []
if url =~ IMAGE
list << url
end end
def image_urls if page_url !~ POST
urls = case post[:type] return list
when "photo"
post[:photos].map do |photo|
self.class.normalize_image_url(photo[:original_size][:url])
end
when "video"
[post[:video_url]]
else
[]
end end
urls += self.class.parse_inline_images(artist_commentary_desc) if post[:type] == "photo"
urls list += post[:photos].map do |photo|
end photo[:original_size][:url]
def get
end
module HelperMethods
extend ActiveSupport::Concern
module ClassMethods
def parse_info_from_url(url)
url =~ %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
[$1, $2]
end
def parse_inline_images(text)
html = Nokogiri::HTML.fragment(text)
image_urls = html.css("img").map { |node| node["src"] }
image_urls = image_urls.map(&method(:normalize_image_url))
image_urls
end
def normalize_image_url(url)
url, _, _ = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {})
url
end end
end end
def normalized_url if post[:type] == "video"
if self.class.url_match?(@referer_url) list << post[:video_url]
@referer_url
elsif self.class.url_match?(@url)
@url
end end
if inline_images.any?
list += inline_images.to_a
end
if list.any?
return list
end
raise "image url not found for (#{url}, #{referer_url})"
end
# Normalize cdn subdomains by dropping the Edgecast CDN prefix in front of a
# data.tumblr.com or media.tumblr.com host.
#
# https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
#
# Previously only media.tumblr.com was matched, so the documented
# data.tumblr.com example above was never rewritten. Urls without the CDN
# prefix are returned unchanged.
def normalize_cdn(x)
  x.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/((?:data|media)\.tumblr\.com)!i) do
    "http://#{$1.downcase}"
  end
end
# Look for the biggest available version on media.tumblr.com. A bigger
# version may or may not exist.
#
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
# => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
#
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
# => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
#
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
#
# http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
# => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
#
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
def find_largest(x)
if x =~ IMAGE
sizes = [1280, 640, 540, "500h", 500, 400, 250]
candidates = sizes.map do |size|
"https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
end
return candidates.find do |candidate|
http_exists?(candidate, headers)
end end
end end
module ApiMethods return x
end
# `src` attributes of every <img> found in the commentary description, which
# is parsed as an HTML fragment. Entries are nil for images without a src.
def inline_images
html = Nokogiri::HTML.fragment(artist_commentary_desc)
html.css("img").map { |node| node["src"] }
end
memoize :inline_images
def client def client
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil? raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
end end
memoize :client
def api_response def api_response
blog_name, post_id = self.class.parse_info_from_url(normalized_url) blog_name, post_id = self.class.parse_info_from_url(page_url)
raise "Page url not found for (#{url}, #{referer_url})" if blog_name.nil?
client.posts(blog_name, post_id) client.posts(blog_name, post_id)
end end
memoize :api_response
def post def post
api_response[:posts].first api_response[:posts].first
end end
end end
include ApiMethods
include HelperMethods
memoize :client, :api_response
end
end end

View File

@@ -1,52 +1,94 @@
module Sources::Strategies module Sources::Strategies
class Twitter < Base class Twitter < Base
attr_reader :image_urls PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i
# Media asset urls on video.twimg.com / pbs.twimg.com. Capture group 1 is the
# url up to and including "/media/". A stray "}" after the group previously
# required a literal brace there, so no real asset url could match.
ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)!i
def self.url_match?(url) def self.match?(*urls)
self.status_id_from_url(url).present? urls.compact.any? { |x| x =~ PAGE || x =~ ASSET}
end end
def referer_url # https://twitter.com/i/web/status/943446161586733056
normalized_url # https://twitter.com/motty08111213/status/943446161586733056
def self.status_id_from_url(url)
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
return $1
end end
def normalized_url return nil
"https://twitter.com/#{artist_name}/status/#{status_id}"
end
def artist_name
api_response.attrs[:user][:screen_name]
end end
def site_name def site_name
"Twitter" "Twitter"
end end
def api_response def image_urls
@api_response ||= TwitterService.new.client.status(status_id, tweet_mode: "extended") if url =~ /(#{ASSET}[^:]+)/
return [$1 + ":orig" ]
end end
def get [url, referer_url].each do |x|
attrs = api_response.attrs if x =~ PAGE
@profile_url = "https://twitter.com/" + attrs[:user][:screen_name] return service.image_urls(api_response)
@image_urls = TwitterService.new.image_urls(api_response) end
@image_url = @image_urls.first end
@artist_commentary_title = "" rescue Twitter::Error::NotFound
@artist_commentary_desc = attrs[:full_text] url
@tags = attrs[:entities][:hashtags].map do |text:, indices:| end
[text, "https://twitter.com/hashtag/#{text}"] memoize :image_urls
def page_url
[url, referer_url].each do |x|
if self.class.status_id_from_url(x).present?
return x
end end
rescue ::Twitter::Error::Forbidden
end end
def normalize_for_artist_finder! return super
url.downcase end
def profile_url
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i
if $1 != "i"
return "https://twitter.com/#{$1}"
end
end
"https://twitter.com/" + api_response.attrs[:user][:screen_name]
rescue Twitter::Error::NotFound
nil
end
def artist_name
api_response.attrs[:user][:screen_name]
rescue Twitter::Error::NotFound
nil
end
def artist_commentary_title
""
end
def artist_commentary_desc
api_response.attrs[:full_text]
rescue Twitter::Error::NotFound
nil
end end
def normalizable_for_artist_finder? def normalizable_for_artist_finder?
true url =~ PAGE
end end
def normalize_for_artist_finder
profile_url.downcase
end
def tags
api_response.attrs[:entities][:hashtags].map do |text:, indices:|
[text, "https://twitter.com/hashtag/#{text}"]
end
end
memoize :tags
def dtext_artist_commentary_desc def dtext_artist_commentary_desc
url_replacements = api_response.urls.map do |obj| url_replacements = api_response.urls.map do |obj|
[obj.url.to_s, obj.expanded_url.to_s] [obj.url.to_s, obj.expanded_url.to_s]
@@ -63,19 +105,23 @@ module Sources::Strategies
desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]') desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]')
desc.strip desc.strip
end end
memoize :dtext_artist_commentary_desc
public
def service
TwitterService.new
end
memoize :service
def api_response
service.client.status(status_id, tweet_mode: "extended")
end
memoize :api_response
def status_id def status_id
self.class.status_id_from_url(@url) || self.class.status_id_from_url(@referer_url) [url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first
end
# https://twitter.com/i/web/status/943446161586733056
# https://twitter.com/motty08111213/status/943446161586733056
def self.status_id_from_url(url)
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
$1
else
nil
end
end end
memoize :status_id
end end
end end

View File

@@ -1,8 +1,9 @@
class TwitterService class TwitterService
extend Memoist
def client def client
raise "Twitter API keys not set" if Danbooru.config.twitter_api_key.nil? raise "Twitter API keys not set" if Danbooru.config.twitter_api_key.nil?
@client ||= begin
rest_client = ::Twitter::REST::Client.new do |config| rest_client = ::Twitter::REST::Client.new do |config|
config.consumer_key = Danbooru.config.twitter_api_key config.consumer_key = Danbooru.config.twitter_api_key
config.consumer_secret = Danbooru.config.twitter_api_secret config.consumer_secret = Danbooru.config.twitter_api_secret
@@ -15,7 +16,7 @@ class TwitterService
rest_client rest_client
end end
end memoize :client
def extract_urls_for_status(tweet) def extract_urls_for_status(tweet)
tweet.media.map do |obj| tweet.media.map do |obj|

View File

@@ -49,7 +49,7 @@ class UploadService
@upload.update(status: "processing") @upload.update(status: "processing")
if @upload.file.nil? && Utils.is_downloadable?(source) if @upload.file.nil? && Utils.is_downloadable?(source)
@upload.file = Utils.download_for_upload(source, @upload) @upload.file = Utils.download_for_upload(@upload)
end end
if @upload.file.present? if @upload.file.present?
@@ -111,7 +111,9 @@ class UploadService
p.image_width = upload.image_width p.image_width = upload.image_width
p.image_height = upload.image_height p.image_height = upload.image_height
p.rating = upload.rating p.rating = upload.rating
p.source = upload.source if upload.source.present?
p.source = Sources::Strategies.find(upload.source, upload.referer_url).canonical_url
end
p.file_size = upload.file_size p.file_size = upload.file_size
p.uploader_id = upload.uploader_id p.uploader_id = upload.uploader_id
p.uploader_ip_addr = upload.uploader_ip_addr p.uploader_ip_addr = upload.uploader_ip_addr

View File

@@ -4,13 +4,8 @@ class UploadService
upload = Upload.new upload = Upload.new
if Utils.is_downloadable?(url) && file.nil? if Utils.is_downloadable?(url) && file.nil?
download = Downloads::File.new(url) strategy = Sources::Strategies.find(url, ref)
normalized_url = download.rewrite_url() post = Post.where("SourcePattern(lower(posts.source)) IN (?)", [url, strategy.canonical_url]).first
post = if normalized_url.nil?
Post.where("SourcePattern(lower(posts.source)) = ?", url).first
else
Post.where("SourcePattern(lower(posts.source)) IN (?)", [url, normalized_url]).first
end
if post.nil? if post.nil?
# this gets called from UploadsController#new so we need # this gets called from UploadsController#new so we need
@@ -19,13 +14,15 @@ class UploadService
end end
begin begin
source = Sources::Site.new(url, :referer_url => ref) download = Downloads::File.new(url, ref)
remote_size = download.size remote_size = download.size
rescue Exception rescue Exception
end end
return [upload, post, source, normalized_url, remote_size] return [upload, post, strategy, remote_size]
elsif file end
if file
# this gets called via XHR so we can process sync # this gets called via XHR so we can process sync
Preprocessor.new(file: file).delayed_start(CurrentUser.id) Preprocessor.new(file: file).delayed_start(CurrentUser.id)
end end
@@ -35,9 +32,7 @@ class UploadService
def self.batch(url, ref = nil) def self.batch(url, ref = nil)
if url if url
source = Sources::Site.new(url, :referer_url => ref) return Sources::Strategies.find(url, ref)
source.get
return source
end end
end end
end end

View File

@@ -1,5 +1,7 @@
class UploadService class UploadService
class Preprocessor class Preprocessor
extend Memoist
attr_reader :params, :original_post_id attr_reader :params, :original_post_id
def initialize(params) def initialize(params)
@@ -15,31 +17,40 @@ class UploadService
params[:md5_confirmation] params[:md5_confirmation]
end end
def referer def referer_url
params[:referer_url] params[:referer_url]
end end
def normalized_source def strategy
@normalized_source ||= begin Sources::Strategies.find(source, referer_url)
Downloads::File.new(params[:source]).rewrite_url
end end
memoize :strategy
# When searching posts we have to use the canonical source
def canonical_source
strategy.canonical_url
end end
memoize :canonical_source
def in_progress? def in_progress?
if Utils.is_downloadable?(source) if Utils.is_downloadable?(source)
Upload.where(status: "preprocessing", source: normalized_source).or(Upload.where(status: "preprocessing", alt_source: normalized_source)).exists? return Upload.where(status: "preprocessing", source: source).exists?
elsif md5.present?
Upload.where(status: "preprocessing", md5: md5).exists?
else
false
end end
if md5.present?
return Upload.where(status: "preprocessing", md5: md5).exists?
end
false
end end
def predecessor def predecessor
if Utils.is_downloadable?(source) if Utils.is_downloadable?(source)
Upload.where(status: ["preprocessed", "preprocessing"]).where(source: normalized_source).or(Upload.where(status: ["preprocessed", "preprocessing"], alt_source: normalized_source)).first return Upload.where(status: ["preprocessed", "preprocessing"], source: source).first
elsif md5.present? end
Upload.where(status: ["preprocessed", "preprocessing"], md5: md5).first
if md5.present?
return Upload.where(status: ["preprocessed", "preprocessing"], md5: md5).first
end end
end end
@@ -59,34 +70,31 @@ class UploadService
def start! def start!
if Utils.is_downloadable?(source) if Utils.is_downloadable?(source)
CurrentUser.as_system do CurrentUser.as_system do
if Post.tag_match("source:#{normalized_source}").where.not(id: original_post_id).exists? if Post.tag_match("source:#{canonical_source}").where.not(id: original_post_id).exists?
raise ActiveRecord::RecordNotUnique.new("A post with source #{normalized_source} already exists") raise ActiveRecord::RecordNotUnique.new("A post with source #{canonical_source} already exists")
end end
end end
if Upload.where(source: normalized_source, status: "completed").exists? if Upload.where(source: source, status: "completed").exists?
raise ActiveRecord::RecordNotUnique.new("A completed upload with source #{normalized_source} already exists") raise ActiveRecord::RecordNotUnique.new("A completed upload with source #{source} already exists")
end end
if Upload.where(source: normalized_source).where("status like ?", "error%").exists? if Upload.where(source: source).where("status like ?", "error%").exists?
raise ActiveRecord::RecordNotUnique.new("An errored upload with source #{normalized_source} already exists") raise ActiveRecord::RecordNotUnique.new("An errored upload with source #{source} already exists")
end end
end end
params[:rating] ||= "q" params[:rating] ||= "q"
params[:tag_string] ||= "tagme" params[:tag_string] ||= "tagme"
upload = Upload.create!(params) upload = Upload.create!(params)
begin begin
upload.update(status: "preprocessing") upload.update(status: "preprocessing")
if Utils.is_downloadable?(source) if params[:file].present?
# preserve the original source (for twitter, the twimg:orig
# source, while the status url is stored in upload.source)
upload.alt_source = normalized_source
file = Utils.download_for_upload(source, upload)
elsif params[:file].present?
file = params[:file] file = params[:file]
elsif Utils.is_downloadable?(source)
file = Utils.download_for_upload(upload)
end end
Utils.process_file(upload, file, original_post_id: original_post_id) Utils.process_file(upload, file, original_post_id: original_post_id)
@@ -109,10 +117,7 @@ class UploadService
# goto whoever submitted the form # goto whoever submitted the form
pred.initialize_attributes pred.initialize_attributes
# we went through a lot of trouble normalizing the source, pred.attributes = self.params
# so don't overwrite it with whatever the user provided
pred.source = "" if pred.source.nil?
pred.attributes = self.params.except(:source)
# if a file was uploaded after the preprocessing occurred, # if a file was uploaded after the preprocessing occurred,
# then process the file and overwrite whatever the preprocessor # then process the file and overwrite whatever the preprocessor

View File

@@ -74,8 +74,8 @@ class UploadService
if replacement.replacement_file.present? if replacement.replacement_file.present?
replacement.replacement_url = "file://#{replacement.replacement_file.original_filename}" replacement.replacement_url = "file://#{replacement.replacement_file.original_filename}"
elsif upload.downloaded_source.present? elsif upload.source.present?
replacement.replacement_url = upload.downloaded_source replacement.replacement_url = Sources::Strategies.canonical(upload.source, upload.referer_url)
end end
if md5_changed if md5_changed
@@ -93,7 +93,7 @@ class UploadService
post.image_width = upload.image_width post.image_width = upload.image_width
post.image_height = upload.image_height post.image_height = upload.image_height
post.file_size = upload.file_size post.file_size = upload.file_size
post.source = upload.downloaded_source || upload.source post.source = Sources::Strategies.canonical(upload.source, upload.referer_url)
post.tag_string = upload.tag_string post.tag_string = upload.tag_string
update_ugoira_frame_data(post, upload) update_ugoira_frame_data(post, upload)

View File

@@ -200,37 +200,19 @@ class UploadService
tags.join(" ") tags.join(" ")
end end
def download_from_source(source, referer_url: nil) def download_for_upload(upload)
download = Downloads::File.new(source, referer_url: referer_url) download = Downloads::File.new(upload.source, upload.referer_url)
file, strategy = download.download!
file = download.download! if download.data[:ugoira_frame_data]
context = { upload.context = {
downloaded_source: download.downloaded_source, "ugoira" => {
source: download.source "frame_data" => download.data[:ugoira_frame_data],
"content_type" => "image/jpeg"
} }
if download.data[:is_ugoira]
context[:ugoira] = {
frame_data: download.data[:ugoira_frame_data],
content_type: download.data[:ugoira_content_type]
} }
end end
yield(context)
return file
end
def download_for_upload(source, upload)
file = download_from_source(source, referer_url: upload.referer_url) do |context|
upload.downloaded_source = context[:downloaded_source]
upload.source = context[:source]
if context[:ugoira]
upload.context = { ugoira: context[:ugoira] }
end
end
return file return file
end end
end end

View File

@@ -152,7 +152,7 @@ class Artist < ApplicationRecord
url = ArtistUrl.normalize(url) url = ArtistUrl.normalize(url)
artists = [] artists = []
# return [] unless Sources::Site.new(url).normalized_for_artist_finder? # return [] unless Sources::Strategies.find(url).normalized_for_artist_finder?
while artists.empty? && url.size > 10 while artists.empty? && url.size > 10
u = url.sub(/\/+$/, "") + "/" u = url.sub(/\/+$/, "") + "/"
@@ -481,13 +481,8 @@ class Artist < ApplicationRecord
end end
def search_for_profile(url) def search_for_profile(url)
source = Sources::Site.new(url) source = Sources::Strategies.find(url)
if source.strategy
source.get
find_all_by_url(source.profile_url) find_all_by_url(source.profile_url)
else
nil
end
rescue Net::OpenTimeout, PixivApiClient::Error rescue Net::OpenTimeout, PixivApiClient::Error
raise if Rails.env.test? raise if Rails.env.test?
nil nil

View File

@@ -22,7 +22,7 @@ class ArtistUrl < ApplicationRecord
url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2") url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2") url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2") url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/') # url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/") url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2') url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2')
@@ -32,11 +32,15 @@ class ArtistUrl < ApplicationRecord
url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com! url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com!
begin begin
url = Sources::Site.new(url).normalize_for_artist_finder! source = Sources::Strategies.find(url)
if !source.normalized_for_artist_finder? && source.normalizable_for_artist_finder?
url = source.normalize_for_artist_finder
end
rescue Net::OpenTimeout, PixivApiClient::Error rescue Net::OpenTimeout, PixivApiClient::Error
raise if Rails.env.test? raise if Rails.env.test?
rescue Sources::Site::NoStrategyError
end end
url = url.gsub(/\/+\Z/, "") url = url.gsub(/\/+\Z/, "")
url = url.gsub(%r!^https://!, "http://") url = url.gsub(%r!^https://!, "http://")
url + "/" url + "/"
@@ -102,10 +106,6 @@ class ArtistUrl < ApplicationRecord
end end
def normalize def normalize
if !Sources::Site.new(normalized_url).normalized_for_artist_finder?
self.normalized_url = self.class.normalize(url)
end
rescue Sources::Site::NoStrategyError
self.normalized_url = self.class.normalize(url) self.normalized_url = self.class.normalize(url)
end end

View File

@@ -1682,7 +1682,11 @@ class Post < ApplicationRecord
module PixivMethods module PixivMethods
def parse_pixiv_id def parse_pixiv_id
self.pixiv_id = Sources::Strategies::Pixiv.new(source).illust_id_from_url self.pixiv_id = nil
if Sources::Strategies::Pixiv.match?(source)
self.pixiv_id = Sources::Strategies::Pixiv.new(source).illust_id
end
end end
end end
@@ -1790,10 +1794,8 @@ class Post < ApplicationRecord
return if has_tag?("artist_request") || has_tag?("official_art") return if has_tag?("artist_request") || has_tag?("official_art")
return if tags.any? { |t| t.category == Tag.categories.artist } return if tags.any? { |t| t.category == Tag.categories.artist }
site = Sources::Site.new(source) site = Sources::Strategies.find(source)
self.warnings[:base] << "Artist tag is required. Create a new tag with [[artist:<artist_name>]]. Ask on the forum if you need naming help" self.warnings[:base] << "Artist tag is required. Create a new tag with [[artist:<artist_name>]]. Ask on the forum if you need naming help"
rescue Sources::Site::NoStrategyError => e
# unrecognized source; do nothing.
end end
def has_copyright_tag def has_copyright_tag

View File

@@ -47,8 +47,7 @@ class Upload < ApplicationRecord
end end
attr_accessor :as_pending, attr_accessor :as_pending, :replaced_post, :file
:referer_url, :downloaded_source, :replaced_post, :file
belongs_to :uploader, :class_name => "User" belongs_to :uploader, :class_name => "User"
belongs_to :post, optional: true belongs_to :post, optional: true
@@ -63,6 +62,7 @@ class Upload < ApplicationRecord
validates :file_ext, format: { with: /jpg|gif|png|swf|webm|mp4|zip/ }, allow_nil: true validates :file_ext, format: { with: /jpg|gif|png|swf|webm|mp4|zip/ }, allow_nil: true
validates_with Validator validates_with Validator
serialize :context, JSON serialize :context, JSON
scope :preprocessed, -> { where(status: "preprocessed") }
def initialize_attributes def initialize_attributes
self.uploader_id = CurrentUser.id self.uploader_id = CurrentUser.id

View File

@@ -1,8 +1,8 @@
<% if params[:url] %> <% if params[:url] %>
<% if ImageProxy.needs_proxy?(@normalized_url) %> <% if ImageProxy.needs_proxy?(@source.image_url) %>
<%= image_tag(image_proxy_uploads_path(:url => @normalized_url), :title => "Preview", :id => "image") %> <%= image_tag(image_proxy_uploads_path(:url => @source.image_url), :title => "Preview", :id => "image") %>
<% else %> <% else %>
<%= image_tag(@normalized_url, :title => "Preview", :id => "image") %> <%= image_tag(@source.image_url, :title => "Preview", :id => "image") %>
<% end %> <% end %>
<ul> <ul>

View File

@@ -47,11 +47,10 @@
</span> </span>
<br> <br>
<% if upload.alt_source.present? %> <% if upload.referer_url.present? %>
<span class="info"> <span class="info">
<strong>Alternate Source</strong> <strong>Referer</strong>
<%= link_to_if (upload.alt_source =~ %r!\Ahttps?://!i), (upload.alt_source.presence.try(:truncate, 50) || content_tag(:em, "none")), upload.source %> <%= URI.parse(upload.referer_url).host rescue nil %>
<%= link_to "»", uploads_path(search: params[:search].merge(source_matches: upload.alt_source)) %>
</span> </span>
<br> <br>
<% end %> <% end %>

View File

@@ -20,9 +20,7 @@
<%= form_for(@upload, :html => {:multipart => true, :class => "simple_form", :id => "form"}) do |f| %> <%= form_for(@upload, :html => {:multipart => true, :class => "simple_form", :id => "form"}) do |f| %>
<%= hidden_field_tag :url, params[:url] %> <%= hidden_field_tag :url, params[:url] %>
<%= hidden_field_tag :ref, params[:ref] %> <%= hidden_field_tag :ref, params[:ref] %>
<%= hidden_field_tag :normalized_url, @normalized_url %>
<%= f.hidden_field :md5_confirmation %> <%= f.hidden_field :md5_confirmation %>
<%= f.hidden_field :referer_url, :value => @source.try(:referer_url) %>
<% if CurrentUser.can_upload_free? %> <% if CurrentUser.can_upload_free? %>
<div class="input"> <div class="input">

View File

@@ -49,6 +49,7 @@ services:
- DANBOORU_NICO_SEIGA_LOGIN - DANBOORU_NICO_SEIGA_LOGIN
- DANBOORU_NICO_SEIGA_PASSWORD - DANBOORU_NICO_SEIGA_PASSWORD
- DANBOORU_PERSIST_PIXIV_SESSION - DANBOORU_PERSIST_PIXIV_SESSION
- DANBOORU_TUMBLR_CONSUMER_KEY
- CIRCLE_NODE_TOTAL - CIRCLE_NODE_TOTAL
- CIRCLE_NODE_INDEX - CIRCLE_NODE_INDEX
- CIRCLE_BUILD_IMAGE - CIRCLE_BUILD_IMAGE

View File

@@ -0,0 +1,5 @@
class RenameAltSourceOnUploads < ActiveRecord::Migration[5.2]
def change
rename_column :uploads, :alt_source, :referer_url
end
end

View File

@@ -433,8 +433,8 @@ CREATE TABLE public.advertisement_hits (
id integer NOT NULL, id integer NOT NULL,
advertisement_id integer NOT NULL, advertisement_id integer NOT NULL,
ip_addr inet NOT NULL, ip_addr inet NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -464,15 +464,15 @@ ALTER SEQUENCE public.advertisement_hits_id_seq OWNED BY public.advertisement_hi
CREATE TABLE public.advertisements ( CREATE TABLE public.advertisements (
id integer NOT NULL, id integer NOT NULL,
referral_url text NOT NULL, referral_url text NOT NULL,
ad_type character varying(255) NOT NULL, ad_type character varying NOT NULL,
status character varying(255) NOT NULL, status character varying NOT NULL,
hit_count integer DEFAULT 0 NOT NULL, hit_count integer DEFAULT 0 NOT NULL,
width integer NOT NULL, width integer NOT NULL,
height integer NOT NULL, height integer NOT NULL,
file_name character varying(255) NOT NULL, file_name character varying NOT NULL,
is_work_safe boolean DEFAULT false NOT NULL, is_work_safe boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -502,8 +502,8 @@ ALTER SEQUENCE public.advertisements_id_seq OWNED BY public.advertisements.id;
CREATE TABLE public.amazon_backups ( CREATE TABLE public.amazon_backups (
id integer NOT NULL, id integer NOT NULL,
last_id integer, last_id integer,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -564,7 +564,7 @@ ALTER SEQUENCE public.anti_voters_id_seq OWNED BY public.anti_voters.id;
CREATE TABLE public.api_keys ( CREATE TABLE public.api_keys (
id integer NOT NULL, id integer NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
key character varying(255) NOT NULL, key character varying NOT NULL,
created_at timestamp without time zone, created_at timestamp without time zone,
updated_at timestamp without time zone updated_at timestamp without time zone
); );
@@ -612,8 +612,8 @@ CREATE TABLE public.artist_commentaries (
original_description text DEFAULT ''::text NOT NULL, original_description text DEFAULT ''::text NOT NULL,
translated_title text DEFAULT ''::text NOT NULL, translated_title text DEFAULT ''::text NOT NULL,
translated_description text DEFAULT ''::text NOT NULL, translated_description text DEFAULT ''::text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -649,8 +649,8 @@ CREATE TABLE public.artist_commentary_versions (
original_description text, original_description text,
translated_title text, translated_title text,
translated_description text, translated_description text,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -682,8 +682,8 @@ CREATE TABLE public.artist_urls (
artist_id integer NOT NULL, artist_id integer NOT NULL,
url text NOT NULL, url text NOT NULL,
normalized_url text NOT NULL, normalized_url text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
is_active boolean DEFAULT true NOT NULL is_active boolean DEFAULT true NOT NULL
); );
@@ -714,16 +714,16 @@ ALTER SEQUENCE public.artist_urls_id_seq OWNED BY public.artist_urls.id;
CREATE TABLE public.artist_versions ( CREATE TABLE public.artist_versions (
id integer NOT NULL, id integer NOT NULL,
artist_id integer NOT NULL, artist_id integer NOT NULL,
name character varying(255) NOT NULL, name character varying NOT NULL,
updater_id integer NOT NULL, updater_id integer NOT NULL,
updater_ip_addr inet NOT NULL, updater_ip_addr inet NOT NULL,
is_active boolean DEFAULT true NOT NULL, is_active boolean DEFAULT true NOT NULL,
other_names text, other_names text,
group_name character varying(255), group_name character varying,
url_string text, url_string text,
is_banned boolean DEFAULT false NOT NULL, is_banned boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -752,15 +752,15 @@ ALTER SEQUENCE public.artist_versions_id_seq OWNED BY public.artist_versions.id;
CREATE TABLE public.artists ( CREATE TABLE public.artists (
id integer NOT NULL, id integer NOT NULL,
name character varying(255) NOT NULL, name character varying NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
is_active boolean DEFAULT true NOT NULL, is_active boolean DEFAULT true NOT NULL,
is_banned boolean DEFAULT false NOT NULL, is_banned boolean DEFAULT false NOT NULL,
other_names text, other_names text,
other_names_index tsvector, other_names_index tsvector,
group_name character varying(255), group_name character varying,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -793,8 +793,8 @@ CREATE TABLE public.bans (
reason text NOT NULL, reason text NOT NULL,
banner_id integer NOT NULL, banner_id integer NOT NULL,
expires_at timestamp without time zone NOT NULL, expires_at timestamp without time zone NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -826,7 +826,7 @@ CREATE TABLE public.bulk_update_requests (
user_id integer NOT NULL, user_id integer NOT NULL,
forum_topic_id integer, forum_topic_id integer,
script text NOT NULL, script text NOT NULL,
status character varying(255) DEFAULT 'pending'::character varying NOT NULL, status character varying DEFAULT 'pending'::character varying NOT NULL,
created_at timestamp without time zone, created_at timestamp without time zone,
updated_at timestamp without time zone, updated_at timestamp without time zone,
approver_id integer, approver_id integer,
@@ -863,8 +863,8 @@ CREATE TABLE public.comment_votes (
comment_id integer NOT NULL, comment_id integer NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
score integer NOT NULL, score integer NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -899,8 +899,8 @@ CREATE TABLE public.comments (
ip_addr inet NOT NULL, ip_addr inet NOT NULL,
body_index tsvector NOT NULL, body_index tsvector NOT NULL,
score integer DEFAULT 0 NOT NULL, score integer DEFAULT 0 NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
updater_id integer, updater_id integer,
updater_ip_addr inet, updater_ip_addr inet,
do_not_bump_post boolean DEFAULT false NOT NULL, do_not_bump_post boolean DEFAULT false NOT NULL,
@@ -941,10 +941,10 @@ CREATE TABLE public.delayed_jobs (
run_at timestamp without time zone, run_at timestamp without time zone,
locked_at timestamp without time zone, locked_at timestamp without time zone,
failed_at timestamp without time zone, failed_at timestamp without time zone,
locked_by character varying(255), locked_by character varying,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
queue character varying(255) queue character varying
); );
@@ -1013,8 +1013,8 @@ CREATE TABLE public.dmails (
message_index tsvector NOT NULL, message_index tsvector NOT NULL,
is_read boolean DEFAULT false NOT NULL, is_read boolean DEFAULT false NOT NULL,
is_deleted boolean DEFAULT false NOT NULL, is_deleted boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
creator_ip_addr inet NOT NULL, creator_ip_addr inet NOT NULL,
is_spam boolean DEFAULT false is_spam boolean DEFAULT false
); );
@@ -2149,8 +2149,8 @@ CREATE TABLE public.forum_posts (
body text NOT NULL, body text NOT NULL,
text_index tsvector NOT NULL, text_index tsvector NOT NULL,
is_deleted boolean DEFAULT false NOT NULL, is_deleted boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -2182,7 +2182,7 @@ CREATE TABLE public.forum_subscriptions (
user_id integer, user_id integer,
forum_topic_id integer, forum_topic_id integer,
last_read_at timestamp without time zone, last_read_at timestamp without time zone,
delete_key character varying(255) delete_key character varying
); );
@@ -2246,14 +2246,14 @@ CREATE TABLE public.forum_topics (
id integer NOT NULL, id integer NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
updater_id integer NOT NULL, updater_id integer NOT NULL,
title character varying(255) NOT NULL, title character varying NOT NULL,
response_count integer DEFAULT 0 NOT NULL, response_count integer DEFAULT 0 NOT NULL,
is_sticky boolean DEFAULT false NOT NULL, is_sticky boolean DEFAULT false NOT NULL,
is_locked boolean DEFAULT false NOT NULL, is_locked boolean DEFAULT false NOT NULL,
is_deleted boolean DEFAULT false NOT NULL, is_deleted boolean DEFAULT false NOT NULL,
text_index tsvector NOT NULL, text_index tsvector NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
category_id integer DEFAULT 0 NOT NULL, category_id integer DEFAULT 0 NOT NULL,
min_level integer DEFAULT 0 NOT NULL min_level integer DEFAULT 0 NOT NULL
); );
@@ -2287,8 +2287,8 @@ CREATE TABLE public.ip_bans (
creator_id integer NOT NULL, creator_id integer NOT NULL,
ip_addr inet NOT NULL, ip_addr inet NOT NULL,
reason text NOT NULL, reason text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -2320,9 +2320,9 @@ CREATE TABLE public.janitor_trials (
creator_id integer NOT NULL, creator_id integer NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
original_level integer, original_level integer,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
status character varying(255) DEFAULT 'active'::character varying NOT NULL status character varying DEFAULT 'active'::character varying NOT NULL
); );
@@ -2353,8 +2353,8 @@ CREATE TABLE public.mod_actions (
id integer NOT NULL, id integer NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
description text NOT NULL, description text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
category integer category integer
); );
@@ -2387,8 +2387,8 @@ CREATE TABLE public.news_updates (
message text NOT NULL, message text NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
updater_id integer NOT NULL, updater_id integer NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -2427,8 +2427,8 @@ CREATE TABLE public.note_versions (
height integer NOT NULL, height integer NOT NULL,
is_active boolean DEFAULT true NOT NULL, is_active boolean DEFAULT true NOT NULL,
body text NOT NULL, body text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
version integer DEFAULT 0 NOT NULL version integer DEFAULT 0 NOT NULL
); );
@@ -2467,8 +2467,8 @@ CREATE TABLE public.notes (
is_active boolean DEFAULT true NOT NULL, is_active boolean DEFAULT true NOT NULL,
body text NOT NULL, body text NOT NULL,
body_index tsvector NOT NULL, body_index tsvector NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
version integer DEFAULT 0 NOT NULL version integer DEFAULT 0 NOT NULL
); );
@@ -2500,7 +2500,7 @@ CREATE TABLE public.pixiv_ugoira_frame_data (
id integer NOT NULL, id integer NOT NULL,
post_id integer, post_id integer,
data text NOT NULL, data text NOT NULL,
content_type character varying(255) NOT NULL content_type character varying NOT NULL
); );
@@ -2529,16 +2529,16 @@ ALTER SEQUENCE public.pixiv_ugoira_frame_data_id_seq OWNED BY public.pixiv_ugoir
CREATE TABLE public.pools ( CREATE TABLE public.pools (
id integer NOT NULL, id integer NOT NULL,
name character varying(255), name character varying,
creator_id integer NOT NULL, creator_id integer NOT NULL,
description text, description text,
is_active boolean DEFAULT true NOT NULL, is_active boolean DEFAULT true NOT NULL,
post_ids text DEFAULT ''::text NOT NULL, post_ids text DEFAULT ''::text NOT NULL,
post_count integer DEFAULT 0 NOT NULL, post_count integer DEFAULT 0 NOT NULL,
is_deleted boolean DEFAULT false NOT NULL, is_deleted boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
category character varying(255) DEFAULT 'series'::character varying NOT NULL category character varying DEFAULT 'series'::character varying NOT NULL
); );
@@ -2571,8 +2571,8 @@ CREATE TABLE public.post_appeals (
creator_id integer NOT NULL, creator_id integer NOT NULL,
creator_ip_addr inet, creator_ip_addr inet,
reason text, reason text,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -2635,9 +2635,9 @@ CREATE TABLE public.post_disapprovals (
id integer NOT NULL, id integer NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
post_id integer NOT NULL, post_id integer NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
reason character varying(255) DEFAULT 'legacy'::character varying, reason character varying DEFAULT 'legacy'::character varying,
message text message text
); );
@@ -2672,8 +2672,8 @@ CREATE TABLE public.post_flags (
creator_ip_addr inet NOT NULL, creator_ip_addr inet NOT NULL,
reason text, reason text,
is_resolved boolean DEFAULT false NOT NULL, is_resolved boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -2758,8 +2758,8 @@ CREATE TABLE public.post_votes (
post_id integer NOT NULL, post_id integer NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
score integer NOT NULL, score integer NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -2788,13 +2788,13 @@ ALTER SEQUENCE public.post_votes_id_seq OWNED BY public.post_votes.id;
CREATE TABLE public.posts ( CREATE TABLE public.posts (
id integer NOT NULL, id integer NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
up_score integer DEFAULT 0 NOT NULL, up_score integer DEFAULT 0 NOT NULL,
down_score integer DEFAULT 0 NOT NULL, down_score integer DEFAULT 0 NOT NULL,
score integer DEFAULT 0 NOT NULL, score integer DEFAULT 0 NOT NULL,
source character varying(255) DEFAULT ''::character varying NOT NULL, source character varying DEFAULT ''::character varying NOT NULL,
md5 character varying(255) NOT NULL, md5 character varying NOT NULL,
rating character(1) DEFAULT 'q'::bpchar NOT NULL, rating character(1) DEFAULT 'q'::bpchar NOT NULL,
is_note_locked boolean DEFAULT false NOT NULL, is_note_locked boolean DEFAULT false NOT NULL,
is_rating_locked boolean DEFAULT false NOT NULL, is_rating_locked boolean DEFAULT false NOT NULL,
@@ -2817,7 +2817,7 @@ CREATE TABLE public.posts (
tag_count_artist integer DEFAULT 0 NOT NULL, tag_count_artist integer DEFAULT 0 NOT NULL,
tag_count_character integer DEFAULT 0 NOT NULL, tag_count_character integer DEFAULT 0 NOT NULL,
tag_count_copyright integer DEFAULT 0 NOT NULL, tag_count_copyright integer DEFAULT 0 NOT NULL,
file_ext character varying(255) NOT NULL, file_ext character varying NOT NULL,
file_size integer NOT NULL, file_size integer NOT NULL,
image_width integer NOT NULL, image_width integer NOT NULL,
image_height integer NOT NULL, image_height integer NOT NULL,
@@ -2890,7 +2890,7 @@ ALTER SEQUENCE public.saved_searches_id_seq OWNED BY public.saved_searches.id;
-- --
CREATE TABLE public.schema_migrations ( CREATE TABLE public.schema_migrations (
version character varying(255) NOT NULL version character varying NOT NULL
); );
@@ -2931,14 +2931,14 @@ ALTER SEQUENCE public.super_voters_id_seq OWNED BY public.super_voters.id;
CREATE TABLE public.tag_aliases ( CREATE TABLE public.tag_aliases (
id integer NOT NULL, id integer NOT NULL,
antecedent_name character varying(255) NOT NULL, antecedent_name character varying NOT NULL,
consequent_name character varying(255) NOT NULL, consequent_name character varying NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
creator_ip_addr inet NOT NULL, creator_ip_addr inet NOT NULL,
forum_topic_id integer, forum_topic_id integer,
status text DEFAULT 'pending'::text NOT NULL, status text DEFAULT 'pending'::text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
post_count integer DEFAULT 0 NOT NULL, post_count integer DEFAULT 0 NOT NULL,
approver_id integer, approver_id integer,
forum_post_id integer forum_post_id integer
@@ -2970,15 +2970,15 @@ ALTER SEQUENCE public.tag_aliases_id_seq OWNED BY public.tag_aliases.id;
CREATE TABLE public.tag_implications ( CREATE TABLE public.tag_implications (
id integer NOT NULL, id integer NOT NULL,
antecedent_name character varying(255) NOT NULL, antecedent_name character varying NOT NULL,
consequent_name character varying(255) NOT NULL, consequent_name character varying NOT NULL,
descendant_names text NOT NULL, descendant_names text NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
creator_ip_addr inet NOT NULL, creator_ip_addr inet NOT NULL,
forum_topic_id integer, forum_topic_id integer,
status text DEFAULT 'pending'::text NOT NULL, status text DEFAULT 'pending'::text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
approver_id integer, approver_id integer,
forum_post_id integer forum_post_id integer
); );
@@ -3010,14 +3010,14 @@ ALTER SEQUENCE public.tag_implications_id_seq OWNED BY public.tag_implications.i
CREATE TABLE public.tag_subscriptions ( CREATE TABLE public.tag_subscriptions (
id integer NOT NULL, id integer NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
name character varying(255) NOT NULL, name character varying NOT NULL,
tag_query text NOT NULL, tag_query text NOT NULL,
post_ids text NOT NULL, post_ids text NOT NULL,
is_public boolean DEFAULT true NOT NULL, is_public boolean DEFAULT true NOT NULL,
last_accessed_at timestamp without time zone, last_accessed_at timestamp without time zone,
is_opted_in boolean DEFAULT false NOT NULL, is_opted_in boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -3046,7 +3046,7 @@ ALTER SEQUENCE public.tag_subscriptions_id_seq OWNED BY public.tag_subscriptions
CREATE TABLE public.tags ( CREATE TABLE public.tags (
id integer NOT NULL, id integer NOT NULL,
name character varying(255) NOT NULL, name character varying NOT NULL,
post_count integer DEFAULT 0 NOT NULL, post_count integer DEFAULT 0 NOT NULL,
category integer DEFAULT 0 NOT NULL, category integer DEFAULT 0 NOT NULL,
related_tags text, related_tags text,
@@ -3094,8 +3094,8 @@ CREATE UNLOGGED TABLE public.token_buckets (
CREATE TABLE public.uploads ( CREATE TABLE public.uploads (
id integer NOT NULL, id integer NOT NULL,
source text, source text,
file_path character varying(255), file_path character varying,
content_type character varying(255), content_type character varying,
rating character(1) NOT NULL, rating character(1) NOT NULL,
uploader_id integer NOT NULL, uploader_id integer NOT NULL,
uploader_ip_addr inet NOT NULL, uploader_ip_addr inet NOT NULL,
@@ -3103,9 +3103,9 @@ CREATE TABLE public.uploads (
status text DEFAULT 'pending'::text NOT NULL, status text DEFAULT 'pending'::text NOT NULL,
backtrace text, backtrace text,
post_id integer, post_id integer,
md5_confirmation character varying(255), md5_confirmation character varying,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
server text, server text,
parent_id integer, parent_id integer,
md5 character varying, md5 character varying,
@@ -3117,7 +3117,7 @@ CREATE TABLE public.uploads (
artist_commentary_title text, artist_commentary_title text,
include_artist_commentary boolean, include_artist_commentary boolean,
context text, context text,
alt_source text referer_url text
); );
@@ -3148,10 +3148,10 @@ CREATE TABLE public.user_feedback (
id integer NOT NULL, id integer NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
category character varying(255) NOT NULL, category character varying NOT NULL,
body text NOT NULL, body text NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -3180,15 +3180,15 @@ ALTER SEQUENCE public.user_feedback_id_seq OWNED BY public.user_feedback.id;
CREATE TABLE public.user_name_change_requests ( CREATE TABLE public.user_name_change_requests (
id integer NOT NULL, id integer NOT NULL,
status character varying(255) DEFAULT 'pending'::character varying NOT NULL, status character varying DEFAULT 'pending'::character varying NOT NULL,
user_id integer NOT NULL, user_id integer NOT NULL,
approver_id integer, approver_id integer,
original_name character varying(255), original_name character varying,
desired_name character varying(255), desired_name character varying,
change_reason text, change_reason text,
rejection_reason text, rejection_reason text,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -3217,10 +3217,10 @@ ALTER SEQUENCE public.user_name_change_requests_id_seq OWNED BY public.user_name
CREATE TABLE public.user_password_reset_nonces ( CREATE TABLE public.user_password_reset_nonces (
id integer NOT NULL, id integer NOT NULL,
key character varying(255) NOT NULL, key character varying NOT NULL,
email character varying(255) NOT NULL, email character varying NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL updated_at timestamp without time zone
); );
@@ -3249,12 +3249,12 @@ ALTER SEQUENCE public.user_password_reset_nonces_id_seq OWNED BY public.user_pas
CREATE TABLE public.users ( CREATE TABLE public.users (
id integer NOT NULL, id integer NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
name character varying(255) NOT NULL, name character varying NOT NULL,
password_hash character varying(255) NOT NULL, password_hash character varying NOT NULL,
email character varying(255), email character varying,
email_verification_key character varying(255), email_verification_key character varying,
inviter_id integer, inviter_id integer,
level integer DEFAULT 0 NOT NULL, level integer DEFAULT 0 NOT NULL,
base_upload_limit integer DEFAULT 10 NOT NULL, base_upload_limit integer DEFAULT 10 NOT NULL,
@@ -3266,13 +3266,13 @@ CREATE TABLE public.users (
note_update_count integer DEFAULT 0 NOT NULL, note_update_count integer DEFAULT 0 NOT NULL,
favorite_count integer DEFAULT 0 NOT NULL, favorite_count integer DEFAULT 0 NOT NULL,
comment_threshold integer DEFAULT '-1'::integer NOT NULL, comment_threshold integer DEFAULT '-1'::integer NOT NULL,
default_image_size character varying(255) DEFAULT 'large'::character varying NOT NULL, default_image_size character varying DEFAULT 'large'::character varying NOT NULL,
favorite_tags text, favorite_tags text,
blacklisted_tags text DEFAULT 'spoilers blacklisted_tags text DEFAULT 'spoilers
guro guro
scat scat
furry -rating:s'::text, furry -rating:s'::text,
time_zone character varying(255) DEFAULT 'Eastern Time (US & Canada)'::character varying NOT NULL, time_zone character varying DEFAULT 'Eastern Time (US & Canada)'::character varying NOT NULL,
bcrypt_password_hash text, bcrypt_password_hash text,
per_page integer DEFAULT 20 NOT NULL, per_page integer DEFAULT 20 NOT NULL,
custom_style text, custom_style text,
@@ -3310,11 +3310,11 @@ CREATE TABLE public.wiki_page_versions (
wiki_page_id integer NOT NULL, wiki_page_id integer NOT NULL,
updater_id integer NOT NULL, updater_id integer NOT NULL,
updater_ip_addr inet NOT NULL, updater_ip_addr inet NOT NULL,
title character varying(255) NOT NULL, title character varying NOT NULL,
body text NOT NULL, body text NOT NULL,
is_locked boolean NOT NULL, is_locked boolean NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
other_names text, other_names text,
is_deleted boolean DEFAULT false NOT NULL is_deleted boolean DEFAULT false NOT NULL
); );
@@ -3346,12 +3346,12 @@ ALTER SEQUENCE public.wiki_page_versions_id_seq OWNED BY public.wiki_page_versio
CREATE TABLE public.wiki_pages ( CREATE TABLE public.wiki_pages (
id integer NOT NULL, id integer NOT NULL,
creator_id integer NOT NULL, creator_id integer NOT NULL,
title character varying(255) NOT NULL, title character varying NOT NULL,
body text NOT NULL, body text NOT NULL,
body_index tsvector NOT NULL, body_index tsvector NOT NULL,
is_locked boolean DEFAULT false NOT NULL, is_locked boolean DEFAULT false NOT NULL,
created_at timestamp without time zone NOT NULL, created_at timestamp without time zone,
updated_at timestamp without time zone NOT NULL, updated_at timestamp without time zone,
updater_id integer, updater_id integer,
other_names text, other_names text,
other_names_index tsvector, other_names_index tsvector,
@@ -4770,6 +4770,14 @@ ALTER TABLE ONLY public.saved_searches
ADD CONSTRAINT saved_searches_pkey PRIMARY KEY (id); ADD CONSTRAINT saved_searches_pkey PRIMARY KEY (id);
--
-- Name: schema_migrations schema_migrations_pkey; Type: CONSTRAINT; Schema: public; Owner: -
--
ALTER TABLE ONLY public.schema_migrations
ADD CONSTRAINT schema_migrations_pkey PRIMARY KEY (version);
-- --
-- Name: super_voters super_voters_pkey; Type: CONSTRAINT; Schema: public; Owner: - -- Name: super_voters super_voters_pkey; Type: CONSTRAINT; Schema: public; Owner: -
-- --
@@ -6994,6 +7002,13 @@ CREATE INDEX index_posts_on_parent_id ON public.posts USING btree (parent_id);
CREATE INDEX index_posts_on_pixiv_id ON public.posts USING btree (pixiv_id) WHERE (pixiv_id IS NOT NULL); CREATE INDEX index_posts_on_pixiv_id ON public.posts USING btree (pixiv_id) WHERE (pixiv_id IS NOT NULL);
--
-- Name: index_posts_on_source; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_posts_on_source ON public.posts USING btree (lower((source)::text));
-- --
-- Name: index_posts_on_source_pattern; Type: INDEX; Schema: public; Owner: - -- Name: index_posts_on_source_pattern; Type: INDEX; Schema: public; Owner: -
-- --
@@ -7015,6 +7030,13 @@ CREATE INDEX index_posts_on_tags_index ON public.posts USING gin (tag_index);
CREATE INDEX index_posts_on_uploader_id ON public.posts USING btree (uploader_id); CREATE INDEX index_posts_on_uploader_id ON public.posts USING btree (uploader_id);
--
-- Name: index_posts_on_uploader_ip_addr; Type: INDEX; Schema: public; Owner: -
--
CREATE INDEX index_posts_on_uploader_ip_addr ON public.posts USING btree (uploader_ip_addr);
-- --
-- Name: index_saved_searches_on_labels; Type: INDEX; Schema: public; Owner: - -- Name: index_saved_searches_on_labels; Type: INDEX; Schema: public; Owner: -
-- --
@@ -7121,10 +7143,10 @@ CREATE UNIQUE INDEX index_token_buckets_on_user_id ON public.token_buckets USING
-- --
-- Name: index_uploads_on_alt_source; Type: INDEX; Schema: public; Owner: - -- Name: index_uploads_on_referer_url; Type: INDEX; Schema: public; Owner: -
-- --
CREATE INDEX index_uploads_on_alt_source ON public.uploads USING btree (alt_source); CREATE INDEX index_uploads_on_referer_url ON public.uploads USING btree (referer_url);
-- --
@@ -7267,13 +7289,6 @@ CREATE INDEX index_wiki_pages_on_title_pattern ON public.wiki_pages USING btree
CREATE INDEX index_wiki_pages_on_updated_at ON public.wiki_pages USING btree (updated_at); CREATE INDEX index_wiki_pages_on_updated_at ON public.wiki_pages USING btree (updated_at);
--
-- Name: unique_schema_migrations; Type: INDEX; Schema: public; Owner: -
--
CREATE UNIQUE INDEX unique_schema_migrations ON public.schema_migrations USING btree (version);
-- --
-- Name: favorites insert_favorites_trigger; Type: TRIGGER; Schema: public; Owner: - -- Name: favorites insert_favorites_trigger; Type: TRIGGER; Schema: public; Owner: -
-- --
@@ -7502,13 +7517,13 @@ INSERT INTO "schema_migrations" (version) VALUES
('20171230220225'), ('20171230220225'),
('20180113211343'), ('20180113211343'),
('20180116001101'), ('20180116001101'),
('20180310070233'),
('20180403231351'), ('20180403231351'),
('20180413224239'), ('20180413224239'),
('20180425194016'), ('20180425194016'),
('20180516222413'), ('20180516222413'),
('20180517190048'), ('20180517190048'),
('20180518175154'), ('20180518175154'),
('20180804203201'); ('20180804203201'),
('20180816230604');

View File

@@ -87,7 +87,7 @@ namespace :images do
post = Post.find(post_id) post = Post.find(post_id)
post.source =~ /(\d{5,})/ post.source =~ /(\d{5,})/
if illust_id = $1 if illust_id = $1
response = PixivApiClient.new.works(illust_id) response = PixivApiClient.new.work(illust_id)
upload = Upload.new upload = Upload.new
upload.source = response.pages.first upload.source = response.pages.first
upload.file_ext = post.file_ext upload.file_ext = post.file_ext

View File

@@ -68,6 +68,20 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
end end
end end
context "for a direct link twitter post" do
setup do
@ref = "https://twitter.com/onsen_musume_jp/status/865534101918330881"
@source = "https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"
end
should "trigger the preprocessor" do
assert_difference(-> { Upload.preprocessed.count }, 1) do
get_auth new_upload_path, @user, params: {:url => @source, :ref => @ref}
Delayed::Worker.new.work_off
end
end
end
context "for a twitter post" do context "for a twitter post" do
setup do setup do
@source = "https://twitter.com/frappuccino/status/566030116182949888" @source = "https://twitter.com/frappuccino/status/566030116182949888"
@@ -89,6 +103,20 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
end end
end end
context "for a pixiv post" do
setup do
@ref = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482"
@source = "https://i.pximg.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg"
end
should "trigger the preprocessor" do
assert_difference(-> { Upload.preprocessed.count }, 1) do
get_auth new_upload_path, @user, params: {:url => @source, :ref => @ref}
Delayed::Worker.new.work_off
end
end
end
context "for a post that has already been uploaded" do context "for a post that has already been uploaded" do
setup do setup do
as_user do as_user do
@@ -149,6 +177,48 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
end end
context "create action" do context "create action" do
context "when a preprocessed upload already exists" do
context "for twitter" do
setup do
as_user do
@ref = "https://twitter.com/onsen_musume_jp/status/865534101918330881"
@source = "https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"
@upload = create(:upload, status: "preprocessed", source: @source, referer_url: @ref, image_width: 0, image_height: 0, file_size: 0, md5: "something", file_ext: "jpg")
end
end
should "update the predecessor" do
assert_difference(->{ Post.count }, 1) do
assert_difference(->{ Upload.count }, 0) do
post_auth uploads_path, @user, params: {:upload => {:tag_string => "aaa", :rating => "q", :source => @source, :referer_url => @ref}}
end
end
post = Post.last
assert_match(/aaa/, post.tag_string)
end
end
context "for pixiv" do
setup do
@ref = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482"
@source = "https://i.pximg.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg"
as_user do
@upload = create(:upload, status: "preprocessed", source: @source, referer_url: @ref, image_width: 0, image_height: 0, file_size: 0, md5: "something", file_ext: "jpg")
end
end
should "update the predecessor" do
assert_difference(->{ Post.count }, 1) do
assert_difference(->{ Upload.count }, 0) do
post_auth uploads_path, @user, params: {:upload => {:tag_string => "aaa", :rating => "q", :source => @source, :referer_url => @ref}}
end
end
post = Post.last
assert_match(/aaa/, post.tag_string)
end
end
end
should "create a new upload" do should "create a new upload" do
assert_difference("Upload.count", 1) do assert_difference("Upload.count", 1) do
file = Rack::Test::UploadedFile.new("#{Rails.root}/test/files/test.jpg", "image/jpeg") file = Rack::Test::UploadedFile.new("#{Rails.root}/test/files/test.jpg", "image/jpeg")

View File

@@ -17,34 +17,59 @@ class UploadServiceTest < ActiveSupport::TestCase
context "::Utils" do context "::Utils" do
subject { UploadService::Utils } subject { UploadService::Utils }
context "#download_from_source" do context "#download_for_upload" do
context "for a non-source site" do
setup do setup do
@jpeg = "https://upload.wikimedia.org/wikipedia/commons/c/c5/Moraine_Lake_17092005.jpg" @source = "https://upload.wikimedia.org/wikipedia/commons/c/c5/Moraine_Lake_17092005.jpg"
@ugoira = "https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip" @upload = Upload.new
@upload.source = @source
end end
should "work on a jpeg" do should "work on a jpeg" do
file = subject.download_from_source(@jpeg) do |context| file = subject.download_for_upload(@upload)
assert_not_nil(context[:downloaded_source])
assert_not_nil(context[:source])
end
assert_operator(File.size(file.path), :>, 0) assert_operator(File.size(file.path), :>, 0)
file.close file.close
end end
end
context "for a pixiv" do
setup do
@source = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247350"
@upload = Upload.new
@upload.source = @source
end
should "work on an ugoira url" do should "work on an ugoira url" do
file = subject.download_from_source(@ugoira, referer_url: "https://www.pixiv.net") do |context| file = subject.download_for_upload(@upload)
assert_not_nil(context[:downloaded_source])
assert_not_nil(context[:source])
assert_not_nil(context[:ugoira])
end
assert_operator(File.size(file.path), :>, 0) assert_operator(File.size(file.path), :>, 0)
file.close file.close
end end
end end
context "for a pixiv ugoira" do
setup do
@source = "https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip"
@referer = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
@upload = Upload.new
@upload.source = @source
@upload.referer_url = @referer
end
should "work on an ugoira url" do
file = subject.download_for_upload(@upload)
assert_not_nil(@upload.context["ugoira"])
assert_operator(File.size(file.path), :>, 0)
file.close
end
end
end
context ".calculate_ugoira_dimensions" do context ".calculate_ugoira_dimensions" do
context "for a valid ugoira file" do context "for a valid ugoira file" do
setup do setup do
@@ -343,9 +368,6 @@ class UploadServiceTest < ActiveSupport::TestCase
FactoryBot.create(:user) FactoryBot.create(:user)
end end
CurrentUser.ip_addr = "127.0.0.1" CurrentUser.ip_addr = "127.0.0.1"
@jpeg = "https://raikou1.donmai.us/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg"
@ugoira = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
@video = "https://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
end end
teardown do teardown do
@@ -356,65 +378,81 @@ class UploadServiceTest < ActiveSupport::TestCase
context "for twitter" do context "for twitter" do
setup do setup do
@source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large" @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large"
@norm_source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
@ref = "https://twitter.com/nounproject/status/540944400767922176" @ref = "https://twitter.com/nounproject/status/540944400767922176"
end end
should "record the correct source when a referer is given" do should "download the file" do
@service = subject.new(source: @source, referer_url: @ref) @service = subject.new(source: @source, referer_url: @ref)
@upload = @service.start! @upload = @service.start!
assert_equal(@ref, @upload.source) assert_equal("preprocessed", @upload.status)
end assert_equal(9800, @upload.file_size)
assert_equal("png", @upload.file_ext)
should "save the twimg url in alt_source" do assert_equal("f5fe24f3a3a13885285f6627e04feec9", @upload.md5)
@service = subject.new(source: @source, referer_url: @ref) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "png", :original)))
@upload = @service.start! assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "png", :preview)))
assert_equal(@norm_source, @upload.alt_source)
end end
end end
context "for pixiv" do context "for pixiv" do
setup do setup do
@source = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735" @source = "https://i.pximg.net/img-original/img/2014/10/29/09/27/19/46785915_p0.jpg"
@ref = "http://www.pixiv.net/member.php?id=696859" @ref = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=46785915"
@direct = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg"
end end
should "record the correct source" do should "download the file" do
@service = subject.new(source: @source, referer_url: @ref) @service = subject.new(source: @source, referer_url: @ref)
@upload = @service.start! @upload = @service.start!
assert_equal(@direct, @upload.source)
end
end
should "work for a jpeg" do
@service = subject.new(source: @jpeg)
@upload = @service.start!
assert_equal("preprocessed", @upload.status) assert_equal("preprocessed", @upload.status)
assert_not_nil(@upload.md5) assert_equal(294591, @upload.file_size)
assert_equal("jpg", @upload.file_ext) assert_equal("jpg", @upload.file_ext)
assert_operator(@upload.file_size, :>, 0) assert_equal("3cb1ef624714c15dbb2d6e7b1d57faef", @upload.md5)
assert_not_nil(@upload.source)
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original))) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original)))
# this image is not large enough to generate a large file
#assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :large)))
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview))) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview)))
end end
end
should "work for an ugoira" do context "for pixiv ugoira" do
@service = subject.new(source: @ugoira) setup do
@source = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
end
should "download the file" do
@service = subject.new(source: @source)
@upload = @service.start! @upload = @service.start!
assert_equal("preprocessed", @upload.status) assert_equal("preprocessed", @upload.status)
assert_not_nil(@upload.md5) assert_equal(2804, @upload.file_size)
assert_equal("zip", @upload.file_ext) assert_equal("zip", @upload.file_ext)
assert_operator(@upload.file_size, :>, 0) assert_equal("cad1da177ef309bf40a117c17b8eecf5", @upload.md5)
assert_not_nil(@upload.source)
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :original))) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :original)))
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :large))) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :large)))
end end
end
context "for null" do
setup do
@source = "https://raikou1.donmai.us/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg"
end
should "download the file" do
@service = subject.new(source: @source)
@upload = @service.start!
assert_equal("preprocessed", @upload.status)
assert_equal(181309, @upload.file_size)
assert_equal("jpg", @upload.file_ext)
assert_equal("93f4dd66ef1eb11a89e56d31f9adc8d0", @upload.md5)
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original)))
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :large)))
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview)))
end
end
context "for a video" do
setup do
@source = "https://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
end
should "work for a video" do should "work for a video" do
@service = subject.new(source: @video) @service = subject.new(source: @source)
@upload = @service.start! @upload = @service.start!
assert_equal("preprocessed", @upload.status) assert_equal("preprocessed", @upload.status)
assert_not_nil(@upload.md5) assert_not_nil(@upload.md5)
@@ -424,14 +462,16 @@ class UploadServiceTest < ActiveSupport::TestCase
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :original))) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :original)))
assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :preview))) assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :preview)))
end end
end
context "on timeout errors" do context "on timeout errors" do
setup do setup do
@source = "https://raikou1.donmai.us/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg"
HTTParty.stubs(:get).raises(Net::ReadTimeout) HTTParty.stubs(:get).raises(Net::ReadTimeout)
end end
should "leave the upload in an error state" do should "leave the upload in an error state" do
@service = subject.new(source: @video) @service = subject.new(source: @source)
@upload = @service.start! @upload = @service.start!
assert_match(/error:/, @upload.status) assert_match(/error:/, @upload.status)
end end
@@ -445,41 +485,15 @@ class UploadServiceTest < ActiveSupport::TestCase
FactoryBot.create(:user) FactoryBot.create(:user)
end end
CurrentUser.ip_addr = "127.0.0.1" CurrentUser.ip_addr = "127.0.0.1"
@source = "https://twitter.com/nounproject/status/540944400767922176"
end end
context "for twitter" do should "overwrite the attributes" do
setup do @service = subject.new(source: @source, rating: 'e')
@source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large"
@norm_source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
@ref = "https://twitter.com/nounproject/status/540944400767922176"
end
should "record the correct source when a referer is given" do
@service = subject.new(source: @source, referer_url: @ref)
@upload = @service.start! @upload = @service.start!
@service = subject.new(source: @source)
@service.finish! @service.finish!
@upload.reload @upload.reload
assert_equal('e', @upload.rating)
assert_equal(@ref, @upload.source)
end
end
context "for pixiv" do
setup do
@source = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735"
@ref = "http://www.pixiv.net/member.php?id=696859"
@direct = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg"
end
should "record the correct source" do
@service = subject.new(source: @source, referer_url: @ref)
@upload = @service.start!
@service = subject.new(source: @source)
@service.finish!
@upload.reload
assert_equal(@direct, @upload.source)
end
end end
end end
end end
@@ -637,7 +651,7 @@ class UploadServiceTest < ActiveSupport::TestCase
image_url = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" image_url = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
as_user { @post.replace!(replacement_url: replacement_url) } as_user { @post.replace!(replacement_url: replacement_url) }
assert_equal(image_url, @post.replacements.last.replacement_url) assert_equal(replacement_url, @post.replacements.last.replacement_url)
end end
end end
@@ -1027,7 +1041,33 @@ class UploadServiceTest < ActiveSupport::TestCase
CurrentUser.ip_addr = nil CurrentUser.ip_addr = nil
end end
context "for an ugoira" do context "for a pixiv" do
setup do
@source = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg"
@ref = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735"
@upload = FactoryBot.create(:jpg_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, source: @source, referer_url: @ref)
end
should "record the canonical source" do
post = subject.new({}).create_post_from_upload(@upload)
assert_equal(@source, post.source)
end
end
context "for a twitter" do
setup do
@source = "https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:large"
@ref = "https://twitter.com/aranobu/status/817736083567820800"
@upload = FactoryBot.create(:jpg_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, source: @source, referer_url: @ref)
end
should "record the canonical source" do
post = subject.new({}).create_post_from_upload(@upload)
assert_equal(@ref, post.source)
end
end
context "for a pixiv ugoira" do
setup do setup do
@upload = FactoryBot.create(:ugoira_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, context: UGOIRA_CONTEXT) @upload = FactoryBot.create(:ugoira_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, context: UGOIRA_CONTEXT)
end end

View File

@@ -1,23 +1,22 @@
require 'ptools' require 'ptools'
module DownloadTestHelper module DownloadTestHelper
def assert_downloaded(expected_filesize, source) def assert_downloaded(expected_filesize, source, referer=nil)
download = Downloads::File.new(source) download = Downloads::File.new(source, referer)
tempfile = download.download! tempfile, strategy = download.download!
assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}") assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}")
rescue Net::OpenTimeout rescue Net::OpenTimeout
skip "Remote connection to #{source} failed" skip "Remote connection to #{source} failed"
end end
def assert_rewritten(expected_source, test_source) def assert_rewritten(expected_source, test_source, test_referer=nil)
download = Downloads::File.new(test_source) strategy = Sources::Strategies.find(test_source, test_referer)
rewritten_source = strategy.image_url
rewritten_source, _, _ = download.before_download(test_source, {})
assert_match(expected_source, rewritten_source, "Tested source URL: #{test_source}") assert_match(expected_source, rewritten_source, "Tested source URL: #{test_source}")
end end
def assert_not_rewritten(source) def assert_not_rewritten(source, referer=nil)
assert_rewritten(source, source) assert_rewritten(source, source, referer)
end end
def check_ffmpeg def check_ffmpeg

View File

@@ -229,9 +229,7 @@ class ArtistTest < ActiveSupport::TestCase
should "find the correct artist for page URLs" do should "find the correct artist for page URLs" do
assert_artist_found("artgerm", "http://www.deviantart.com/artgerm/art/Peachy-Princess-Ver-2-457220550") assert_artist_found("artgerm", "http://www.deviantart.com/artgerm/art/Peachy-Princess-Ver-2-457220550")
assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/art/My-Queen-426745289") assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/art/My-Queen-426745289")
assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/gallery/#/d722mrt")
end end
should "find the correct artist for image URLs" do should "find the correct artist for image URLs" do
@@ -281,11 +279,6 @@ class ArtistTest < ActiveSupport::TestCase
assert_artist_found("bkub", "http://www.pixiv.net/i/46239857") assert_artist_found("bkub", "http://www.pixiv.net/i/46239857")
end end
should "find nothing for malformed URLs" do
assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=herpderp")
assert_artist_not_found("http://www.pixiv.net/wharrgarbl")
end
should "find nothing for bad IDs" do should "find nothing for bad IDs" do
assert_raises(PixivApiClient::BadIDError) do assert_raises(PixivApiClient::BadIDError) do
assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=32049358") assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=32049358")

View File

@@ -45,6 +45,56 @@ class ArtistUrlTest < ActiveSupport::TestCase
end end
end end
context "artstation urls" do
setup do
@urls = [
FactoryBot.create(:artist_url, url: "https://www.artstation.com/koyorin"),
FactoryBot.create(:artist_url, url: "https://www.artstation.com/artist/koyorin"),
FactoryBot.create(:artist_url, url: "https://koyorin.artstation.com"),
FactoryBot.create(:artist_url, url: "https://www.artstation.com/artwork/04XA4")
]
end
should "normalize" do
assert_equal("http://www.artstation.com/koyorin/", @urls[0].normalized_url)
assert_equal("http://www.artstation.com/koyorin/", @urls[1].normalized_url)
assert_equal("http://www.artstation.com/koyorin/", @urls[2].normalized_url)
assert_equal("http://www.artstation.com/jeyrain/", @urls[3].normalized_url)
end
end
context "deviantart urls" do
setup do
@urls = [
FactoryBot.create(:artist_url, url: "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484"),
FactoryBot.create(:artist_url, url: "http://noizave.deviantart.com/art/test-post-please-ignore-685436408"),
FactoryBot.create(:artist_url, url: "https://www.deviantart.com/noizave")
]
end
should "normalize" do
assert_equal("http://www.deviantart.com/aeror404/", @urls[0].normalized_url)
assert_equal("http://www.deviantart.com/noizave/", @urls[1].normalized_url)
assert_equal("http://www.deviantart.com/noizave/", @urls[2].normalized_url)
end
end
context "nicoseiga urls" do
setup do
@urls = [
FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777"),
FactoryBot.create(:artist_url, url: "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"),
FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/seiga/im4937663")
]
end
should "normalize" do
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[0].normalized_url)
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[1].normalized_url)
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[2].normalized_url)
end
end
should "normalize fc2 urls" do should "normalize fc2 urls" do
url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet") url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet")
assert_equal("http://blog55.fc2.com/monet", url.url) assert_equal("http://blog55.fc2.com/monet", url.url)
@@ -56,13 +106,13 @@ class ArtistUrlTest < ActiveSupport::TestCase
end end
should "normalize deviant art artist urls" do should "normalize deviant art artist urls" do
url = FactoryBot.create(:artist_url, :url => "https://caidychen.deviantart.com/") url = FactoryBot.create(:artist_url, :url => "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
assert_equal("http://www.deviantart.com/caidychen/", url.normalized_url) assert_equal("http://www.deviantart.com/aeror404/", url.normalized_url)
end end
should "normalize nico seiga artist urls" do should "normalize nico seiga artist urls" do
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/1826959") url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/7017777")
assert_equal("http://seiga.nicovideo.jp/user/illust/1826959/", url.normalized_url) assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663") url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663")
assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url) assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
@@ -80,9 +130,9 @@ class ArtistUrlTest < ActiveSupport::TestCase
end end
should "normalize twitter urls" do should "normalize twitter urls" do
url = FactoryBot.create(:artist_url, :url => "https://twitter.com/MONET/status/12345") url = FactoryBot.create(:artist_url, :url => "https://twitter.com/aoimanabu/status/892370963630743552")
assert_equal("https://twitter.com/MONET/status/12345", url.url) assert_equal("https://twitter.com/aoimanabu/status/892370963630743552", url.url)
assert_equal("http://twitter.com/monet/status/12345/", url.normalized_url) assert_equal("http://twitter.com/aoimanabu/", url.normalized_url)
end end
end end
end end

View File

@@ -4,31 +4,35 @@ module Downloads
class ArtStationTest < ActiveSupport::TestCase class ArtStationTest < ActiveSupport::TestCase
context "a download for a (small) artstation image" do context "a download for a (small) artstation image" do
setup do setup do
@source = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" @asset = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974"
@download = Downloads::File.new(@source) @download = Downloads::File.new(@asset)
end end
should "download the large image instead" do should "download the large image instead" do
assert_equal("https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974", @download.source) file, strategy = @download.download!
assert_equal(517_706, ::File.size(file.path))
end end
end end
context "for an image where an original does not exist" do context "for an image where an original does not exist" do
setup do setup do
@source = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg" @asset = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg"
@download = Downloads::File.new(@source) @download = Downloads::File.new(@asset)
@download.download!
end end
should "not try to download the original" do should "not try to download the original" do
assert_equal("https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg", @download.source) file, strategy = @download.download!
assert_equal(449_047, ::File.size(file.path))
end end
end end
context "a download for an ArtStation image hosted on CloudFlare" do context "a download for an ArtStation image hosted on CloudFlare" do
setup do
@asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974"
end
should "return the original file, not the polished file" do should "return the original file, not the polished file" do
@source = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974" assert_downloaded(517_706, @asset) # polished size: 502_052
assert_downloaded(517_706, @source) # polished size: 502_052
end end
end end
@@ -36,11 +40,12 @@ module Downloads
setup do setup do
@source = "https://dantewontdie.artstation.com/projects/YZK5q" @source = "https://dantewontdie.artstation.com/projects/YZK5q"
@download = Downloads::File.new(@source) @download = Downloads::File.new(@source)
@download.download!
end end
should "download the original image instead" do should "download the original image instead" do
assert_equal("https://cdna.artstation.com/p/assets/images/images/006/066/534/large/yinan-cui-reika.jpg?1495781565", @download.source) file, strategy = @download.download!
assert_equal(237_651, ::File.size(file.path))
end end
end end
end end

View File

@@ -8,11 +8,7 @@ module Downloads
@source = "http://starbitt.deviantart.com/art/09271X-636962118" @source = "http://starbitt.deviantart.com/art/09271X-636962118"
@download = Downloads::File.new(@source) @download = Downloads::File.new(@source)
@tempfile = @download.download! @tempfile, strategy = @download.download!
end
should "set the html page as the source" do
assert_equal("https://orig00.deviantart.net/82ef/f/2016/271/7/1/aaaaaa_by_starbitt-daj8b46.gif", @download.source)
end end
should "work" do should "work" do

View File

@@ -41,7 +41,7 @@ module Downloads
end end
should "store the file in the tempfile path" do should "store the file in the tempfile path" do
tempfile = @download.download! tempfile, strategy = @download.download!
assert_equal(@source, @download.source) assert_equal(@source, @download.source)
assert_operator(tempfile.size, :>, 0, "should have data") assert_operator(tempfile.size, :>, 0, "should have data")
end end

View File

@@ -4,6 +4,7 @@ module Downloads
class PixivTest < ActiveSupport::TestCase class PixivTest < ActiveSupport::TestCase
def setup def setup
super super
Downloads::File.stubs(:is_cloudflare?).returns(false)
load_pixiv_tokens! load_pixiv_tokens!
end end
@@ -13,29 +14,6 @@ module Downloads
end end
context "in all cases" do context "in all cases" do
# Test an old illustration (one uploaded before 2014-09-16). New
# /img-original/ and /img-master/ URLs currently don't work for images
# uploaded before this date. Only old /imgXX/img/username/ URLs work.
context "downloading an old PNG illustration" do
setup do
@medium_page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=14901720"
@big_page = "http://www.pixiv.net/member_illust.php?mode=big&illust_id=14901720"
@new_small_thumbnail = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
@new_medium_thumbnail = "http://i1.pixiv.net/c/600x600/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
@new_full_size_image = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png"
@file_size = 1261
end
should "work when using new URLs" do
# Don't know the actual file size of the thumbnails since they don't work.
assert_downloaded(1083, @new_small_thumbnail)
assert_downloaded(1083, @new_medium_thumbnail)
assert_downloaded(@file_size, @new_full_size_image)
end
end
# Test a new illustration (one uploaded after 2014-09-30). New illustrations # Test a new illustration (one uploaded after 2014-09-30). New illustrations
# must use /img-original/ for full size URLs. Old /imgXX/img/username/ style URLs # must use /img-original/ for full size URLs. Old /imgXX/img/username/ style URLs
# don't work for images uploaded after this date. # don't work for images uploaded after this date.
@@ -103,21 +81,6 @@ module Downloads
end end
end end
context "downloading a bad id image" do
setup do
@bad_id_full = "https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png"
@bad_id_sample = "https://i.pximg.net/c/600x600/img-master/img/2017/11/22/01/06/44/65991677_p0_master1200.jpg"
end
should "not raise an error when rewriting the url" do
assert_nothing_raised { assert_not_rewritten(@bad_id_full) }
end
should_eventually "rewrite bad id samples to full size" do
assert_rewritten(@bad_id_full, @bad_id_sample)
end
end
context "downloading a ugoira" do context "downloading a ugoira" do
setup do setup do
@medium_page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364" @medium_page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
@@ -138,6 +101,8 @@ module Downloads
context "downloading a profile image" do context "downloading a profile image" do
should "download new profile images" do should "download new profile images" do
skip "profile images are no longer supported"
@file_url = "https://i.pximg.net/user-profile/img/2014/12/18/10/31/23/8733472_7dc7310db6cc37163af145d04499e411_170.jpg" @file_url = "https://i.pximg.net/user-profile/img/2014/12/18/10/31/23/8733472_7dc7310db6cc37163af145d04499e411_170.jpg"
@file_size = 23_328 @file_size = 23_328
@@ -149,8 +114,10 @@ module Downloads
context "downloading a background image" do context "downloading a background image" do
should "download the image" do should "download the image" do
@file_url = "http://i1.pixiv.net/background/img/2016/05/17/12/05/48/2074388_d4ac52034f7ca0af3e083d59fde7e97f.jpg" skip "background images are no longer supported"
@file_size = 386_678
@file_url = "https://i.pximg.net/background/img/2015/10/25/08/45/27/198128_77ddf78cdb162e3d1c0d5134af185813.jpg"
@file_size = 0
assert_not_rewritten(@file_url) assert_not_rewritten(@file_url)
assert_downloaded(@file_size, @file_url) assert_downloaded(@file_size, @file_url)
@@ -159,21 +126,23 @@ module Downloads
context "downloading a novel image" do context "downloading a novel image" do
should "download new novel images" do should "download new novel images" do
@file_url = "http://i1.pixiv.net/novel-cover-original/img/2016/11/03/20/10/58/7436075_f75af69f3eacd1656d3733c72aa959cf.jpg" @file_url = "https://i.pximg.net/novel-cover-original/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b.jpg"
@file_size = 316_311 @ref = 'https://www.pixiv.net/novel/show.php?id=8465454&mode=cover'
@file_size = 532_129
assert_not_rewritten(@file_url) assert_not_rewritten(@file_url, @ref)
assert_downloaded(@file_size, @file_url) assert_downloaded(@file_size, @file_url, @ref)
end end
end end
context "downloading a pixiv fanbox image" do context "downloading a pixiv fanbox image" do
should "work" do should "work" do
@file_url = "https://fanbox.pixiv.net/images/post/31757/w/1200/0CdXtgr4al3t43gQG4NZLnpQ.jpeg" @source = "https://www.pixiv.net/fanbox/creator/12491073/post/82406"
@file_size = 200_239 @file_url = "https://fanbox.pixiv.net/images/post/82406/D833IKA7FIesJXL8xx39rrG0.jpeg"
@file_size = 873_387
assert_not_rewritten(@file_url) assert_not_rewritten(@file_url, @source)
assert_downloaded(@file_size, @file_url) assert_downloaded(@file_size, @file_url, @source)
end end
end end
end end
@@ -181,12 +150,11 @@ module Downloads
context "An ugoira site for pixiv" do context "An ugoira site for pixiv" do
setup do setup do
@download = Downloads::File.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") @download = Downloads::File.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
@tempfile = @download.download! @tempfile, strategy = @download.download!
@tempfile.close! @tempfile.close!
end end
should "capture the data" do should "capture the data" do
assert_equal("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip", @download.source)
assert_equal(2, @download.data[:ugoira_frame_data].size) assert_equal(2, @download.data[:ugoira_frame_data].size)
if @download.data[:ugoira_frame_data][0]["file"] if @download.data[:ugoira_frame_data][0]["file"]
assert_equal([{"file"=>"000000.jpg", "delay"=>125}, {"file"=>"000001.jpg", "delay"=>125}], @download.data[:ugoira_frame_data]) assert_equal([{"file"=>"000000.jpg", "delay"=>125}, {"file"=>"000001.jpg", "delay"=>125}], @download.data[:ugoira_frame_data])

View File

@@ -2,43 +2,52 @@ require 'test_helper'
module Downloads module Downloads
class TumblrTest < ActiveSupport::TestCase class TumblrTest < ActiveSupport::TestCase
# Currently there's no way to obtain the raw version of these images,
# so we have to change the tests to validate against the 1280 version
context "a download for a tumblr 500 sample" do context "a download for a tumblr 500 sample" do
should "instead download the raw version" do should "instead download the 1280 version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@ref = "https://noizave.tumblr.com/post/162206271767"
@source = "https://24.media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_500.jpg" @source = "https://24.media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_500.jpg"
@rewrite = "http://data.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_raw.jpg" @rewrite = "https://media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_1280.jpg"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(196_617, @source) assert_downloaded(113909, @source, @ref)
# assert_downloaded(196_617, @source)
end end
end end
context "a download for a *.media.tumblr.com/tumblr_$id_$size image without a larger size" do context "a download for a *.media.tumblr.com/tumblr_$id_$size image without a larger size" do
should "download the same version" do should "download the same version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@ref = "https://noizave.tumblr.com/post/162206271767"
@source = "https://25.media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg" @source = "https://25.media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg"
@rewrite = "http://data.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg" @rewrite = "https://media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_1280.jpg"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(90_122, @source) assert_downloaded(41803, @source, @ref)
# assert_downloaded(90_122, @source)
end end
end end
context "a download for a *.media.tumblr.com/tumblr_$id_$size image with a larger size" do context "a download for a *.media.tumblr.com/tumblr_$id_$size image with a larger size" do
should "download the best available version" do should "download the best available version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@ref = "https://noizave.tumblr.com/post/162206271767"
@source = "https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png" @source = "https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"
@rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png" @rewrite = "https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(34_060, @source) assert_downloaded(62658, @source, @ref)
end end
end end
context "a download for a *.media.tumblr.com/$hash/tumblr_$id_rN_$size image" do context "a download for a *.media.tumblr.com/$hash/tumblr_$id_rN_$size image" do
should "download the best available version" do should "download the best available version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@ref = "https://noizave.tumblr.com/post/162206271767"
@source = "https://33.media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_500.gif" @source = "https://33.media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_500.gif"
@rewrite = "http://data.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_raw.gif" @rewrite = "https://media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_1280.gif"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(1_234_017, @source) assert_downloaded(1_234_017, @source, @ref)
end end
end end
@@ -46,40 +55,33 @@ module Downloads
should "download the best available version" do should "download the best available version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@source = "https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif" @source = "https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif"
@rewrite = "http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif" @rewrite = "https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_1280.gif"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(110_348, @source) assert_downloaded(110_348, @source, @ref)
end end
end end
context "a download for a data.tumblr.com/$id_$size image with a larger size" do context "a download for a media.tumblr.com/$id_$size image with a larger size" do
should "download the best available version" do should "download the best available version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@source = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg" @ref = "https://noizave.tumblr.com/post/162206271767"
@rewrite = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg" @source = "http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg"
assert_rewritten(@rewrite, @source) @rewrite = "https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_1280.jpg"
assert_downloaded(153_885, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(122413, @source)
# assert_downloaded(153_885, @source)
end end
end end
context "a download for a data.tumblr.com/tumblr_$id_$size.jpg image" do context "a download for a media.tumblr.com/tumblr_$id_$size.jpg image" do
should "download the best available version" do should "download the best available version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@source = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg" @ref = "https://noizave.tumblr.com/post/162206271767"
@rewrite = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg" @source = "http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg"
assert_rewritten(@rewrite, @source) @rewrite = "https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"
assert_downloaded(296_399, @source) assert_rewritten(@rewrite, @source, @ref)
end assert_downloaded(101869, @source, @ref)
end # assert_downloaded(296_399, @source)
context "a download for a gs1.wac.edgecastcdn.net image" do
should "rewrite to the full tumblr version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@source = "https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"
@rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"
assert_downloaded(34_060, @source)
assert_rewritten(@rewrite, @source)
end end
end end
@@ -87,9 +89,9 @@ module Downloads
should "download the best available version" do should "download the best available version" do
skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
@source = "https://noizave.tumblr.com/post/162206271767" @source = "https://noizave.tumblr.com/post/162206271767"
@rewrite = "http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png" @rewrite = "https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"
assert_downloaded(3_620, @source) assert_downloaded(3655, @source)
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source)
end end
end end

View File

@@ -8,17 +8,19 @@ module Downloads
@source = "https://twitter.com/CincinnatiZoo/status/859073537713328129" @source = "https://twitter.com/CincinnatiZoo/status/859073537713328129"
@rewrite = "https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4" @rewrite = "https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source)
assert_downloaded(8_602_983, @source)
# this takes awhile so just skip it unless we really want to test it
# assert_downloaded(8_602_983, @source)
end end
end end
context "downloading a 'https://twitter.com/:user/status/:id/photo/:n' card url" do context "downloading a 'https://twitter.com/:user/status/:id/photo/:n' card url" do
should "download the orig file" do should "download the orig file" do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@source = "https://twitter.com/masayasuf/status/870734961778630656/photo/1" @source = "https://twitter.com/ry_o_ta_/status/1024316791688843269/photo/1"
@rewrite = "https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig" @rewrite = "https://pbs.twimg.com/media/Djcar72VsAAZsGa.jpg:orig"
assert_rewritten(@rewrite, @source) assert_rewritten(@rewrite, @source)
assert_downloaded(788_206, @source) assert_downloaded(103812, @source)
end end
end end
@@ -37,8 +39,9 @@ module Downloads
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large" @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large"
@rewrite = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig" @rewrite = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
assert_rewritten(@rewrite, @source) @ref = "https://twitter.com/nounproject/status/540944400767922176"
assert_downloaded(9800, @source) assert_rewritten(@rewrite, @source, @ref)
assert_downloaded(9800, @source, @ref)
end end
end end
end end

View File

@@ -1,37 +0,0 @@
require 'test_helper'
class PostReplacementTest < ActiveSupport::TestCase
def setup
super
mock_iqdb_service!
Delayed::Worker.delay_jobs = true # don't delete the old images right away
@system = FactoryBot.create(:user, created_at: 2.weeks.ago)
User.stubs(:system).returns(@system)
@uploader = FactoryBot.create(:user, created_at: 2.weeks.ago, can_upload_free: true)
@replacer = FactoryBot.create(:user, created_at: 2.weeks.ago, can_approve_posts: true)
CurrentUser.user = @replacer
CurrentUser.ip_addr = "127.0.0.1"
end
def teardown
super
CurrentUser.user = nil
CurrentUser.ip_addr = nil
Delayed::Worker.delay_jobs = false
end
context "Replacing" do
setup do
CurrentUser.scoped(@uploader, "127.0.0.2") do
attributes = FactoryBot.attributes_for(:jpg_upload, as_pending: "0", tag_string: "lowres tag1")
service = UploadService.new(attributes)
upload = service.start!
@post = upload.post
end
end
end
end

View File

@@ -1480,26 +1480,6 @@ class PostTest < ActiveSupport::TestCase
assert_equal(18557054, @post.pixiv_id) assert_equal(18557054, @post.pixiv_id)
@post.pixiv_id = nil @post.pixiv_id = nil
end end
context "but doesn't have a pixiv id" do
should "save the pixiv id" do
@post.pixiv_id = 1234
@post.update(source: "http://i1.pixiv.net/novel-cover-original/img/2016/11/03/20/10/58/7436075_f75af69f3eacd1656d3733c72aa959cf.jpg")
assert_nil(@post.pixiv_id)
@post.pixiv_id = 1234
@post.update(source: "http://i2.pixiv.net/background/img/2016/10/30/12/27/30/7059005_da9946b806c10d391a81ed1117cd33d6.jpg")
assert_nil(@post.pixiv_id)
@post.pixiv_id = 1234
@post.update(source: "http://i1.pixiv.net/img15/img/omega777/novel/2612734.jpg")
assert_nil(@post.pixiv_id)
@post.pixiv_id = 1234
@post.update(source: "http://img08.pixiv.net/profile/nice/1408837.jpg")
assert_nil(@post.pixiv_id)
end
end
end end
should "normalize pixiv links" do should "normalize pixiv links" do

View File

@@ -4,8 +4,7 @@ module Sources
class ArtStationTest < ActiveSupport::TestCase class ArtStationTest < ActiveSupport::TestCase
context "The source site for an art station artwork page" do context "The source site for an art station artwork page" do
setup do setup do
@site = Sources::Site.new("https://www.artstation.com/artwork/04XA4") @site = Sources::Strategies.find("https://www.artstation.com/artwork/04XA4")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -32,8 +31,7 @@ module Sources
context "The source site for an art station projects page" do context "The source site for an art station projects page" do
setup do setup do
@site = Sources::Site.new("https://dantewontdie.artstation.com/projects/YZK5q") @site = Sources::Strategies.find("https://dantewontdie.artstation.com/projects/YZK5q")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -61,8 +59,7 @@ module Sources
context "The source site for a www.artstation.com/artwork/$slug page" do context "The source site for a www.artstation.com/artwork/$slug page" do
setup do setup do
@site = Sources::Site.new("https://www.artstation.com/artwork/cody-from-sf") @site = Sources::Strategies.find("https://www.artstation.com/artwork/cody-from-sf")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -75,8 +72,7 @@ module Sources
setup do setup do
@url = "https://cdna.artstation.com/p/assets/images/images/006/029/978/large/amama-l-z.jpg" @url = "https://cdna.artstation.com/p/assets/images/images/006/029/978/large/amama-l-z.jpg"
@ref = "https://www.artstation.com/artwork/4BWW2" @ref = "https://www.artstation.com/artwork/4BWW2"
@site = Sources::Site.new(@url, referer_url: @ref) @site = Sources::Strategies.find(@url, @ref)
@site.get
end end
should "fetch the source data" do should "fetch the source data" do
@@ -86,8 +82,7 @@ module Sources
context "The source site for an ArtStation gallery" do context "The source site for an ArtStation gallery" do
setup do setup do
@site = Sources::Site.new("https://www.artstation.com/artwork/BDxrA") @site = Sources::Strategies.find("https://www.artstation.com/artwork/BDxrA")
@site.get
end end
should "get only image urls, not video urls" do should "get only image urls, not video urls" do

View File

@@ -9,8 +9,7 @@ module Sources
context "A path-based artist url" do context "A path-based artist url" do
setup do setup do
@site = Sources::Site.new("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
@site.get
end end
should "work" do should "work" do
@@ -20,8 +19,7 @@ module Sources
context "The source for a private DeviantArt image URL" do context "The source for a private DeviantArt image URL" do
setup do setup do
@site = Sources::Site.new("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png") @site = Sources::Strategies.find("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png")
@site.get
end end
should "work" do should "work" do
@@ -31,25 +29,24 @@ module Sources
context "The source for a download-disabled DeviantArt artwork page" do context "The source for a download-disabled DeviantArt artwork page" do
should "get the image url" do should "get the image url" do
@site = Sources::Site.new("https://noizave.deviantart.com/art/test-no-download-697415967") @site = Sources::Strategies.find("https://noizave.deviantart.com/art/test-no-download-697415967")
assert_equal(["https://img00.deviantart.net/56ee/i/2017/219/2/3/test__no_download_by_noizave-dbj81lr.jpg"], @site.image_urls) assert_equal(["https://img00.deviantart.net/56ee/i/2017/219/2/3/test__no_download_by_noizave-dbj81lr.jpg"], @site.image_urls)
end end
end end
context "The source for a DeviantArt image url" do context "The source for a DeviantArt image url" do
should "fetch the source data" do should "fetch the source data" do
@site = Sources::Site.new("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg") @site = Sources::Strategies.find("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg")
assert_equal("hideyoshi", @site.artist_name) assert_equal("hideyoshi", @site.artist_name)
assert_equal("https://hideyoshi.deviantart.com", @site.profile_url) assert_equal("https://www.deviantart.com/hideyoshi", @site.profile_url)
assert_equal("https://orig00.deviantart.net/9e1f/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg", @site.image_url) assert_equal("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg", @site.image_url)
end end
end end
context "The source for an DeviantArt artwork page" do context "The source for an DeviantArt artwork page" do
setup do setup do
@site = Sources::Site.new("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -107,8 +104,7 @@ module Sources
context "The source for a login-only DeviantArt artwork page" do context "The source for a login-only DeviantArt artwork page" do
setup do setup do
@site = Sources::Site.new("http://noizave.deviantart.com/art/hidden-work-685458369") @site = Sources::Strategies.find("http://noizave.deviantart.com/art/hidden-work-685458369")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -118,8 +114,7 @@ module Sources
context "A source with malformed links in the artist commentary" do context "A source with malformed links in the artist commentary" do
should "fix the links" do should "fix the links" do
@site = Sources::Site.new("https://teemutaiga.deviantart.com/art/Kisu-620666655") @site = Sources::Strategies.find("https://teemutaiga.deviantart.com/art/Kisu-620666655")
@site.get
assert_match(%r!"Print available at Inprnt":\[http://www.inprnt.com/gallery/teemutaiga/kisu\]!, @site.dtext_artist_commentary_desc) assert_match(%r!"Print available at Inprnt":\[http://www.inprnt.com/gallery/teemutaiga/kisu\]!, @site.dtext_artist_commentary_desc)
end end

View File

@@ -4,11 +4,8 @@ module Sources
class NicoSeigaTest < ActiveSupport::TestCase class NicoSeigaTest < ActiveSupport::TestCase
context "The source site for nico seiga" do context "The source site for nico seiga" do
setup do setup do
@site_1 = Sources::Site.new("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663") @site_1 = Sources::Strategies.find("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
@site_1.get @site_2 = Sources::Strategies.find("http://seiga.nicovideo.jp/seiga/im4937663")
@site_2 = Sources::Site.new("http://seiga.nicovideo.jp/seiga/im4937663")
@site_2.get
end end
should "get the profile" do should "get the profile" do
@@ -34,11 +31,11 @@ module Sources
should "get the tags" do should "get the tags" do
assert(@site_1.tags.size > 0) assert(@site_1.tags.size > 0)
first_tag = @site_1.tags.first first_tag = @site_1.tags.first
assert_equal(["アニメ", "http://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag)
assert(@site_2.tags.size > 0) assert(@site_2.tags.size > 0)
first_tag = @site_2.tags.first first_tag = @site_2.tags.first
assert_equal(["アニメ", "http://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag) assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag)
end end
should "convert a page into a json representation" do should "convert a page into a json representation" do
@@ -51,8 +48,7 @@ module Sources
end end
should "work for a https://lohas.nicoseiga.jp/thumb/${id}i url" do should "work for a https://lohas.nicoseiga.jp/thumb/${id}i url" do
site = Sources::Site.new("https://lohas.nicoseiga.jp/thumb/6844226i") site = Sources::Strategies.find("https://lohas.nicoseiga.jp/thumb/6844226i")
site.get
full_image_url = %r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226! full_image_url = %r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226!
assert_match(full_image_url, site.image_url) assert_match(full_image_url, site.image_url)

View File

@@ -7,9 +7,7 @@ module Sources
CurrentUser.user = FactoryBot.create(:user) CurrentUser.user = FactoryBot.create(:user)
CurrentUser.ip_addr = "127.0.0.1" CurrentUser.ip_addr = "127.0.0.1"
@site = Sources::Site.new("http://nijie.info/view.php?id=213043") @site = Sources::Strategies.find("https://nijie.info/view.php?id=213043")
@site.get
sleep(5)
end end
should "get the image url" do should "get the image url" do
@@ -17,7 +15,7 @@ module Sources
end end
should "get the profile" do should "get the profile" do
assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url) assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
end end
should "get the artist name" do should "get the artist name" do
@@ -25,15 +23,14 @@ module Sources
end end
should "get the tags" do should "get the tags" do
assert_equal([["眼鏡", "http://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "http://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "http://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags) assert_equal([["眼鏡", "https://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "https://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "https://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags)
end end
should "normalize characters in tags" do should "normalize characters in tags" do
FactoryBot.create(:tag, :name => "kaga") FactoryBot.create(:tag, :name => "kaga")
FactoryBot.create(:wiki_page, :title => "kaga", :other_names => "加賀(艦これ)") FactoryBot.create(:wiki_page, :title => "kaga", :other_names => "加賀(艦これ)")
@site = Sources::Site.new("http://nijie.info/view.php?id=208316") @site = Sources::Strategies.find("https://nijie.info/view.php?id=208316")
@site.get
assert_includes(@site.tags.map(&:first), "加賀(艦これ)") assert_includes(@site.tags.map(&:first), "加賀(艦これ)")
assert_includes(@site.translated_tags.map(&:first), "kaga") assert_includes(@site.translated_tags.map(&:first), "kaga")
@@ -50,16 +47,15 @@ module Sources
context "The source site for a nijie referer url" do context "The source site for a nijie referer url" do
setup do setup do
@site = Sources::Site.new("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", referer_url: "https://nijie.info/view_popup.php?id=213043") @site = Sources::Strategies.find("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", "https://nijie.info/view_popup.php?id=213043")
@site.get
end end
should "get the image url" do should "get the image url" do
assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url) assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url)
end end
should "get the profile" do should "get the profile" do
assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url) assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
end end
should "get the artist name" do should "get the artist name" do
@@ -69,8 +65,7 @@ module Sources
context "The source site for a nijie popup" do context "The source site for a nijie popup" do
setup do setup do
@site = Sources::Site.new("https://nijie.info/view_popup.php?id=213043") @site = Sources::Strategies.find("https://nijie.info/view_popup.php?id=213043")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -78,7 +73,7 @@ module Sources
end end
should "get the profile" do should "get the profile" do
assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url) assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
end end
should "get the artist name" do should "get the artist name" do
@@ -88,8 +83,7 @@ module Sources
context "The source site for a nijie gallery" do context "The source site for a nijie gallery" do
setup do setup do
@site = Sources::Site.new("http://nijie.info/view.php?id=218856") @site = Sources::Strategies.find("https://nijie.info/view.php?id=218856")
@site.get
end end
should "get the image urls" do should "get the image urls" do

View File

@@ -5,8 +5,7 @@ module Sources
context "The source site for a https://pawoo.net/web/status/$id url" do context "The source site for a https://pawoo.net/web/status/$id url" do
setup do setup do
skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id
@site = Sources::Site.new("https://pawoo.net/web/statuses/1202176") @site = Sources::Strategies.find("https://pawoo.net/web/statuses/1202176")
@site.get
end end
should "get the profile" do should "get the profile" do
@@ -35,8 +34,7 @@ module Sources
context "The source site for a https://pawoo.net/$user/$id url" do context "The source site for a https://pawoo.net/$user/$id url" do
setup do setup do
skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id
@site = Sources::Site.new("https://pawoo.net/@evazion/19451018") @site = Sources::Strategies.find("https://pawoo.net/@evazion/19451018")
@site.get
end end
should "get the profile" do should "get the profile" do
@@ -89,8 +87,7 @@ module Sources
skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id
@url = "https://img.pawoo.net/media_attachments/files/001/298/028/original/55a6fd252778454b.mp4" @url = "https://img.pawoo.net/media_attachments/files/001/298/028/original/55a6fd252778454b.mp4"
@ref = "https://pawoo.net/@evazion/19451018" @ref = "https://pawoo.net/@evazion/19451018"
@site = Sources::Site.new(@url, referer_url: @ref) @site = Sources::Strategies.find(@url, @ref)
@site.get
end end
should "fetch the source data" do should "fetch the source data" do

View File

@@ -3,8 +3,8 @@ require 'test_helper'
module Sources module Sources
class PixivTest < ActiveSupport::TestCase class PixivTest < ActiveSupport::TestCase
def get_source(source) def get_source(source)
@site = Sources::Site.new(source) @site = Sources::Strategies.find(source)
@site.get
@site @site
rescue Net::OpenTimeout rescue Net::OpenTimeout
skip "Remote connection to #{source} failed" skip "Remote connection to #{source} failed"
@@ -23,19 +23,22 @@ module Sources
context "in all cases" do context "in all cases" do
context "A touch page" do context "A touch page" do
setup do setup do
@site = Sources::Site.new("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915") @site = Sources::Strategies.find("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915")
@image_urls = @site.get @image_urls = @site.image_urls
end end
should "get all the image urls" do should "get all the image urls" do
assert_equal("https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png", @image_urls) expected_urls = [
"https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png",
"https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p1.png"
].sort
assert_equal(expected_urls, @image_urls.sort)
end end
end end
context "A gallery page" do context "A gallery page" do
setup do setup do
@site = Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482") @site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482")
@site.get
@image_urls = @site.image_urls @image_urls = @site.image_urls
end end
@@ -46,8 +49,7 @@ module Sources
context "An ugoira source site for pixiv" do context "An ugoira source site for pixiv" do
setup do setup do
@site = Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364") @site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
@site.get
end end
should "get the file url" do should "get the file url" do
@@ -66,8 +68,7 @@ module Sources
context "A https://i.pximg.net/img-zip/ugoira/* source" do context "A https://i.pximg.net/img-zip/ugoira/* source" do
should "get the metadata" do should "get the metadata" do
@site = Sources::Site.new("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip") @site = Sources::Strategies.find("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip")
@site.get
assert_equal("uroobnad2", @site.artist_name) assert_equal("uroobnad2", @site.artist_name)
end end
@@ -79,7 +80,7 @@ module Sources
end end
should "get the profile" do should "get the profile" do
assert_equal("http://www.pixiv.net/member.php?id=696859", @site.profile_url) assert_equal("https://www.pixiv.net/member.php?id=696859", @site.profile_url)
end end
should "get the artist name" do should "get the artist name" do
@@ -142,12 +143,17 @@ module Sources
should "get the full size image url" do should "get the full size image url" do
assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.image_url) assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.image_url)
end end
should "get the full size image url for the canonical url" do
assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url)
end
end end
context "fetching source data for a deleted work" do context "fetching source data for a deleted work" do
should "raise a bad id error" do should "raise a bad id error" do
assert_raise(::PixivApiClient::BadIDError) do assert_raise(::PixivApiClient::BadIDError) do
get_source("https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png") get_source("https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png")
@site.image_urls
end end
end end
end end

View File

@@ -9,8 +9,7 @@ module Sources
context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do
setup do setup do
@site = Sources::Site.new("https://noizave.tumblr.com/post/162206271767") @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162206271767")
@site.get
end end
should "get the artist name" do should "get the artist name" do
@@ -22,7 +21,7 @@ module Sources
end end
should "get the tags" do should "get the tags" do
tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red-hair"]] tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]]
assert_equal(tags, @site.tags) assert_equal(tags, @site.tags)
end end
@@ -68,7 +67,7 @@ module Sources
end end
should "get the image url" do should "get the image url" do
assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url) assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png", @site.image_url)
end end
should "get the artist" do should "get the artist" do
@@ -82,16 +81,15 @@ module Sources
context "The source for a 'http://*.tumblr.com/image/*' image page" do context "The source for a 'http://*.tumblr.com/image/*' image page" do
setup do setup do
@site = Sources::Site.new("https://noizave.tumblr.com/image/162206271767") @site = Sources::Strategies.find("https://noizave.tumblr.com/image/162206271767")
@site.get
end end
should "get the image url" do should "get the image url" do
assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url) assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png", @site.image_url)
end end
should "get the tags" do should "get the tags" do
tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red-hair"]] tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]]
assert_equal(tags, @site.tags) assert_equal(tags, @site.tags)
end end
end end
@@ -100,20 +98,19 @@ module Sources
setup do setup do
@url = "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg" @url = "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"
@ref = "https://noizave.tumblr.com/post/162094447052" @ref = "https://noizave.tumblr.com/post/162094447052"
@site = Sources::Site.new(@url, referer_url: @ref) @site = Sources::Strategies.find(@url, @ref)
@site.get
end end
should "get the image urls" do should "get the image urls" do
urls = %w[ urls = %w[
http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_raw.png https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_1280.png
http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_raw.jpg https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg
http://data.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_raw.gif https://media.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_1280.gif
http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_raw.png https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_1280.png
http://data.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_raw.gif https://media.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_1280.gif
] ]
assert_equal(urls, @site.image_urls) assert_equal(urls.sort, @site.image_urls.sort)
end end
should "get the tags" do should "get the tags" do
@@ -129,17 +126,16 @@ module Sources
context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do
setup do setup do
@site = Sources::Site.new("https://noizave.tumblr.com/post/162221502947") @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162221502947")
@site.get
end end
should "get the image urls" do should "get the image urls" do
urls = %w[ urls = %w[
http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_raw.png https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_1280.png
http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_raw.jpg https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_1280.jpg
] ]
assert_equal(urls, @site.image_urls) assert_equal(urls.sort, @site.image_urls.sort)
end end
should "get the commentary" do should "get the commentary" do
@@ -151,14 +147,13 @@ module Sources
context "The source for a 'http://*.tumblr.com/post/*' video post with inline images" do context "The source for a 'http://*.tumblr.com/post/*' video post with inline images" do
setup do setup do
@site = Sources::Site.new("https://noizave.tumblr.com/post/162222617101") @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162222617101")
@site.get
end end
should "get the image urls" do should "get the image urls" do
urls = %w[ urls = %w[
https://vtt.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4 https://vtt.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4
http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_raw.png https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_1280.png
] ]
assert_equal(urls, @site.image_urls) assert_equal(urls, @site.image_urls)
@@ -167,12 +162,11 @@ module Sources
context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do
setup do setup do
@site = Sources::Site.new("https://noizave.tumblr.com/post/171237880542/test-ask") @site = Sources::Strategies.find("https://noizave.tumblr.com/post/171237880542/test-ask")
@site.get
end end
should "get the image urls" do should "get the image urls" do
urls = ["http://data.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_raw.png"] urls = ["https://media.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_1280.png"]
assert_equal(urls, @site.image_urls) assert_equal(urls, @site.image_urls)
end end

View File

@@ -2,79 +2,16 @@ require 'test_helper'
module Sources module Sources
class TwitterTest < ActiveSupport::TestCase class TwitterTest < ActiveSupport::TestCase
context "A video" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/CincinnatiZoo/status/859073537713328129")
@site.get
end
should "get the image url" do
assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url)
end
end
context "An animated gif" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/DaniStrawberry1/status/859435334765088769")
@site.get
end
should "get the image url" do
assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url)
end
end
context "A twitter summary card" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/NatGeo/status/932700115936178177")
@site.get
end
should "get the image url" do
assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url)
end
end
context "A twitter summary card from twitter" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/masayasuf/status/870734961778630656/photo/1")
@site.get
end
should "get the image url" do
assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url)
end
end
context "A twitter summary card from twitter with a :large image" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/aranobu/status/817736083567820800")
@site.get
end
should "get the image url" do
assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig", @site.image_url)
end
end
context "An extended tweet" do context "An extended tweet" do
should "extract the correct image url" do should "extract the correct image url" do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/onsen_musume_jp/status/865534101918330881") @site = Sources::Strategies.find("https://twitter.com/onsen_musume_jp/status/865534101918330881")
@site.get
assert_equal(["https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"], @site.image_urls) assert_equal(["https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"], @site.image_urls)
end end
should "extract all the image urls" do should "extract all the image urls" do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/aoimanabu/status/892370963630743552") @site = Sources::Strategies.find("https://twitter.com/aoimanabu/status/892370963630743552")
@site.get
urls = %w[ urls = %w[
https://pbs.twimg.com/media/DGJWp59UIAA_-en.jpg:orig https://pbs.twimg.com/media/DGJWp59UIAA_-en.jpg:orig
@@ -86,11 +23,71 @@ module Sources
end end
end end
context "A video" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129")
end
should "get the image url" do
assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url)
end
end
context "An animated gif" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Strategies.find("https://twitter.com/DaniStrawberry1/status/859435334765088769")
end
should "get the image url" do
assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url)
end
end
context "A twitter summary card" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Strategies.find("https://twitter.com/NatGeo/status/932700115936178177")
end
should "get the image url" do
assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url)
end
end
context "A twitter summary card from twitter" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Strategies.find("https://twitter.com/masayasuf/status/870734961778630656/photo/1")
end
should "get the image url" do
skip "Find another url, the masayasuf tweet no longer exists"
assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url)
end
end
context "A twitter summary card from twitter with a :large image" do
setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Strategies.find("https://twitter.com/aranobu/status/817736083567820800")
end
should "get the image url" do
assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig", @site.image_url)
end
should "get the canonical url" do
assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url)
end
end
context "The source site for a restricted twitter" do context "The source site for a restricted twitter" do
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://mobile.twitter.com/Strangestone/status/556440271961858051") @site = Sources::Strategies.find("https://mobile.twitter.com/Strangestone/status/556440271961858051")
@site.get
end end
should "get the image url" do should "get the image url" do
@@ -101,8 +98,7 @@ module Sources
context "The source site for twitter" do context "The source site for twitter" do
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://mobile.twitter.com/nounproject/status/540944400767922176") @site = Sources::Strategies.find("https://mobile.twitter.com/nounproject/status/540944400767922176")
@site.get
end end
should "get the profile" do should "get the profile" do
@@ -135,8 +131,7 @@ module Sources
context "The source site for a direct image and a referer" do context "The source site for a direct image and a referer" do
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large", referer_url: "https://twitter.com/nounproject/status/540944400767922176") @site = Sources::Strategies.find("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large", "https://twitter.com/nounproject/status/540944400767922176")
@site.get
end end
should "get the artist name" do should "get the artist name" do
@@ -151,8 +146,7 @@ module Sources
context "The source site for a https://twitter.com/i/web/status/:id url" do context "The source site for a https://twitter.com/i/web/status/:id url" do
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/i/web/status/943446161586733056") @site = Sources::Strategies.find("https://twitter.com/i/web/status/943446161586733056")
@site.get
end end
should "fetch the source data" do should "fetch the source data" do
@@ -163,8 +157,7 @@ module Sources
context "A tweet" do context "A tweet" do
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Site.new("https://twitter.com/noizave/status/875768175136317440") @site = Sources::Strategies.find("https://twitter.com/noizave/status/875768175136317440")
@site.get
end end
should "convert urls, hashtags, and mentions to dtext" do should "convert urls, hashtags, and mentions to dtext" do

View File

@@ -17,6 +17,7 @@ class TagAliasCorrectionTest < ActiveSupport::TestCase
context "with a bad cache and post counts" do context "with a bad cache and post counts" do
setup do setup do
Cache.delete("ta:#{Cache.hash('bbb')}")
Cache.put("ta:#{Cache.hash('aaa')}", "zzz") Cache.put("ta:#{Cache.hash('aaa')}", "zzz")
Tag.where(:name => "aaa").update_all("post_count = -3") Tag.where(:name => "aaa").update_all("post_count = -3")
@correction = TagAliasCorrection.new(@tag_alias.id) @correction = TagAliasCorrection.new(@tag_alias.id)