integrate ugoiras into zip+webm+preview
This commit is contained in:
@@ -2,45 +2,49 @@ module Downloads
|
||||
class File
|
||||
class Error < Exception ; end
|
||||
|
||||
attr_reader :tries
|
||||
attr_reader :data
|
||||
attr_accessor :source, :content_type, :file_path
|
||||
|
||||
def initialize(source, file_path)
|
||||
def initialize(source, file_path, options = {})
|
||||
# source can potentially get rewritten in the course
|
||||
# of downloading a file, so check it again
|
||||
@source = source
|
||||
|
||||
# where to save the download
|
||||
@file_path = file_path
|
||||
@tries = 0
|
||||
|
||||
# we sometimes need to capture data from the source page
|
||||
@data = {:is_ugoira => options[:is_ugoira]}
|
||||
end
|
||||
|
||||
def download!
|
||||
http_get_streaming do |response|
|
||||
@source, @data = http_get_streaming(@source, @data) do |response|
|
||||
self.content_type = response["Content-Type"]
|
||||
::File.open(file_path, "wb") do |out|
|
||||
::File.open(@file_path, "wb") do |out|
|
||||
response.read_body(out)
|
||||
end
|
||||
end
|
||||
after_download
|
||||
@source = after_download(@source)
|
||||
end
|
||||
|
||||
def before_download(url, headers)
|
||||
def before_download(url, headers, datums)
|
||||
RewriteStrategies::Base.strategies.each do |strategy|
|
||||
url, headers = strategy.new.rewrite(url, headers)
|
||||
url, headers, datums = strategy.new.rewrite(url, headers, datums)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, datums]
|
||||
end
|
||||
|
||||
def after_download
|
||||
fix_image_board_sources
|
||||
def after_download(src)
|
||||
fix_image_board_sources(src)
|
||||
end
|
||||
|
||||
def url
|
||||
URI.parse(source)
|
||||
end
|
||||
|
||||
def http_get_streaming(options = {})
|
||||
def http_get_streaming(src, datums = {}, options = {})
|
||||
max_size = options[:max_size] || Danbooru.config.max_file_size
|
||||
max_size = nil if max_size == 0 # unlimited
|
||||
limit = 4
|
||||
tries = 0
|
||||
url = URI.parse(src)
|
||||
|
||||
while true
|
||||
unless url.is_a?(URI::HTTP) || url.is_a?(URI::HTTPS)
|
||||
@@ -50,7 +54,8 @@ module Downloads
|
||||
headers = {
|
||||
"User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}"
|
||||
}
|
||||
@source, headers = before_download(source, headers)
|
||||
src, headers, datums = before_download(src, headers, datums)
|
||||
url = URI.parse(src)
|
||||
|
||||
begin
|
||||
Net::HTTP.start(url.host, url.port, :use_ssl => url.is_a?(URI::HTTPS)) do |http|
|
||||
@@ -63,13 +68,13 @@ module Downloads
|
||||
raise Error.new("File is too large (#{len} bytes)") if len && len.to_i > max_size
|
||||
end
|
||||
yield(res)
|
||||
return
|
||||
return [src, datums]
|
||||
|
||||
when Net::HTTPRedirection then
|
||||
if limit == 0 then
|
||||
raise Error.new("Too many redirects")
|
||||
end
|
||||
@source = res["location"]
|
||||
src = res["location"]
|
||||
limit -= 1
|
||||
|
||||
else
|
||||
@@ -78,19 +83,23 @@ module Downloads
|
||||
end # http.request_get
|
||||
end # http.start
|
||||
rescue Errno::ECONNRESET, Errno::ETIMEDOUT, Errno::EIO, Errno::EHOSTUNREACH, Errno::ECONNREFUSED, IOError => x
|
||||
@tries += 1
|
||||
if @tries < 3
|
||||
tries += 1
|
||||
if tries < 3
|
||||
retry
|
||||
else
|
||||
raise
|
||||
end
|
||||
end
|
||||
end # while
|
||||
|
||||
[src, datums]
|
||||
end # def
|
||||
|
||||
def fix_image_board_sources
|
||||
if source =~ /i\.4cdn\.org|\/src\/\d{12,}|urnc\.yi\.org|yui\.cynthia\.bne\.jp/
|
||||
@source = "Image board"
|
||||
def fix_image_board_sources(src)
|
||||
if src =~ /i\.4cdn\.org|\/src\/\d{12,}|urnc\.yi\.org|yui\.cynthia\.bne\.jp/
|
||||
"Image board"
|
||||
else
|
||||
src
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
@@ -5,8 +5,8 @@ module Downloads
|
||||
[Pixiv, NicoSeiga, Twitpic, DeviantArt, Tumblr, Moebooru]
|
||||
end
|
||||
|
||||
def rewrite(url, headers)
|
||||
return [url, headers]
|
||||
def rewrite(url, headers, data = {})
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
module Downloads
|
||||
module RewriteStrategies
|
||||
class DeviantArt < Base
|
||||
def rewrite(url, headers)
|
||||
def rewrite(url, headers, data = {})
|
||||
if url =~ /https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
|
||||
url, headers = rewrite_html_pages(url, headers)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
@@ -3,12 +3,12 @@ module Downloads
|
||||
class Moebooru < Base
|
||||
DOMAINS = '(?:[^.]+\.)?yande\.re|konachan\.com'
|
||||
|
||||
def rewrite(url, headers)
|
||||
def rewrite(url, headers, data = {})
|
||||
if url =~ %r{https?://(?:#{DOMAINS})}
|
||||
url, headers = rewrite_jpeg_versions(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
module Downloads
|
||||
module RewriteStrategies
|
||||
class NicoSeiga < Base
|
||||
def rewrite(url, headers)
|
||||
def rewrite(url, headers, data = {})
|
||||
if url =~ %r{https?://lohas\.nicoseiga\.jp} || url =~ %r{https?://seiga\.nicovideo\.jp}
|
||||
url, headers = rewrite_headers(url, headers)
|
||||
url, headers = rewrite_html_pages(url, headers)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
module Downloads
|
||||
module RewriteStrategies
|
||||
class Pixiv < Base
|
||||
def rewrite(url, headers)
|
||||
def rewrite(url, headers, data = {})
|
||||
if url =~ /https?:\/\/(?:\w+\.)?pixiv\.net/
|
||||
url, headers = rewrite_headers(url, headers)
|
||||
url, headers = rewrite_cdn(url, headers)
|
||||
url, headers = rewrite_html_pages(url, headers)
|
||||
url, headers, data = rewrite_html_pages(url, headers, data)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
url, headers = rewrite_old_small_manga_pages(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
@@ -31,9 +31,12 @@ module Downloads
|
||||
if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
|
||||
source = ::Sources::Strategies::Pixiv.new(url)
|
||||
source.get
|
||||
return [source.image_url, headers]
|
||||
data[:ugoira_frame_data] = source.ugoira_frame_data
|
||||
data[:ugoira_width] = source.ugoira_width
|
||||
data[:ugoira_height] = source.ugoira_height
|
||||
return [source.file_url, headers, data]
|
||||
else
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
module Downloads
|
||||
module RewriteStrategies
|
||||
class Tumblr < Base
|
||||
def rewrite(url, headers)
|
||||
def rewrite(url, headers, data = {})
|
||||
if url =~ %r{^https?://.*tumblr\.com}
|
||||
url, headers = rewrite_cdn(url, headers)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
module Downloads
|
||||
module RewriteStrategies
|
||||
class Twitpic < Base
|
||||
def rewrite(url, headers)
|
||||
def rewrite(url, headers, data = {})
|
||||
if url =~ %r{https?://twitpic\.com} || url =~ %r{^https?://d3j5vwomefv46c\.cloudfront\.net}
|
||||
url, headers = rewrite_html_pages(url, headers)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
return [url, headers, data]
|
||||
end
|
||||
|
||||
protected
|
||||
|
||||
@@ -1,43 +1,11 @@
|
||||
class PixivUgoiraConverter
|
||||
attr_reader :agent, :url, :write_path, :format
|
||||
|
||||
def initialize(url, write_path, format)
|
||||
@url = url
|
||||
@write_path = write_path
|
||||
@format = format
|
||||
def convert(source_path, output_path, preview_path, frame_data)
|
||||
folder = unpack(File.open(source_path))
|
||||
write_webm(folder, output_path, frame_data)
|
||||
write_preview(folder, preview_path)
|
||||
end
|
||||
|
||||
def process!
|
||||
folder = unpack(fetch_zipped_body)
|
||||
|
||||
if format == :gif
|
||||
write_gif(folder)
|
||||
elsif format == :webm
|
||||
write_webm(folder)
|
||||
elsif format == :apng
|
||||
write_apng(folder)
|
||||
end
|
||||
end
|
||||
|
||||
def write_gif(folder)
|
||||
anim = Magick::ImageList.new
|
||||
delay_sum = 0
|
||||
folder.each_with_index do |file, i|
|
||||
image_blob = file.get_input_stream.read
|
||||
image = Magick::Image.from_blob(image_blob).first
|
||||
image.ticks_per_second = 1000
|
||||
delay = @frame_data[i]["delay"]
|
||||
rounded_delay = (delay_sum + delay).round(-1) - delay_sum.round(-1)
|
||||
image.delay = rounded_delay
|
||||
delay_sum += delay
|
||||
anim << image
|
||||
end
|
||||
|
||||
anim = anim.optimize_layers(Magick::OptimizeTransLayer)
|
||||
anim.write("gif:" + write_path)
|
||||
end
|
||||
|
||||
def write_webm(folder)
|
||||
def write_webm(folder, write_path, frame_data)
|
||||
Dir.mktmpdir do |tmpdir|
|
||||
FileUtils.mkdir_p("#{tmpdir}/images")
|
||||
folder.each_with_index do |file, i|
|
||||
@@ -62,7 +30,7 @@ class PixivUgoiraConverter
|
||||
timecodes_path = File.join(tmpdir, "timecodes.tc")
|
||||
File.open(timecodes_path, "w+") do |f|
|
||||
f.write("# timecode format v2\n")
|
||||
@frame_data.each do |img|
|
||||
frame_data.each do |img|
|
||||
f.write("#{delay_sum}\n")
|
||||
delay_sum += img["delay"]
|
||||
end
|
||||
@@ -71,68 +39,21 @@ class PixivUgoiraConverter
|
||||
end
|
||||
|
||||
ext = folder.first.name.match(/\.(\w{,4})$/)[1]
|
||||
system("ffmpeg -i #{tmpdir}/images/%06d.#{ext} -codec:v libvpx -crf 4 -b:v 5000k -an #{tmpdir}/tmp.webm")
|
||||
system("mkvmerge -o #{write_path} --webm --timecodes 0:#{tmpdir}/timecodes.tc #{tmpdir}/tmp.webm")
|
||||
system("ffmpeg -loglevel quiet -i #{tmpdir}/images/%06d.#{ext} -codec:v libvpx -crf 4 -b:v 5000k -an #{tmpdir}/tmp.webm")
|
||||
system("mkvmerge -q -o #{write_path} --webm --timecodes 0:#{tmpdir}/timecodes.tc #{tmpdir}/tmp.webm")
|
||||
end
|
||||
end
|
||||
|
||||
def write_apng(folder)
|
||||
Dir.mktmpdir do |tmpdir|
|
||||
folder.each_with_index do |file, i|
|
||||
frame_path = File.join(tmpdir, "frame#{"%03d" % i}.png")
|
||||
delay_path = File.join(tmpdir, "frame#{"%03d" % i}.txt")
|
||||
image_blob = file.get_input_stream.read
|
||||
delay = @frame_data[i]["delay"]
|
||||
image = Magick::Image.from_blob(image_blob).first
|
||||
image.format = "PNG"
|
||||
image.write(frame_path)
|
||||
File.open(delay_path, "wb") do |f|
|
||||
f.write("delay=#{delay}/1000")
|
||||
end
|
||||
end
|
||||
system("apngasm -o -F #{write_path} #{tmpdir}/frame*.png")
|
||||
end
|
||||
def write_preview(folder, path)
|
||||
file = folder.first
|
||||
image_blob = file.get_input_stream.read
|
||||
image = Magick::Image.from_blob(image_blob).first
|
||||
image.write(path)
|
||||
end
|
||||
|
||||
def unpack(zipped_body)
|
||||
def unpack(zip_file)
|
||||
folder = Zip::CentralDirectory.new
|
||||
folder.read_from_stream(StringIO.new(zipped_body))
|
||||
folder.read_from_stream(zip_file)
|
||||
folder
|
||||
end
|
||||
|
||||
def fetch_zipped_body
|
||||
zip_body = nil
|
||||
zip_url, @frame_data = fetch_frames
|
||||
|
||||
Downloads::File.new(zip_url, nil).http_get_streaming do |response|
|
||||
zip_body = response.body
|
||||
end
|
||||
|
||||
zip_body
|
||||
end
|
||||
|
||||
def agent
|
||||
@agent ||= Sources::Strategies::Pixiv.new(url).agent
|
||||
end
|
||||
|
||||
def fetch_frames
|
||||
agent.get(url) do |page|
|
||||
# Get the zip url and frame delay by parsing javascript contained in a <script> tag on the page.
|
||||
# Not a neat solution, but I haven't found any other location that has the frame delays listed.
|
||||
scripts = page.search("body script").find_all do |node|
|
||||
node.text =~ /_ugoira600x600\.zip/
|
||||
end
|
||||
|
||||
if scripts.any?
|
||||
javascript = scripts.first.text
|
||||
json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1]
|
||||
data = JSON.parse(json)
|
||||
zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
|
||||
frame_data = data["frames"]
|
||||
return [zip_url, frame_data]
|
||||
else
|
||||
raise "Can't find javascript with frame data"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
21
app/logical/pixiv_ugoira_service.rb
Normal file
21
app/logical/pixiv_ugoira_service.rb
Normal file
@@ -0,0 +1,21 @@
|
||||
class PixivUgoiraService
|
||||
attr_reader :width, :height, :frame_data
|
||||
|
||||
def process(post)
|
||||
save_frame_data(post)
|
||||
end
|
||||
|
||||
def save_frame_data(post)
|
||||
PixivUgoiraFrameData.create(:data => @frame_data, :post_id => post.id)
|
||||
end
|
||||
|
||||
def generate_resizes(source_path, output_path, preview_path)
|
||||
PixivUgoiraConverter.new.convert(source_path, output_path, preview_path, @frame_data)
|
||||
end
|
||||
|
||||
def load(data)
|
||||
@frame_data = data[:ugoira_frame_data]
|
||||
@width = data[:ugoira_width]
|
||||
@height = data[:ugoira_height]
|
||||
end
|
||||
end
|
||||
24
app/logical/pixiv_web_agent.rb
Normal file
24
app/logical/pixiv_web_agent.rb
Normal file
@@ -0,0 +1,24 @@
|
||||
class PixivWebAgent
|
||||
def self.build
|
||||
mech = Mechanize.new
|
||||
|
||||
phpsessid = Cache.get("pixiv-phpsessid")
|
||||
if phpsessid
|
||||
cookie = Mechanize::Cookie.new("PHPSESSID", phpsessid)
|
||||
cookie.domain = ".pixiv.net"
|
||||
cookie.path = "/"
|
||||
mech.cookie_jar.add(cookie)
|
||||
else
|
||||
mech.get("http://www.pixiv.net") do |page|
|
||||
page.form_with(:action => "/login.php") do |form|
|
||||
form['pixiv_id'] = Danbooru.config.pixiv_login
|
||||
form['pass'] = Danbooru.config.pixiv_password
|
||||
end.click_button
|
||||
end
|
||||
phpsessid = mech.cookie_jar.cookies.select{|c| c.name == "PHPSESSID"}.first
|
||||
Cache.put("pixiv-phpsessid", phpsessid.value, 1.month) if phpsessid
|
||||
end
|
||||
|
||||
mech
|
||||
end
|
||||
end
|
||||
@@ -5,7 +5,7 @@ module Sources
|
||||
|
||||
class Site
|
||||
attr_reader :url, :strategy
|
||||
delegate :get, :referer_url, :site_name, :artist_name, :profile_url, :image_url, :tags, :artist_record, :unique_id, :page_count, :to => :strategy
|
||||
delegate :get, :referer_url, :site_name, :artist_name, :profile_url, :image_url, :tags, :artist_record, :unique_id, :page_count, :file_url, :ugoira_frame_data, :ugoira_width, :ugoira_height, :to => :strategy
|
||||
|
||||
def self.strategies
|
||||
[Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::Nijie]
|
||||
|
||||
@@ -5,6 +5,8 @@ require 'csv'
|
||||
module Sources
|
||||
module Strategies
|
||||
class Pixiv < Base
|
||||
attr_reader :zip_url, :ugoira_frame_data, :ugoira_width, :ugoira_height
|
||||
|
||||
def self.url_match?(url)
|
||||
url =~ /^https?:\/\/(?:\w+\.)?pixiv\.net/
|
||||
end
|
||||
@@ -43,6 +45,8 @@ module Sources
|
||||
agent.get(URI.parse(normalized_url)) do |page|
|
||||
@artist_name, @profile_url = get_profile_from_page(page)
|
||||
@pixiv_moniker = get_moniker_from_page(page)
|
||||
@image_url = get_image_url_from_page(page)
|
||||
@zip_url, @ugoira_frame_data, @ugoira_width, @ugoira_height = get_zip_url_from_page(page)
|
||||
@tags = get_tags_from_page(page)
|
||||
@page_count = get_page_count_from_page(page)
|
||||
|
||||
@@ -58,28 +62,11 @@ module Sources
|
||||
end
|
||||
|
||||
def agent
|
||||
@agent ||= begin
|
||||
mech = Mechanize.new
|
||||
@agent ||= PixivWebAgent.build
|
||||
end
|
||||
|
||||
phpsessid = Cache.get("pixiv-phpsessid")
|
||||
if phpsessid
|
||||
cookie = Mechanize::Cookie.new("PHPSESSID", phpsessid)
|
||||
cookie.domain = ".pixiv.net"
|
||||
cookie.path = "/"
|
||||
mech.cookie_jar.add(cookie)
|
||||
else
|
||||
mech.get("http://www.pixiv.net") do |page|
|
||||
page.form_with(:action => "/login.php") do |form|
|
||||
form['pixiv_id'] = Danbooru.config.pixiv_login
|
||||
form['pass'] = Danbooru.config.pixiv_password
|
||||
end.click_button
|
||||
end
|
||||
phpsessid = mech.cookie_jar.cookies.select{|c| c.name == "PHPSESSID"}.first
|
||||
Cache.put("pixiv-phpsessid", phpsessid.value, 1.month) if phpsessid
|
||||
end
|
||||
|
||||
mech
|
||||
end
|
||||
def file_url
|
||||
image_url || zip_url
|
||||
end
|
||||
|
||||
protected
|
||||
@@ -191,6 +178,31 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
def get_zip_url_from_page(page)
|
||||
scripts = page.search("body script").find_all do |node|
|
||||
node.text =~ /_ugoira600x600\.zip/
|
||||
end
|
||||
|
||||
if scripts.any?
|
||||
javascript = scripts.first.text
|
||||
|
||||
json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1]
|
||||
data = JSON.parse(json)
|
||||
zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
|
||||
frame_data = data["frames"]
|
||||
|
||||
if javascript =~ /illustSize\s*=\s*\[\s*(\d+)\s*,\s*(\d+)\s*\]/
|
||||
image_width = $1.to_i
|
||||
image_height = $2.to_i
|
||||
else
|
||||
image_width = 600
|
||||
image_height = 600
|
||||
end
|
||||
|
||||
return [zip_url, frame_data, image_width, image_height]
|
||||
end
|
||||
end
|
||||
|
||||
def get_tags_from_page(page)
|
||||
# puts page.root.to_xhtml
|
||||
|
||||
|
||||
Reference in New Issue
Block a user