copied over download.rb changes from oldbooru
This commit is contained in:
@@ -1,40 +1,69 @@
|
|||||||
class Download
|
class Download
|
||||||
class Error < Exception ; end
|
class Error < Exception ; end
|
||||||
|
|
||||||
attr_accessor :source, :content_type
|
attr_accessor :source, :content_type, :file_path
|
||||||
|
|
||||||
def initialize(source, file_path)
|
def initialize(source, file_path)
|
||||||
@source = source
|
@source = source
|
||||||
@file_path = file_path
|
@file_path = file_path
|
||||||
end
|
end
|
||||||
|
|
||||||
# Downloads to @file_path
|
|
||||||
def download!
|
def download!
|
||||||
http_get_streaming(@source) do |response|
|
http_get_streaming do |response|
|
||||||
self.content_type = response["Content-Type"]
|
self.content_type = response["Content-Type"]
|
||||||
File.open(@file_path, "wb") do |out|
|
File.open(file_path, "wb") do |out|
|
||||||
response.read_body(out)
|
response.read_body(out)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
@source = fix_image_board_sources(@source)
|
after_download
|
||||||
end
|
end
|
||||||
|
|
||||||
# private
|
def pixiv_rewrite(headers)
|
||||||
def handle_pixiv(source, headers)
|
return unless source =~ /pixiv\.net/
|
||||||
if source =~ /pixiv\.net/
|
|
||||||
headers["Referer"] = "http://www.pixiv.net"
|
|
||||||
|
|
||||||
# Don't download the small version
|
headers["Referer"] = "http://www.pixiv.net"
|
||||||
if source =~ %r!(/img/.+?/.+?)_m.+$!
|
|
||||||
match = $1
|
# Don't download the small version
|
||||||
source.sub!(match + "_m", match)
|
if source =~ %r!(/img/.+?/.+?)_m.+$!
|
||||||
|
match = $1
|
||||||
|
source.sub!(match + "_m", match)
|
||||||
|
end
|
||||||
|
|
||||||
|
# Download the big version if it exists
|
||||||
|
if source =~ %r!(\d+_p\d+)\.!
|
||||||
|
match = $1
|
||||||
|
repl = match.sub(/_p/, "_big_p")
|
||||||
|
big_source = source.sub(match, repl)
|
||||||
|
if pixiv_http_exists?(big_source)
|
||||||
|
self.source = big_source
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
source
|
|
||||||
end
|
end
|
||||||
|
|
||||||
def http_get_streaming(source, options = {})
|
def pixiv_http_exists?
|
||||||
|
# example: http://img01.pixiv.net/img/as-special/15649262_big_p2.jpg
|
||||||
|
exists = false
|
||||||
|
uri = URI.parse(source)
|
||||||
|
Net::HTTP.start(uri.host, uri.port) do |http|
|
||||||
|
headers = {"Referer" => "http://www.pixiv.net", "User-Agent" => "#{Danbooru.config.app_name}/#{Danbooru.config.version}"}
|
||||||
|
http.request_head(uri.request_uri, headers) do |res|
|
||||||
|
if res.is_a?(Net::HTTPSuccess)
|
||||||
|
exists = true
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
exists
|
||||||
|
end
|
||||||
|
|
||||||
|
def before_download(headers)
|
||||||
|
pixiv_rewrite(headers)
|
||||||
|
end
|
||||||
|
|
||||||
|
def after_download
|
||||||
|
fix_image_board_sources
|
||||||
|
end
|
||||||
|
|
||||||
|
def http_get_streaming(options = {})
|
||||||
max_size = options[:max_size] || Danbooru.config.max_file_size
|
max_size = options[:max_size] || Danbooru.config.max_file_size
|
||||||
max_size = nil if max_size == 0 # unlimited
|
max_size = nil if max_size == 0 # unlimited
|
||||||
limit = 4
|
limit = 4
|
||||||
@@ -51,7 +80,7 @@ class Download
|
|||||||
headers = {
|
headers = {
|
||||||
"User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}"
|
"User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}"
|
||||||
}
|
}
|
||||||
source = handle_pixiv(source, headers)
|
before_download(headers)
|
||||||
url = URI.parse(source)
|
url = URI.parse(source)
|
||||||
http.request_get(url.request_uri, headers) do |res|
|
http.request_get(url.request_uri, headers) do |res|
|
||||||
case res
|
case res
|
||||||
@@ -78,11 +107,9 @@ class Download
|
|||||||
end # while
|
end # while
|
||||||
end # def
|
end # def
|
||||||
|
|
||||||
def fix_image_board_sources(source)
|
def fix_image_board_sources
|
||||||
if source =~ /\/src\/\d{12,}|urnc\.yi\.org|yui\.cynthia\.bne\.jp/
|
if source =~ /\/src\/\d{12,}|urnc\.yi\.org|yui\.cynthia\.bne\.jp/
|
||||||
"Image board"
|
self.source = "Image board"
|
||||||
else
|
|
||||||
source
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -3,92 +3,92 @@ class PixivProxy
|
|||||||
url =~ /pixiv\.net/
|
url =~ /pixiv\.net/
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.get(url)
|
def self.get(url)
|
||||||
if url =~ /\/(\d+)(_m)?\.(jpg|jpeg|png|gif)/i
|
if url =~ /\/(\d+)(_m)?\.(jpg|jpeg|png|gif)/i
|
||||||
url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}"
|
url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}"
|
||||||
get_single(url)
|
get_single(url)
|
||||||
elsif url =~ /member_illust\.php/ && url =~ /illust_id=/
|
elsif url =~ /member_illust\.php/ && url =~ /illust_id=/
|
||||||
get_single(url)
|
get_single(url)
|
||||||
# elsif url =~ /member_illust\.php/ && url =~ /id=/
|
# elsif url =~ /member_illust\.php/ && url =~ /id=/
|
||||||
# get_listing(url)
|
# get_listing(url)
|
||||||
# elsif url =~ /member\.php/ && url =~ /id=/
|
# elsif url =~ /member\.php/ && url =~ /id=/
|
||||||
# get_profile(url)
|
# get_profile(url)
|
||||||
else
|
else
|
||||||
{}
|
{}
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.get_profile(url)
|
def self.get_profile(url)
|
||||||
url = URI.parse(url).request_uri
|
url = URI.parse(url).request_uri
|
||||||
mech = create_mechanize
|
mech = create_mechanize
|
||||||
hash = {}
|
hash = {}
|
||||||
mech.get(url) do |page|
|
mech.get(url) do |page|
|
||||||
hash[:artist] = page.search("a.avatar_m").attr("title").value
|
hash[:artist] = page.search("a.avatar_m").attr("title").value
|
||||||
hash[:listing_url] = "/member_illust.php?id=" + url[/id=(\d+)/, 1]
|
hash[:listing_url] = "/member_illust.php?id=" + url[/id=(\d+)/, 1]
|
||||||
end
|
end
|
||||||
hash
|
hash
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.get_single(url)
|
def self.get_single(url)
|
||||||
url = URI.parse(url).request_uri
|
url = URI.parse(url).request_uri
|
||||||
mech = create_mechanize
|
mech = create_mechanize
|
||||||
hash = {}
|
hash = {}
|
||||||
mech.get(url) do |page|
|
mech.get(url) do |page|
|
||||||
if page.search("a.avatar_m")
|
if page.search("a.avatar_m")
|
||||||
hash[:artist] = page.search("a.avatar_m").attr("title").value
|
hash[:artist] = page.search("a.avatar_m").attr("title").value
|
||||||
hash[:image_url] = page.search("div.works_display/a/img").attr("src").value.sub("_m.", ".")
|
hash[:image_url] = page.search("div.works_display/a/img").attr("src").value.sub("_m.", ".")
|
||||||
hash[:profile_url] = page.search("a.avatar_m").attr("href").value
|
hash[:profile_url] = page.search("a.avatar_m").attr("href").value
|
||||||
hash[:jp_tags] = page.search("span#tags/a").map do |node|
|
hash[:jp_tags] = page.search("span#tags/a").map do |node|
|
||||||
[node.inner_text, node.attribute("href").to_s]
|
[node.inner_text, node.attribute("href").to_s]
|
||||||
end.reject {|x| x[0].empty?}
|
end.reject {|x| x[0].empty?}
|
||||||
else
|
else
|
||||||
hash[:artist] = "?"
|
hash[:artist] = "?"
|
||||||
hash[:image_url] = "?"
|
hash[:image_url] = "?"
|
||||||
hash[:profile_url] = "?"
|
hash[:profile_url] = "?"
|
||||||
hash[:jp_tags] = []
|
hash[:jp_tags] = []
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
hash
|
hash
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.get_listing(url)
|
def self.get_listing(url)
|
||||||
mech = create_mechanize
|
mech = create_mechanize
|
||||||
p = 1
|
p = 1
|
||||||
url = URI.parse(url).request_uri.sub(/&p=\d+/, "") + "&p=1"
|
url = URI.parse(url).request_uri.sub(/&p=\d+/, "") + "&p=1"
|
||||||
more = true
|
more = true
|
||||||
images = []
|
images = []
|
||||||
|
|
||||||
while more
|
while more
|
||||||
mech.get(url) do |page|
|
mech.get(url) do |page|
|
||||||
links = page.search("div#illust_c4/ul/li/a")
|
links = page.search("div#illust_c4/ul/li/a")
|
||||||
|
|
||||||
if links.empty?
|
if links.empty?
|
||||||
more = false
|
more = false
|
||||||
else
|
else
|
||||||
images += links.map do |node|
|
images += links.map do |node|
|
||||||
image_src = node.child.attribute("src").to_s
|
image_src = node.child.attribute("src").to_s
|
||||||
[image_src, image_src.sub("_s.", "."), node.attribute("href").to_s]
|
[image_src, image_src.sub("_s.", "."), node.attribute("href").to_s]
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
p += 1
|
p += 1
|
||||||
url.sub!(/&p=\d+/, "&p=#{p}")
|
url.sub!(/&p=\d+/, "&p=#{p}")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
images
|
images
|
||||||
end
|
end
|
||||||
|
|
||||||
def self.create_mechanize
|
def self.create_mechanize
|
||||||
mech = Mechanize.new
|
mech = Mechanize.new
|
||||||
|
|
||||||
mech.get("http://www.pixiv.net") do |page|
|
mech.get("http://www.pixiv.net") do |page|
|
||||||
page.form_with(:action => "/login.php") do |form|
|
page.form_with(:action => "/login.php") do |form|
|
||||||
form.pixiv_id = "uroobnad"
|
form.pixiv_id = "uroobnad"
|
||||||
form.pass = "uroobnad556"
|
form.pass = "uroobnad556"
|
||||||
end.click_button
|
end.click_button
|
||||||
end
|
end
|
||||||
|
|
||||||
mech
|
mech
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ class DownloadTest < ActiveSupport::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
should "stream a file from an HTTP source" do
|
should "stream a file from an HTTP source" do
|
||||||
@download.http_get_streaming(@download.source) do |resp|
|
@download.http_get_streaming do |resp|
|
||||||
assert_equal("200", resp.code)
|
assert_equal("200", resp.code)
|
||||||
assert(resp["Content-Length"].to_i > 0, "File should be larger than 0 bytes")
|
assert(resp["Content-Length"].to_i > 0, "File should be larger than 0 bytes")
|
||||||
end
|
end
|
||||||
@@ -21,7 +21,7 @@ class DownloadTest < ActiveSupport::TestCase
|
|||||||
|
|
||||||
should "throw an exception when the file is larger than the maximum" do
|
should "throw an exception when the file is larger than the maximum" do
|
||||||
assert_raise(Download::Error) do
|
assert_raise(Download::Error) do
|
||||||
@download.http_get_streaming(@download.source, :max_size => 1) do |resp|
|
@download.http_get_streaming(:max_size => 1) do |resp|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user