copied over download.rb changes from oldbooru

This commit is contained in:
albert
2011-04-08 13:18:22 -04:00
parent b4ae7648d4
commit c348f6118f
3 changed files with 132 additions and 105 deletions

View File

@@ -1,40 +1,69 @@
class Download class Download
# Raised when a download fails. Derives from StandardError (not Exception) so
# that a bare `rescue` or `rescue StandardError` in calling code can catch it;
# `rescue Download::Error` and `rescue Exception` keep working as before.
class Error < StandardError; end
attr_accessor :source, :content_type attr_accessor :source, :content_type, :file_path
# Builds a download job.
#
# source    - where the file is fetched from (stored in @source).
# file_path - where the fetched body is written (stored in @file_path).
def initialize(source, file_path)
  @source, @file_path = source, file_path
end
# Streams the source into file_path (opened in binary mode), records the
# response's Content-Type header on the way, then runs the after_download
# hook. Returns whatever after_download returns.
def download!
  http_get_streaming do |response|
    self.content_type = response["Content-Type"]
    File.open(file_path, "wb") { |out| response.read_body(out) }
  end

  after_download
end
# Adjusts the request for pixiv-hosted sources; does nothing for other hosts.
#
# headers - Hash of request headers; a pixiv "Referer" entry is added to it.
#
# Side effects: may replace `source` (through the `source=` writer) with the
# non-thumbnail URL and, when the server reports that it exists, with the
# "_big_p" variant. The string originally handed to the downloader is never
# mutated in place, so frozen or shared source strings are safe.
def pixiv_rewrite(headers)
  return unless source =~ /pixiv\.net/

  headers["Referer"] = "http://www.pixiv.net"

  # Don't download the small version: drop the "_m" suffix from the file stem.
  stem = source[%r!(/img/.+?/.+?)_m.+$!, 1]
  self.source = source.sub(stem + "_m", stem) if stem

  # Download the big version if it exists.
  page_id = source[%r!(\d+_p\d+)\.!, 1]
  return unless page_id

  big_source = source.sub(page_id, page_id.sub(/_p/, "_big_p"))
  self.source = big_source if pixiv_http_exists?(big_source)
end
# Reports whether a pixiv image URL exists by sending it an HTTP HEAD request.
#
# source - URL to probe. Defaults to the download's current source, so the
#          method can still be called without arguments; pixiv_rewrite passes
#          the candidate "_big_p" URL explicitly.
#
# Returns true when the response is a Net::HTTPSuccess, false otherwise.
def pixiv_http_exists?(source = self.source)
  # example: http://img01.pixiv.net/img/as-special/15649262_big_p2.jpg
  uri = URI.parse(source)
  # NOTE(review): the main download request builds its User-Agent from
  # Danbooru.config.safe_app_name; this one uses app_name - confirm intended.
  headers = {
    "Referer" => "http://www.pixiv.net",
    "User-Agent" => "#{Danbooru.config.app_name}/#{Danbooru.config.version}"
  }

  Net::HTTP.start(uri.host, uri.port) do |http|
    http.request_head(uri.request_uri, headers).is_a?(Net::HTTPSuccess)
  end
end
# Hook run with the outgoing request headers before the download request is
# issued. Currently it only applies the pixiv-specific rewrite, which may add
# entries to `headers`.
def before_download(headers)
pixiv_rewrite(headers)
end
# Hook run once the download has finished. Currently it only normalizes the
# recorded source for image-board URLs.
def after_download
fix_image_board_sources
end
def http_get_streaming(options = {})
max_size = options[:max_size] || Danbooru.config.max_file_size max_size = options[:max_size] || Danbooru.config.max_file_size
max_size = nil if max_size == 0 # unlimited max_size = nil if max_size == 0 # unlimited
limit = 4 limit = 4
@@ -51,7 +80,7 @@ class Download
headers = { headers = {
"User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}" "User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}"
} }
source = handle_pixiv(source, headers) before_download(headers)
url = URI.parse(source) url = URI.parse(source)
http.request_get(url.request_uri, headers) do |res| http.request_get(url.request_uri, headers) do |res|
case res case res
@@ -78,11 +107,9 @@ class Download
end # while end # while
end # def end # def
# Replaces the source with the literal "Image board" when it matches one of
# the image-board URL patterns below. Any other source is left untouched
# (and the method returns nil).
def fix_image_board_sources
  return unless source =~ /\/src\/\d{12,}|urnc\.yi\.org|yui\.cynthia\.bne\.jp/

  self.source = "Image board"
end
end end

View File

@@ -3,92 +3,92 @@ class PixivProxy
url =~ /pixiv\.net/ url =~ /pixiv\.net/
end end
# Looks up pixiv metadata for a URL.
#
# A direct image URL (".../<digits>.jpg", optionally with an "_m" suffix) is
# mapped to its illustration page first; an illustration page URL is used as
# given. Both are handed to get_single. Any other URL yields an empty Hash.
def self.get(url)
  illust_id = url[/\/(\d+)(_m)?\.(jpg|jpeg|png|gif)/i, 1]

  if illust_id
    get_single("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}")
  elsif url =~ /member_illust\.php/ && url =~ /illust_id=/
    get_single(url)
  # elsif url =~ /member_illust\.php/ && url =~ /id=/
  #   get_listing(url)
  # elsif url =~ /member\.php/ && url =~ /id=/
  #   get_profile(url)
  else
    {}
  end
end
# Fetches a pixiv profile page and extracts the artist name plus the path of
# the artist's illustration listing.
#
# url - profile URL; only its path and query string are requested, and the
#       query is expected to contain "id=<digits>".
#
# Returns a Hash with :artist and :listing_url.
def self.get_profile(url)
  request_path = URI.parse(url).request_uri
  agent = create_mechanize
  profile = {}

  agent.get(request_path) do |page|
    profile[:artist] = page.search("a.avatar_m").attr("title").value
    profile[:listing_url] = "/member_illust.php?id=" + request_path[/id=(\d+)/, 1]
  end

  profile
end
# Fetches a pixiv illustration page and extracts its metadata.
#
# url - illustration page URL; only its path and query string are requested.
#
# Returns a Hash with :artist, :image_url, :profile_url and :jp_tags (an Array
# of [text, href] pairs with blank-text entries removed). When the page has
# no "a.avatar_m" element the three strings are "?" and :jp_tags is empty.
def self.get_single(url)
  url = URI.parse(url).request_uri
  mech = create_mechanize
  hash = {}

  mech.get(url) do |page|
    avatar = page.search("a.avatar_m")

    # `search` returns a node set, which is truthy even when nothing matched,
    # so emptiness has to be tested explicitly to reach the placeholder branch.
    if avatar.empty?
      hash[:artist] = "?"
      hash[:image_url] = "?"
      hash[:profile_url] = "?"
      hash[:jp_tags] = []
    else
      hash[:artist] = avatar.attr("title").value
      # The page embeds the medium ("_m") image; strip the suffix for the original.
      hash[:image_url] = page.search("div.works_display/a/img").attr("src").value.sub("_m.", ".")
      hash[:profile_url] = avatar.attr("href").value
      hash[:jp_tags] = page.search("span#tags/a").map do |node|
        [node.inner_text, node.attribute("href").to_s]
      end.reject { |pair| pair[0].empty? }
    end
  end

  hash
end
# Walks an artist's illustration listing page by page, starting at p=1, until
# a page contains no thumbnail links.
#
# url - listing URL; only its path and query string are requested, and any
#       existing "&p=N" parameter is replaced.
#
# Returns an Array of [thumbnail_src, full_src, link_href] triples, where
# full_src is the thumbnail URL with its "_s." marker replaced by ".".
def self.get_listing(url)
  agent = create_mechanize
  page_number = 1
  listing_url = URI.parse(url).request_uri.sub(/&p=\d+/, "") + "&p=1"
  images = []
  finished = false

  until finished
    agent.get(listing_url) do |page|
      links = page.search("div#illust_c4/ul/li/a")
      finished = links.empty?

      links.each do |node|
        thumbnail = node.child.attribute("src").to_s
        images << [thumbnail, thumbnail.sub("_s.", "."), node.attribute("href").to_s]
      end

      page_number += 1
      listing_url = listing_url.sub(/&p=\d+/, "&p=#{page_number}")
    end
  end

  images
end
# Builds a Mechanize agent that has submitted the login form found on the
# pixiv front page.
#
# Credentials are read from the PIXIV_LOGIN and PIXIV_PASSWORD environment
# variables. The values that used to be hard-coded here remain as fallbacks so
# existing deployments keep working.
# FIXME: remove the fallbacks (and rotate that account) once every environment
# supplies its own credentials - secrets should not live in source control.
#
# Returns the Mechanize agent.
def self.create_mechanize
  pixiv_id = ENV.fetch("PIXIV_LOGIN", "uroobnad")
  password = ENV.fetch("PIXIV_PASSWORD", "uroobnad556")

  mech = Mechanize.new
  mech.get("http://www.pixiv.net") do |page|
    page.form_with(:action => "/login.php") do |form|
      form.pixiv_id = pixiv_id
      form.pass = password
    end.click_button
  end
  mech
end
end end

View File

@@ -13,7 +13,7 @@ class DownloadTest < ActiveSupport::TestCase
end end
should "stream a file from an HTTP source" do should "stream a file from an HTTP source" do
@download.http_get_streaming(@download.source) do |resp| @download.http_get_streaming do |resp|
assert_equal("200", resp.code) assert_equal("200", resp.code)
assert(resp["Content-Length"].to_i > 0, "File should be larger than 0 bytes") assert(resp["Content-Length"].to_i > 0, "File should be larger than 0 bytes")
end end
@@ -21,7 +21,7 @@ class DownloadTest < ActiveSupport::TestCase
should "throw an exception when the file is larger than the maximum" do should "throw an exception when the file is larger than the maximum" do
assert_raise(Download::Error) do assert_raise(Download::Error) do
@download.http_get_streaming(@download.source, :max_size => 1) do |resp| @download.http_get_streaming(:max_size => 1) do |resp|
end end
end end
end end