From c348f6118fcba60ac7b26baea233705fb7380cea Mon Sep 17 00:00:00 2001
From: albert
Date: Fri, 8 Apr 2011 13:18:22 -0400
Subject: [PATCH] copied over download.rb changes from oldbooru

---
 app/logical/download.rb | 69 +++++++++++-----
 app/logical/pixiv_proxy.rb | 164 ++++++++++++++++++-------------------
 test/unit/download_test.rb | 4 +-
 3 files changed, 132 insertions(+), 105 deletions(-)

diff --git a/app/logical/download.rb b/app/logical/download.rb
index 9aa2c3062..5f59d938f 100644
--- a/app/logical/download.rb
+++ b/app/logical/download.rb
@@ -1,40 +1,69 @@
 class Download
   class Error < Exception ; end
 
-  attr_accessor :source, :content_type
+  attr_accessor :source, :content_type, :file_path
 
   def initialize(source, file_path)
     @source = source
     @file_path = file_path
   end
 
-  # Downloads to @file_path
   def download!
-    http_get_streaming(@source) do |response|
+    http_get_streaming do |response|
       self.content_type = response["Content-Type"]
-      File.open(@file_path, "wb") do |out|
+      File.open(file_path, "wb") do |out|
         response.read_body(out)
       end
     end
-    @source = fix_image_board_sources(@source)
+    after_download
   end
 
-  # private
-  def handle_pixiv(source, headers)
-    if source =~ /pixiv\.net/
-      headers["Referer"] = "http://www.pixiv.net"
+  def pixiv_rewrite(headers)
+    return unless source =~ /pixiv\.net/
 
-      # Don't download the small version
-      if source =~ %r!(/img/.+?/.+?)_m.+$!
-        match = $1
-        source.sub!(match + "_m", match)
+    headers["Referer"] = "http://www.pixiv.net"
+
+    # Don't download the small version
+    if source =~ %r!(/img/.+?/.+?)_m.+$!
+      match = $1
+      source.sub!(match + "_m", match)
+    end
+
+    # Download the big version if it exists
+    if source =~ %r!(\d+_p\d+)\.!
+      match = $1
+      repl = match.sub(/_p/, "_big_p")
+      big_source = source.sub(match, repl)
+      if pixiv_http_exists?(big_source)
+        self.source = big_source
       end
     end
-
-    source
   end
 
-  def http_get_streaming(source, options = {})
+  def pixiv_http_exists?(source)
+ # example: http://img01.pixiv.net/img/as-special/15649262_big_p2.jpg + exists = false + uri = URI.parse(source) + Net::HTTP.start(uri.host, uri.port) do |http| + headers = {"Referer" => "http://www.pixiv.net", "User-Agent" => "#{Danbooru.config.app_name}/#{Danbooru.config.version}"} + http.request_head(uri.request_uri, headers) do |res| + if res.is_a?(Net::HTTPSuccess) + exists = true + end + end + end + exists + end + + def before_download(headers) + pixiv_rewrite(headers) + end + + def after_download + fix_image_board_sources + end + + def http_get_streaming(options = {}) max_size = options[:max_size] || Danbooru.config.max_file_size max_size = nil if max_size == 0 # unlimited limit = 4 @@ -51,7 +80,7 @@ class Download headers = { "User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}" } - source = handle_pixiv(source, headers) + before_download(headers) url = URI.parse(source) http.request_get(url.request_uri, headers) do |res| case res @@ -78,11 +107,9 @@ class Download end # while end # def - def fix_image_board_sources(source) + def fix_image_board_sources if source =~ /\/src\/\d{12,}|urnc\.yi\.org|yui\.cynthia\.bne\.jp/ - "Image board" - else - source + self.source = "Image board" end end end diff --git a/app/logical/pixiv_proxy.rb b/app/logical/pixiv_proxy.rb index 4dc192081..febb33296 100644 --- a/app/logical/pixiv_proxy.rb +++ b/app/logical/pixiv_proxy.rb @@ -3,92 +3,92 @@ class PixivProxy url =~ /pixiv\.net/ end - def self.get(url) - if url =~ /\/(\d+)(_m)?\.(jpg|jpeg|png|gif)/i - url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}" - get_single(url) - elsif url =~ /member_illust\.php/ && url =~ /illust_id=/ - get_single(url) + def self.get(url) + if url =~ /\/(\d+)(_m)?\.(jpg|jpeg|png|gif)/i + url = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}" + get_single(url) + elsif url =~ /member_illust\.php/ && url =~ /illust_id=/ + get_single(url) # elsif url =~ /member_illust\.php/ && url =~ /id=/ # get_listing(url) # elsif url =~ /member\.php/ && url =~ /id=/ # get_profile(url) - else + else {} - end - end - - def self.get_profile(url) - url = URI.parse(url).request_uri - mech = create_mechanize - hash = {} - mech.get(url) do |page| - hash[:artist] = page.search("a.avatar_m").attr("title").value - hash[:listing_url] = "/member_illust.php?id=" + url[/id=(\d+)/, 1] - end - hash - end - - def self.get_single(url) - url = URI.parse(url).request_uri - mech = create_mechanize - hash = {} - mech.get(url) do |page| - if page.search("a.avatar_m") - hash[:artist] = page.search("a.avatar_m").attr("title").value - hash[:image_url] = page.search("div.works_display/a/img").attr("src").value.sub("_m.", ".") - hash[:profile_url] = page.search("a.avatar_m").attr("href").value - hash[:jp_tags] = page.search("span#tags/a").map do |node| - [node.inner_text, node.attribute("href").to_s] - end.reject {|x| x[0].empty?} - else - hash[:artist] = "?" - hash[:image_url] = "?" - hash[:profile_url] = "?" - hash[:jp_tags] = [] - end - end - hash - end - - def self.get_listing(url) - mech = create_mechanize - p = 1 - url = URI.parse(url).request_uri.sub(/&p=\d+/, "") + "&p=1" - more = true - images = [] - - while more - mech.get(url) do |page| - links = page.search("div#illust_c4/ul/li/a") - - if links.empty? 
- more = false - else - images += links.map do |node| - image_src = node.child.attribute("src").to_s - [image_src, image_src.sub("_s.", "."), node.attribute("href").to_s] - end - end - - p += 1 - url.sub!(/&p=\d+/, "&p=#{p}") - end - end - - images - end + end + end + + def self.get_profile(url) + url = URI.parse(url).request_uri + mech = create_mechanize + hash = {} + mech.get(url) do |page| + hash[:artist] = page.search("a.avatar_m").attr("title").value + hash[:listing_url] = "/member_illust.php?id=" + url[/id=(\d+)/, 1] + end + hash + end + + def self.get_single(url) + url = URI.parse(url).request_uri + mech = create_mechanize + hash = {} + mech.get(url) do |page| + if page.search("a.avatar_m") + hash[:artist] = page.search("a.avatar_m").attr("title").value + hash[:image_url] = page.search("div.works_display/a/img").attr("src").value.sub("_m.", ".") + hash[:profile_url] = page.search("a.avatar_m").attr("href").value + hash[:jp_tags] = page.search("span#tags/a").map do |node| + [node.inner_text, node.attribute("href").to_s] + end.reject {|x| x[0].empty?} + else + hash[:artist] = "?" + hash[:image_url] = "?" + hash[:profile_url] = "?" + hash[:jp_tags] = [] + end + end + hash + end + + def self.get_listing(url) + mech = create_mechanize + p = 1 + url = URI.parse(url).request_uri.sub(/&p=\d+/, "") + "&p=1" + more = true + images = [] + + while more + mech.get(url) do |page| + links = page.search("div#illust_c4/ul/li/a") + + if links.empty? + more = false + else + images += links.map do |node| + image_src = node.child.attribute("src").to_s + [image_src, image_src.sub("_s.", "."), node.attribute("href").to_s] + end + end + + p += 1 + url.sub!(/&p=\d+/, "&p=#{p}") + end + end + + images + end - def self.create_mechanize - mech = Mechanize.new - - mech.get("http://www.pixiv.net") do |page| - page.form_with(:action => "/login.php") do |form| - form.pixiv_id = "uroobnad" - form.pass = "uroobnad556" - end.click_button - end - - mech - end + def self.create_mechanize + mech = Mechanize.new + + mech.get("http://www.pixiv.net") do |page| + page.form_with(:action => "/login.php") do |form| + form.pixiv_id = "uroobnad" + form.pass = "uroobnad556" + end.click_button + end + + mech + end end diff --git a/test/unit/download_test.rb b/test/unit/download_test.rb index 1819088f7..c736c4d87 100644 --- a/test/unit/download_test.rb +++ b/test/unit/download_test.rb @@ -13,7 +13,7 @@ class DownloadTest < ActiveSupport::TestCase end should "stream a file from an HTTP source" do - @download.http_get_streaming(@download.source) do |resp| + @download.http_get_streaming do |resp| assert_equal("200", resp.code) assert(resp["Content-Length"].to_i > 0, "File should be larger than 0 bytes") end @@ -21,7 +21,7 @@ class DownloadTest < ActiveSupport::TestCase should "throw an exception when the file is larger than the maximum" do assert_raise(Download::Error) do - @download.http_get_streaming(@download.source, :max_size => 1) do |resp| + @download.http_get_streaming(:max_size => 1) do |resp| end end end
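
For reference, a minimal standalone sketch (not part of the patch) of the two URL rewrites that the new Download#pixiv_rewrite performs. The sample URLs follow the pattern given in the pixiv_http_exists? comment; in the patch itself the "_big_" candidate is only kept after a HEAD request (pixiv_http_exists?) confirms it exists, and the helper name and file path below are hypothetical.

# Sketch of the pixiv rewrites, without the existence check or mutation of @source.
def rewrite_pixiv_url(source)
  # 1. Strip the "_m" (medium thumbnail) suffix to get the full-size image name.
  if source =~ %r!(/img/.+?/.+?)_m.+$!
    prefix = $1
    source = source.sub(prefix + "_m", prefix)
  end

  # 2. Turn "12345_p0.jpg" into the "12345_big_p0.jpg" candidate.
  if source =~ %r!(\d+_p\d+)\.!
    page = $1
    source = source.sub(page, page.sub(/_p/, "_big_p"))
  end

  source
end

rewrite_pixiv_url("http://img01.pixiv.net/img/as-special/15649262_m.jpg")
#=> "http://img01.pixiv.net/img/as-special/15649262.jpg"
rewrite_pixiv_url("http://img01.pixiv.net/img/as-special/15649262_p2.jpg")
#=> "http://img01.pixiv.net/img/as-special/15649262_big_p2.jpg"

# Typical call site for the reworked class (output path is hypothetical);
# http_get_streaming no longer takes the source as an argument:
# Download.new("http://img01.pixiv.net/img/as-special/15649262_p2.jpg", "/tmp/15649262.jpg").download!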