Merge pull request #2263 from evazion/new-pixiv-urls-fixes
Fix artist finder and URL rewriting for new Pixiv URLs
This commit is contained in:
@@ -118,13 +118,7 @@ class ArtistsController < ApplicationController
|
||||
end
|
||||
|
||||
def finder
|
||||
url = params[:url]
|
||||
headers = {
|
||||
"User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}"
|
||||
}
|
||||
url, headers = Downloads::Strategies::Pixiv.new.rewrite(url, headers)
|
||||
|
||||
@artists = Artist.url_matches(url).order("id desc").limit(20)
|
||||
@artists = Artist.url_matches(params[:url]).order("id desc").limit(20)
|
||||
respond_with(@artists) do |format|
|
||||
format.xml do
|
||||
render :xml => @artists.to_xml(:include => [:urls], :root => "artists")
|
||||
|
||||
@@ -7,9 +7,8 @@ class UploadsController < ApplicationController
|
||||
def new
|
||||
@upload = Upload.new
|
||||
if params[:url]
|
||||
@post = Post.find_by_source(params[:url])
|
||||
|
||||
@normalized_url = params[:url]
|
||||
|
||||
headers = {
|
||||
"User-Agent" => "#{Danbooru.config.safe_app_name}/#{Danbooru.config.version}"
|
||||
}
|
||||
@@ -17,6 +16,8 @@ class UploadsController < ApplicationController
|
||||
@normalized_url, headers = strategy.new.rewrite(@normalized_url, headers)
|
||||
end
|
||||
|
||||
@post = Post.find_by_source(@normalized_url)
|
||||
|
||||
begin
|
||||
@source = Sources::Site.new(params[:url])
|
||||
rescue Exception
|
||||
|
||||
@@ -6,8 +6,8 @@ module Downloads
|
||||
url, headers = rewrite_headers(url, headers)
|
||||
url, headers = rewrite_cdn(url, headers)
|
||||
url, headers = rewrite_html_pages(url, headers)
|
||||
url, headers = rewrite_small_and_medium_images(url, headers)
|
||||
url, headers = rewrite_small_manga_pages(url, headers)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
url, headers = rewrite_old_small_manga_pages(url, headers)
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
@@ -19,10 +19,16 @@ module Downloads
|
||||
return [url, headers]
|
||||
end
|
||||
|
||||
# Rewrite these:
|
||||
# http://www.pixiv.net/i/18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
|
||||
# Plus this:
|
||||
# i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg
|
||||
def rewrite_html_pages(url, headers)
|
||||
# example: http://www.pixiv.net/member_illust.php?mode=big&illust_id=23828655
|
||||
|
||||
if url =~ %r!illust_id=\d+!
|
||||
if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
|
||||
source = ::Sources::Strategies::Pixiv.new(url)
|
||||
source.get
|
||||
return [source.image_url, headers]
|
||||
@@ -31,20 +37,24 @@ module Downloads
|
||||
end
|
||||
end
|
||||
|
||||
def rewrite_small_and_medium_images(url, headers)
|
||||
if url =~ %r!(/img/.+?/.+?)_m.+$!
|
||||
match = $1
|
||||
url.sub!(match + "_m", match)
|
||||
elsif url !~ %r!/img-inf/! && url =~ %r!(/img/.+?/.+?)_s.+$!
|
||||
match = $1
|
||||
url.sub!(match + "_s", match)
|
||||
end
|
||||
|
||||
# Rewrite these:
|
||||
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
|
||||
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
|
||||
# Hands the URL to the Pixiv source strategy, which knows how to turn
# thumbnail URLs into full-size image URLs. Headers are passed through
# untouched.
#
# Returns a two-element array: [rewritten_url, headers].
def rewrite_thumbnails(url, headers)
  rewritten = ::Sources::Strategies::Pixiv.new(url).rewrite_thumbnails(url)
  [rewritten, headers]
end
|
||||
|
||||
def rewrite_small_manga_pages(url, headers)
|
||||
if url =~ %r!(\d+_p\d+)\.!
|
||||
# Rewrite these:
|
||||
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
|
||||
# http://img04.pixiv.net/img/syounen_no_uta/46170939_p0.jpg
|
||||
# but not these:
|
||||
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg
|
||||
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
|
||||
# http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
|
||||
def rewrite_old_small_manga_pages(url, headers)
|
||||
if url !~ %r!/img-(?:original|master)/img/!i && url =~ %r!/(\d+_p\d+)\.!i
|
||||
match = $1
|
||||
repl = match.sub(/_p/, "_big_p")
|
||||
big_url = url.sub(match, repl)
|
||||
@@ -58,7 +68,7 @@ module Downloads
|
||||
|
||||
def rewrite_cdn(url, headers)
|
||||
if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
|
||||
url.sub!(".edgesuite.net", "")
|
||||
url = url.sub(".edgesuite.net", "")
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
# encoding: UTF-8
|
||||
|
||||
module Sources
|
||||
# Raised by source strategies when required information cannot be extracted
# (e.g. from a page, a URL or an API response). Derives from StandardError,
# not Exception, so that a plain `rescue` can handle it and it is not lumped
# together with interpreter-level exceptions such as SystemExit or Interrupt.
class Error < StandardError; end
|
||||
|
||||
class Site
|
||||
attr_reader :url, :strategy
|
||||
delegate :get, :referer_url, :site_name, :artist_name, :profile_url, :image_url, :tags, :artist_record, :unique_id, :page_count, :to => :strategy
|
||||
@@ -20,6 +22,18 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
# Returns the URL that should be used when looking up artists.
#
# When a strategy is available, the strategy's own normalization is used.
# The original url is returned unchanged when no strategy is available or
# when the strategy raises Sources::Error.
def normalize_for_artist_finder!
  return url unless available?

  begin
    strategy.normalize_for_artist_finder!
  rescue Sources::Error
    url
  end
end
|
||||
|
||||
def translated_tags
|
||||
untranslated_tags = tags
|
||||
untranslated_tags = untranslated_tags.map(&:first)
|
||||
|
||||
@@ -18,6 +18,10 @@ module Sources
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
def normalize_for_artist_finder!
|
||||
url
|
||||
end
|
||||
|
||||
def site_name
|
||||
raise NotImplementedError
|
||||
end
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
# encoding: UTF-8
|
||||
|
||||
require 'csv'
|
||||
|
||||
module Sources
|
||||
module Strategies
|
||||
class Pixiv < Base
|
||||
@@ -20,21 +22,112 @@ module Sources
|
||||
end
|
||||
|
||||
def unique_id
|
||||
image_url =~ /\/img\/([^\/]+)/
|
||||
$1
|
||||
@pixiv_moniker
|
||||
end
|
||||
|
||||
# Builds the canonical "http://img.pixiv.net/img/<username>" URL used to
# match this source against artist URLs.
#
# The username is taken straight from old-style image URLs, e.g.
#   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
# For every other URL the illust ID is extracted and the Pixiv API is
# queried; field 24 of the returned metadata row is used as the username.
def normalize_for_artist_finder!
  moniker = nil

  if url =~ %r!/img/([^/]+)/\d+(?:_\w+)?\.(?:jpg|jpeg|png|gif)!i
    moniker = Regexp.last_match(1)
  else
    get_metadata_from_spapi!(illust_id_from_url(url)) { |metadata| moniker = metadata[24] }
  end

  "http://img.pixiv.net/img/#{moniker}"
end
|
||||
|
||||
def get
|
||||
agent.get(URI.parse(normalized_url)) do |page|
|
||||
@artist_name, @profile_url = get_profile_from_page(page)
|
||||
@image_url = get_image_url_from_page(page)
|
||||
@pixiv_moniker = get_moniker_from_page(page)
|
||||
@tags = get_tags_from_page(page)
|
||||
@page_count = get_page_count_from_page(page)
|
||||
|
||||
is_manga = @page_count > 1
|
||||
@image_url = get_image_url_from_page(page, is_manga)
|
||||
end
|
||||
end
|
||||
|
||||
# Converts a thumbnail URL into the URL of the full-size image.
#
# The new-style (img-master) rewrite is applied first and its result is fed
# to the old-style (_m / _s) rewrite. is_manga is forwarded to the old-style
# rewrite only; nil means "not known yet".
def rewrite_thumbnails(thumbnail_url, is_manga=nil)
  rewrite_old_small_and_medium_images(rewrite_new_medium_images(thumbnail_url), is_manga)
end
|
||||
|
||||
protected
|
||||
|
||||
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
|
||||
# => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
|
||||
# Rewrites a new-style medium thumbnail to the original image URL. URLs that
# are not /c/<W>x<H>/img-master/ thumbnails are returned unchanged.
#
# Note that both the page number and the illust ID are derived from @url
# (the URL this strategy was created with), not from thumbnail_url.
def rewrite_new_medium_images(thumbnail_url)
  return thumbnail_url unless thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/.*/\d+_p\d+_\w+\.jpg!i

  # .../c/600x600/img-master/img/.../46304396_p1_master1200.jpg
  # => .../img-original/img/.../46304396_p1_master1200.jpg
  original_url = thumbnail_url.sub(%r!/c/\d+x\d+/img-master/!i, '/img-original/')

  # => .../img-original/img/.../46304396_p1.
  original_url = original_url.sub(%r!_p(\d+)_\w+\.jpg$!i, "_p#{manga_page_from_url(@url)}.")

  # The thumbnail is always a .jpg; field 2 of the API metadata row is
  # appended as the real file extension.
  # => .../img-original/img/.../46304396_p1.png
  get_metadata_from_spapi!(illust_id_from_url(@url)) do |metadata|
    original_url += metadata[2]
  end

  original_url
end
|
||||
|
||||
# If the thumbnail is for a manga gallery, it needs to be rewritten like this:
|
||||
#
|
||||
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
|
||||
# => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
|
||||
#
|
||||
# Otherwise, it needs to be rewritten like this:
|
||||
#
|
||||
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
|
||||
# => http://i2.pixiv.net/img18/img/evazion/14901720.png
|
||||
#
|
||||
# Rewrites an old-style "_m" / "_s" thumbnail to the full-size image URL.
# URLs that do not look like such a thumbnail are returned unchanged.
#
# thumbnail_url - the thumbnail URL to rewrite.
# is_manga      - true/false when the caller already knows whether the work
#                 is a multi-page gallery; nil to look it up via the Pixiv
#                 API using the illust ID taken from @url.
#
# For a manga gallery the suffix becomes "_big_p<page>" (page taken from
# @url); otherwise the "_m" / "_s" suffix is simply removed.
def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
  return thumbnail_url unless thumbnail_url =~ %r!/img/[^/]+/\d+_[ms]\.(?:jpg|jpeg|png|gif)!i

  if is_manga.nil?
    get_metadata_from_spapi!(illust_id_from_url(@url)) do |metadata|
      # Field 19 is read as the page count. A missing field becomes 0 via
      # to_i, which is treated as "not a manga" just like a count of 1.
      is_manga = metadata[19].to_i > 1
    end
  end

  suffix = is_manga ? "_big_p#{manga_page_from_url(@url)}." : "."
  thumbnail_url.sub(/_[ms]\./, suffix)
end
|
||||
|
||||
# Extracts the manga page number from a Pixiv URL.
#
# Returns the captured digits as a String when the URL carries a page
# number, or the Integer 0 when it does not (callers interpolate the result
# into a URL, so both forms render the same way).
def manga_page_from_url(url)
  # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
  # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg
  # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
  # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
  #
  # The optional "_big" covers full-size pages of old-style galleries, which
  # would otherwise fall through and be reported as page 0.
  return Regexp.last_match(1) if url =~ %r!/\d+(?:_big)?_p(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i

  # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
  return Regexp.last_match(1) if url =~ /page=(\d+)/i

  0
end
|
||||
|
||||
def get_profile_from_page(page)
|
||||
profile_url = page.search("a.user-link").first
|
||||
if profile_url
|
||||
@@ -49,15 +142,27 @@ module Sources
|
||||
return [artist_name, profile_url]
|
||||
end
|
||||
|
||||
def get_image_url_from_page(page)
|
||||
# Reads the artist's Pixiv moniker from the "Feed" tab link of a work page:
#
#   <a class="tab-feed" href="/stacc/gennmai-226">Feed</a>  =>  "gennmai-226"
#
# Raises Sources::Error when the page has no such link.
def get_moniker_from_page(page)
  feed_tab = page.search("a.tab-feed").first

  if feed_tab.nil?
    raise Sources::Error.new("Couldn't find Pixiv moniker in page: #{normalized_url}")
  end

  feed_tab.attr("href").sub(%r!^/stacc/!i, '')
end
|
||||
|
||||
def get_image_url_from_page(page, is_manga)
|
||||
elements = page.search("div.works_display a img").find_all do |node|
|
||||
node["src"] !~ /source\.pixiv\.net/
|
||||
end
|
||||
|
||||
if elements.any?
|
||||
elements.first.attr("src").sub(/_[ms]\./, ".")
|
||||
thumbnail_url = elements.first.attr("src")
|
||||
return rewrite_thumbnails(thumbnail_url, is_manga)
|
||||
else
|
||||
nil
|
||||
raise Sources::Error.new("Couldn't find image thumbnail URL in page: #{normalized_url}")
|
||||
end
|
||||
end
|
||||
|
||||
@@ -87,11 +192,11 @@ module Sources
|
||||
|
||||
def get_page_count_from_page(page)
|
||||
elements = page.search("ul.meta li").find_all do |node|
|
||||
node.text =~ /Manga|漫画/
|
||||
node.text =~ /Manga|漫画|複数枚投稿/
|
||||
end
|
||||
|
||||
if elements.any?
|
||||
elements[0].text =~ /(?:Manga|漫画) (\d+)P/
|
||||
elements[0].text =~ /(?:Manga|漫画|複数枚投稿) (\d+)P/
|
||||
$1.to_i
|
||||
else
|
||||
1
|
||||
@@ -99,19 +204,61 @@ module Sources
|
||||
end
|
||||
|
||||
def normalized_url
|
||||
@normalized_url ||= begin
|
||||
if url =~ /\/(\d+)(?:_big)?(?:_m|_p\d+)?\.(?:jpg|jpeg|png|gif)/i
|
||||
"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{$1}"
|
||||
elsif url =~ /mode=big/
|
||||
url.sub(/mode=big/, "mode=medium")
|
||||
elsif url =~ /member_illust\.php/ && url =~ /illust_id=/
|
||||
url
|
||||
illust_id = illust_id_from_url(@url)
|
||||
"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}"
|
||||
end
|
||||
|
||||
# Refer to http://danbooru.donmai.us/wiki_pages/58938 for documentation on the Pixiv API.
|
||||
# Fetches the metadata row for illust_id from the Pixiv API and yields it to
# the caller's block as an array of CSV fields.
#
# The request is authenticated with the agent's PHPSESSID cookie.
#
# Raises Sources::Error when the agent has no PHPSESSID cookie (instead of
# failing with NoMethodError on nil) or when the response contains no CSV
# row.
def get_metadata_from_spapi!(illust_id)
  session_cookie = agent.cookies.find { |cookie| cookie.name == "PHPSESSID" }

  if session_cookie.nil?
    raise Sources::Error.new("Couldn't find Pixiv PHPSESSID cookie; unable to query the Pixiv API.")
  end

  spapi_url = "http://spapi.pixiv.net/iphone/illust.php?illust_id=#{illust_id}&PHPSESSID=#{session_cookie.value}"

  agent.get(spapi_url) do |response|
    # The body is re-tagged as UTF-8 before being parsed as CSV; only the
    # first row is used.
    metadata = CSV.parse(response.content.force_encoding("UTF-8")).first

    if metadata.nil?
      raise Sources::Error.new("Couldn't get Pixiv API metadata from #{spapi_url}.")
    end

    yield metadata
  end
end
|
||||
|
||||
# Extracts the Pixiv illust ID (as a String of digits) from any supported
# Pixiv URL. Patterns are tried in the order listed; the first match wins.
#
# Raises Sources::Error when none of the patterns match.
def illust_id_from_url(url)
  # Direct image / archive URLs:
  #
  #   http://img18.pixiv.net/img/evazion/14901720.png
  #
  #   http://i2.pixiv.net/img18/img/evazion/14901720.png
  #   http://i2.pixiv.net/img18/img/evazion/14901720_m.png
  #   http://i2.pixiv.net/img18/img/evazion/14901720_s.png
  #   http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
  #   http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
  #
  #   http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg
  #   http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
  #
  #   http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
  #   http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
  #
  #   http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
  return Regexp.last_match(1) if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i

  # HTML pages carrying the ID in the query string:
  #
  #   http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
  #   http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
  #   http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
  #   http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
  return Regexp.last_match(1) if url =~ /illust_id=(\d+)/i

  # Short links:
  #
  #   http://www.pixiv.net/i/18557054
  return Regexp.last_match(1) if url =~ %r!pixiv\.net/i/(\d+)!i

  raise Sources::Error.new("Couldn't get illust ID from URL: #{url}")
end
|
||||
|
||||
def agent
|
||||
@agent ||= begin
|
||||
mech = Mechanize.new
|
||||
|
||||
@@ -22,6 +22,7 @@ class Artist < ActiveRecord::Base
|
||||
|
||||
module ClassMethods
|
||||
def find_all_by_url(url)
|
||||
url = Sources::Site.new(url).normalize_for_artist_finder!
|
||||
url = ArtistUrl.normalize(url)
|
||||
artists = []
|
||||
|
||||
|
||||
@@ -36,7 +36,7 @@
|
||||
<div class="input">
|
||||
<%= f.label :source %>
|
||||
<% if params[:url].present? %>
|
||||
<%= f.text_field :source, :size => 50, :value => params[:url] %>
|
||||
<%= f.text_field :source, :size => 50, :value => @normalized_url %>
|
||||
<% else %>
|
||||
<%= f.text_field :source, :size => 50 %>
|
||||
<% end %>
|
||||
|
||||
Reference in New Issue
Block a user