Rewrite Pixiv small/medium images to full-size images.
There are two kinds of thumbnails that need to be rewritten. First case:
new /img-master/ URLs need to be rewritten to /img-original/ URLs like this:
http://i2.pixiv.net/c/600x600/img-master/img/2014/10/04/03/59/52/46337015_p0_master1200.jpg
=> http://i2.pixiv.net/img-original/img/2014/10/04/03/59/52/46337015_p0.png
This is what `rewrite_new_medium_images` does. In order to do this, it
has to use the Pixiv API to get the correct file extension.
Second case: old small/medium-size URLs need to be rewritten to full-size
URLs like this:
http://i2.pixiv.net/img18/img/evazion/14901720_m.png
=> http://i2.pixiv.net/img18/img/evazion/14901720.png
But when the medium size URL is actually for a manga image, it needs to be
rewritten to the big manga URL instead:
http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
=> http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg
But we can't tell whether it's a manga image from the URL, so we have to
use the manga page count from either the HTML page or the API to
determine whether it's part of a manga gallery.
So in order to make this work, `rewrite_old_small_and_medium_images`
takes an `is_manga` flag. `Sources::Strategies::Pixiv#get` gets the
page count from the HTML and passes the `is_manga` flag on down through
the call chain until `rewrite_old_small_and_medium_images` gets it.
When `rewrite_old_small_and_medium_images` is called from
`Downloads::Strategies::Pixiv#rewrite_thumbnails`, the `is_manga` flag
isn't passed in because we didn't scrape the HTML. This causes
`rewrite_old_small_and_medium_images` to look it up in the API instead.
This commit is contained in:
@@ -6,7 +6,7 @@ module Downloads
|
||||
url, headers = rewrite_headers(url, headers)
|
||||
url, headers = rewrite_cdn(url, headers)
|
||||
url, headers = rewrite_html_pages(url, headers)
|
||||
url, headers = rewrite_small_and_medium_images(url, headers)
|
||||
url, headers = rewrite_thumbnails(url, headers)
|
||||
url, headers = rewrite_old_small_manga_pages(url, headers)
|
||||
end
|
||||
|
||||
@@ -19,10 +19,16 @@ module Downloads
|
||||
return [url, headers]
|
||||
end
|
||||
|
||||
# Rewrite these:
|
||||
# http://www.pixiv.net/i/18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
|
||||
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
|
||||
# Plus this:
|
||||
# i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg
|
||||
def rewrite_html_pages(url, headers)
|
||||
# example: http://www.pixiv.net/member_illust.php?mode=big&illust_id=23828655
|
||||
|
||||
if url =~ %r!illust_id=\d+!
|
||||
if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
|
||||
source = ::Sources::Strategies::Pixiv.new(url)
|
||||
source.get
|
||||
return [source.image_url, headers]
|
||||
@@ -31,15 +37,12 @@ module Downloads
|
||||
end
|
||||
end
|
||||
|
||||
def rewrite_small_and_medium_images(url, headers)
|
||||
if url =~ %r!(/img/.+?/.+?)_m.+$!
|
||||
match = $1
|
||||
url.sub!(match + "_m", match)
|
||||
elsif url !~ %r!/img-inf/! && url =~ %r!(/img/.+?/.+?)_s.+$!
|
||||
match = $1
|
||||
url.sub!(match + "_s", match)
|
||||
end
|
||||
|
||||
# Download-time thumbnail rewrite step. Handles URLs such as:
#
#   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
#   http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
#
# The actual rewriting is delegated to ::Sources::Strategies::Pixiv#rewrite_thumbnails;
# no is_manga flag is supplied here, so the source strategy decides that itself.
#
# url     - the URL about to be downloaded.
# headers - the request headers; passed through untouched.
#
# Returns a two element array: [rewritten_url, headers].
def rewrite_thumbnails(url, headers)
  full_size_url = ::Sources::Strategies::Pixiv.new(url).rewrite_thumbnails(url)
  [full_size_url, headers]
end
|
||||
|
||||
@@ -65,7 +68,7 @@ module Downloads
|
||||
|
||||
def rewrite_cdn(url, headers)
|
||||
if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
|
||||
url.sub!(".edgesuite.net", "")
|
||||
url = url.sub(".edgesuite.net", "")
|
||||
end
|
||||
|
||||
return [url, headers]
|
||||
|
||||
@@ -29,14 +29,91 @@ module Sources
|
||||
agent.get(URI.parse(normalized_url)) do |page|
|
||||
@artist_name, @profile_url = get_profile_from_page(page)
|
||||
@pixiv_moniker = get_moniker_from_page(page)
|
||||
@image_url = get_image_url_from_page(page)
|
||||
@tags = get_tags_from_page(page)
|
||||
@page_count = get_page_count_from_page(page)
|
||||
|
||||
is_manga = @page_count > 1
|
||||
@image_url = get_image_url_from_page(page, is_manga)
|
||||
end
|
||||
end
|
||||
|
||||
# Rewrites a Pixiv thumbnail URL by running it through both thumbnail
# rewriters in turn: first rewrite_new_medium_images, then
# rewrite_old_small_and_medium_images.
#
# thumbnail_url - the thumbnail URL to rewrite.
# is_manga      - forwarded as-is to rewrite_old_small_and_medium_images.
#                 Defaults to nil, in which case that method determines the
#                 value on its own.
#
# Returns the rewritten URL.
def rewrite_thumbnails(thumbnail_url, is_manga=nil)
  after_new_style_rewrite = rewrite_new_medium_images(thumbnail_url)
  rewrite_old_small_and_medium_images(after_new_style_rewrite, is_manga)
end
|
||||
|
||||
protected
|
||||
|
||||
# Rewrites a new-style /img-master/ thumbnail URL to the /img-original/ URL:
#
#   http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
#   => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
#
# The page number is taken from @url (via manga_page_from_url), not from the
# thumbnail itself, and the file extension is read from field 2 of the
# metadata yielded by get_metadata_from_spapi!.
#
# The same pattern is used both to detect the thumbnail and to rewrite it, so
# anything following ".jpg" in the URL (e.g. a "?..." suffix) is preserved
# after the new extension instead of having the extension appended behind it.
#
# thumbnail_url - the URL to rewrite.
#
# Returns the rewritten URL, or thumbnail_url unchanged when it is not an
# /img-master/ thumbnail. NOTE(review): if get_metadata_from_spapi! never
# yields, the result ends in a bare "." (as before) - confirm whether that
# can happen.
def rewrite_new_medium_images(thumbnail_url)
  master_pattern = %r!/c/\d+x\d+/img-master/(img/.*/\d+)_p\d+_\w+\.jpg!i
  return thumbnail_url unless thumbnail_url =~ master_pattern

  page = manga_page_from_url(@url)
  illust_id = illust_id_from_url(@url)

  file_ext = nil
  get_metadata_from_spapi!(illust_id) do |metadata|
    file_ext = metadata[2]
  end

  # Block form of sub: the replacement is used literally, so backslashes in
  # the interpolated values are never interpreted as back-references.
  thumbnail_url.sub(master_pattern) { "/img-original/#{$1}_p#{page}.#{file_ext}" }
end
|
||||
|
||||
# Rewrites an old-style small (_s) or medium (_m) thumbnail URL.
#
# If the thumbnail is for a manga gallery, it is rewritten like this:
#
#   http://i2.pixiv.net/img18/img/evazion/14901720_m.png
#   => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
#
# Otherwise, it is rewritten like this:
#
#   http://i2.pixiv.net/img18/img/evazion/14901720_m.png
#   => http://i2.pixiv.net/img18/img/evazion/14901720.png
#
# The pattern that detects the thumbnail is also the one that is substituted,
# so only the "_m"/"_s" suffix of the file name is touched (never an earlier
# "_m." elsewhere in the URL) and upper-case variants are handled too.
#
# thumbnail_url - the URL to rewrite.
# is_manga      - true/false when the caller already knows; nil to look the
#                 page count up via get_metadata_from_spapi! (field 19),
#                 treating a count above one as manga.
#
# Returns the rewritten URL, or thumbnail_url unchanged when it is not an
# old-style small/medium thumbnail.
def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
  thumbnail_pattern = %r!(/img/[^/]+/\d+)_[ms](\.(?:jpg|jpeg|png|gif))!i
  return thumbnail_url unless thumbnail_url =~ thumbnail_pattern

  if is_manga.nil?
    illust_id = illust_id_from_url(@url)
    get_metadata_from_spapi!(illust_id) do |metadata|
      # to_i already maps nil to 0, so no further default is needed.
      is_manga = metadata[19].to_i > 1
    end
  end

  # The manga page comes from @url, not from the thumbnail.
  suffix = is_manga ? "_big_p#{manga_page_from_url(@url)}" : ""
  thumbnail_url.sub(thumbnail_pattern) { "#{$1}#{suffix}#{$2}" }
end
|
||||
|
||||
# Extracts the manga page index from a Pixiv URL.
#
# Recognised forms:
#
#   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
#   http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
#   http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
#   http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
#
# The image-file form takes precedence over the page= query parameter.
#
# url - the URL to inspect (nil is treated as having no page).
#
# Returns the page index as an Integer in every case, 0 when the URL carries
# no page information. (Previously the matched cases returned a String and
# only the fallback an Integer; interpolating the result is unaffected.)
def manga_page_from_url(url)
  text = url.to_s
  page = text[%r!/\d+_p(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i, 1] ||
         text[/page=(\d+)/i, 1]

  # nil.to_i is 0, which doubles as the "no page found" fallback.
  page.to_i
end
|
||||
|
||||
def get_profile_from_page(page)
|
||||
profile_url = page.search("a.user-link").first
|
||||
if profile_url
|
||||
@@ -62,15 +139,16 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
def get_image_url_from_page(page)
|
||||
def get_image_url_from_page(page, is_manga)
|
||||
elements = page.search("div.works_display a img").find_all do |node|
|
||||
node["src"] !~ /source\.pixiv\.net/
|
||||
end
|
||||
|
||||
if elements.any?
|
||||
elements.first.attr("src").sub(/_[ms]\./, ".")
|
||||
thumbnail_url = elements.first.attr("src")
|
||||
return rewrite_thumbnails(thumbnail_url, is_manga)
|
||||
else
|
||||
nil
|
||||
raise "Couldn't find image thumbnail URL in page: #{normalized_url}"
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
Reference in New Issue
Block a user