diff --git a/app/logical/downloads/strategies/pixiv.rb b/app/logical/downloads/strategies/pixiv.rb
index 7af9ee509..77f04c788 100644
--- a/app/logical/downloads/strategies/pixiv.rb
+++ b/app/logical/downloads/strategies/pixiv.rb
@@ -6,7 +6,7 @@ module Downloads
           url, headers = rewrite_headers(url, headers)
           url, headers = rewrite_cdn(url, headers)
           url, headers = rewrite_html_pages(url, headers)
-          url, headers = rewrite_small_and_medium_images(url, headers)
+          url, headers = rewrite_thumbnails(url, headers)
           url, headers = rewrite_old_small_manga_pages(url, headers)
         end
 
@@ -19,10 +19,16 @@ module Downloads
         return [url, headers]
       end
 
+      # Rewrite these:
+      # http://www.pixiv.net/i/18557054
+      # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
+      # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
+      # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
+      # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
+      # Plus this:
+      # i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg
       def rewrite_html_pages(url, headers)
-        # example: http://www.pixiv.net/member_illust.php?mode=big&illust_id=23828655
-
-        if url =~ %r!illust_id=\d+!
+        if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
           source = ::Sources::Strategies::Pixiv.new(url)
           source.get
           return [source.image_url, headers]
@@ -31,15 +37,12 @@ module Downloads
         end
       end
 
-      def rewrite_small_and_medium_images(url, headers)
-        if url =~ %r!(/img/.+?/.+?)_m.+$!
-          match = $1
-          url.sub!(match + "_m", match)
-        elsif url !~ %r!/img-inf/! && url =~ %r!(/img/.+?/.+?)_s.+$!
-          match = $1
-          url.sub!(match + "_s", match)
-        end
-
+      # Rewrite these:
+      # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
+      # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
+      def rewrite_thumbnails(url, headers)
+        source = ::Sources::Strategies::Pixiv.new(url)
+        url = source.rewrite_thumbnails(url)
         return [url, headers]
       end
 
@@ -65,7 +68,7 @@ module Downloads
 
       def rewrite_cdn(url, headers)
         if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
-          url.sub!(".edgesuite.net", "")
+          url = url.sub(".edgesuite.net", "")
         end
 
         return [url, headers]
diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb
index 0b9b675c4..543acff87 100644
--- a/app/logical/sources/strategies/pixiv.rb
+++ b/app/logical/sources/strategies/pixiv.rb
@@ -29,14 +29,91 @@ module Sources
         agent.get(URI.parse(normalized_url)) do |page|
           @artist_name, @profile_url = get_profile_from_page(page)
           @pixiv_moniker = get_moniker_from_page(page)
-          @image_url = get_image_url_from_page(page)
           @tags = get_tags_from_page(page)
           @page_count = get_page_count_from_page(page)
+
+          is_manga = @page_count > 1
+          @image_url = get_image_url_from_page(page, is_manga)
         end
       end
 
+      def rewrite_thumbnails(thumbnail_url, is_manga=nil)
+        thumbnail_url = rewrite_new_medium_images(thumbnail_url)
+        thumbnail_url = rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
+        return thumbnail_url
+      end
+
     protected
 
+      # http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
+      # => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
+      def rewrite_new_medium_images(thumbnail_url)
+        if thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/.*/\d+_p\d+_\w+\.jpg!i
+          thumbnail_url = thumbnail_url.sub(%r!/c/\d+x\d+/img-master/!i, '/img-original/')
+          # => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
+
+          page = manga_page_from_url(@url)
+          thumbnail_url = thumbnail_url.sub(%r!_p(\d+)_\w+\.jpg$!i, "_p#{page}.")
+          # => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.
+
+          illust_id = illust_id_from_url(@url)
+          get_metadata_from_spapi!(illust_id) do |metadata|
+            file_ext = metadata[2]
+            thumbnail_url += file_ext
+            # => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
+          end
+        end
+
+        thumbnail_url
+      end
+
+      # If the thumbnail is for a manga gallery, it needs to be rewritten like this:
+      #
+      # http://i2.pixiv.net/img18/img/evazion/14901720_m.png
+      # => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
+      #
+      # Otherwise, it needs to be rewritten like this:
+      #
+      # http://i2.pixiv.net/img18/img/evazion/14901720_m.png
+      # => http://i2.pixiv.net/img18/img/evazion/14901720.png
+      #
+      def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
+        if thumbnail_url =~ %r!/img/[^/]+/\d+_[ms]\.(?:jpg|jpeg|png|gif)!i
+          if is_manga.nil?
+            illust_id = illust_id_from_url(@url)
+            get_metadata_from_spapi!(illust_id) do |metadata|
+              page_count = metadata[19].to_i || 1
+              is_manga = page_count > 1
+            end
+          end
+
+          if is_manga
+            page = manga_page_from_url(@url)
+            return thumbnail_url.sub(/_[ms]\./, "_big_p#{page}.")
+          else
+            return thumbnail_url.sub(/_[ms]\./, ".")
+          end
+        end
+
+        return thumbnail_url
+      end
+
+      def manga_page_from_url(url)
+        # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
+        # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
+        # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
+        if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
+          $1
+
+        # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
+        elsif url =~ /page=(\d+)/i
+          $1
+
+        else
+          0
+        end
+      end
+
       def get_profile_from_page(page)
         profile_url = page.search("a.user-link").first
         if profile_url
@@ -62,15 +139,16 @@ module Sources
         end
       end
 
-      def get_image_url_from_page(page)
+      def get_image_url_from_page(page, is_manga)
         elements = page.search("div.works_display a img").find_all do |node|
           node["src"] !~ /source\.pixiv\.net/
         end
 
         if elements.any?
-          elements.first.attr("src").sub(/_[ms]\./, ".")
+          thumbnail_url = elements.first.attr("src")
+          return rewrite_thumbnails(thumbnail_url, is_manga)
         else
-          nil
+          raise "Couldn't find image thumbnail URL in page: #{normalized_url}"
         end
       end
 