pixiv: fix illust id parsing (fix #4043).
* Tighten up illust id parsing to avoid misparsing ids from non-illust urls (sketch urls and novel urls).
* Move id parsing tests from post_test.rb to sources/pixiv_test.rb.
* Drop support for touch.pixiv.net urls. These urls are no longer used by Pixiv and aren't present as the source of any posts on Danbooru.
This commit is contained in:
@@ -1,4 +1,47 @@
|
|||||||
require 'csv'
|
# Pixiv
|
||||||
|
#
|
||||||
|
# * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
|
||||||
|
#
|
||||||
|
# * https://i.pximg.net/c/250x250_80_a2/img-master/img/2014/10/29/09/27/19/46785915_p0_square1200.jpg
|
||||||
|
# * https://i.pximg.net/img-master/img/2014/10/03/18/10/20/46324488_p0_master1200.jpg
|
||||||
|
#
|
||||||
|
# * https://www.pixiv.net/member_illust.php?mode=medium&illust_id=46324488
|
||||||
|
# * https://www.pixiv.net/member_illust.php?mode=manga&illust_id=46324488
|
||||||
|
# * https://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46324488&page=0
|
||||||
|
#
|
||||||
|
# * https://www.pixiv.net/member.php?id=339253
|
||||||
|
# * https://www.pixiv.net/member_illust.php?id=339253&type=illust
|
||||||
|
# * https://www.pixiv.net/u/9202877
|
||||||
|
# * https://www.pixiv.net/stacc/noizave
|
||||||
|
# * http://www.pixiv.me/noizave
|
||||||
|
#
|
||||||
|
# Fanbox
|
||||||
|
#
|
||||||
|
# * https://fanbox.pixiv.net/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png
|
||||||
|
# * https://pixiv.pximg.net/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
|
||||||
|
#
|
||||||
|
# * https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg
|
||||||
|
# * https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/186919/cover/VCI1Mcs2rbmWPg0mmiTisovn.jpeg
|
||||||
|
#
|
||||||
|
# * https://www.pixiv.net/fanbox/creator/1566167/post/39714
|
||||||
|
# * https://www.pixiv.net/fanbox/creator/1566167
|
||||||
|
#
|
||||||
|
# Novels
|
||||||
|
#
|
||||||
|
# * https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg
|
||||||
|
# * https://i.pximg.net/c/600x600/novel-cover-master/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42_master1200.jpg
|
||||||
|
# * https://img-novel.pximg.net/img-novel/work_main/XtFbt7gsymsvyaG45lZ8/1554.jpg?20190107110435
|
||||||
|
#
|
||||||
|
# * https://www.pixiv.net/novel/show.php?id=10617324
|
||||||
|
# * https://novel.pixiv.net/works/1554
|
||||||
|
#
|
||||||
|
# Sketch
|
||||||
|
#
|
||||||
|
# * https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg
|
||||||
|
# * https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg
|
||||||
|
# * https://sketch.pixiv.net/items/1588346448904706151
|
||||||
|
# * https://sketch.pixiv.net/@0125840
|
||||||
|
#
|
||||||
|
|
||||||
module Sources
|
module Sources
|
||||||
module Strategies
|
module Strategies
|
||||||
@@ -218,37 +261,45 @@ module Sources
|
|||||||
# even though it makes sense to reference page_url here, it will only look
|
# even though it makes sense to reference page_url here, it will only look
|
||||||
# at (url, referer_url).
|
# at (url, referer_url).
|
||||||
def illust_id
|
def illust_id
|
||||||
# http://img18.pixiv.net/img/evazion/14901720.png
|
return nil if novel_id.present? || fanbox_id.present?
|
||||||
#
|
|
||||||
# http://i2.pixiv.net/img18/img/evazion/14901720.png
|
|
||||||
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
|
|
||||||
# http://i2.pixiv.net/img18/img/evazion/14901720_s.png
|
|
||||||
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
|
|
||||||
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
|
|
||||||
#
|
|
||||||
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg
|
|
||||||
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
|
|
||||||
#
|
|
||||||
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
|
|
||||||
# http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
|
|
||||||
#
|
|
||||||
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
|
|
||||||
if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
|
|
||||||
return $1
|
|
||||||
end
|
|
||||||
|
|
||||||
[url, referer_url].each do |x|
|
parsed_urls.each do |url|
|
||||||
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
|
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
|
||||||
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
|
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
|
||||||
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
|
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
|
||||||
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
|
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
|
||||||
if x =~ /illust_id=(\d+)/i
|
if url.host == "www.pixiv.net" && url.path == "/member_illust.php" && url.query_values["illust_id"].present?
|
||||||
return $1
|
return url.query_values["illust_id"].to_i
|
||||||
end
|
|
||||||
|
|
||||||
# http://www.pixiv.net/i/18557054
|
# http://www.pixiv.net/i/18557054
|
||||||
if x =~ %r!pixiv\.net/i/(\d+)!i
|
elsif url.host == "www.pixiv.net" && url.path =~ %r!\A/i/(?<illust_id>\d+)\z!i
|
||||||
return $1
|
return $~[:illust_id].to_i
|
||||||
|
|
||||||
|
# http://img18.pixiv.net/img/evazion/14901720.png
|
||||||
|
# http://i2.pixiv.net/img18/img/evazion/14901720.png
|
||||||
|
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
|
||||||
|
# http://i2.pixiv.net/img18/img/evazion/14901720_s.png
|
||||||
|
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
|
||||||
|
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
|
||||||
|
elsif url.host =~ %r!\A(?:i\d+|img\d+)\.pixiv\.net\z!i &&
|
||||||
|
url.path =~ %r!\A(?:/img\d+)?/img/\w+/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
|
||||||
|
return $~[:illust_id].to_i
|
||||||
|
|
||||||
|
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg
|
||||||
|
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
|
||||||
|
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
|
||||||
|
# http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
|
||||||
|
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
|
||||||
|
# https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
|
||||||
|
# https://i.pximg.net/img-master/img/2014/10/03/18/10/20/46324488_p0_master1200.jpg
|
||||||
|
#
|
||||||
|
# but not:
|
||||||
|
#
|
||||||
|
# https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg
|
||||||
|
# https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg
|
||||||
|
elsif url.host =~ %r!\A(?:i\.pximg\.net|i\d+\.pixiv\.net)\z!i &&
|
||||||
|
url.path =~ %r!\A(/c/\w+)?/img-[a-z-]+/img/\d{4}(?:/\d{2}){5}/(?<illust_id>\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
|
||||||
|
return $~[:illust_id].to_i
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -1438,79 +1438,9 @@ class PostTest < ActiveSupport::TestCase
|
|||||||
|
|
||||||
context "that is from pixiv" do
|
context "that is from pixiv" do
|
||||||
should "save the pixiv id" do
|
should "save the pixiv id" do
|
||||||
@post.update(source: "https://img18.pixiv.net/img/evazion/14901720.png")
|
|
||||||
assert_equal(14901720, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://img18.pixiv.net/img/evazion/14901720.png")
|
|
||||||
assert_equal(14901720, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i2.pixiv.net/img18/img/evazion/14901720.png")
|
|
||||||
assert_equal(14901720, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i2.pixiv.net/img18/img/evazion/14901720_m.png")
|
|
||||||
assert_equal(14901720, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i2.pixiv.net/img18/img/evazion/14901720_s.png")
|
|
||||||
assert_equal(14901720, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg")
|
|
||||||
assert_equal(46304396, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png")
|
@post.update(source: "http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png")
|
||||||
assert_equal(46304396, @post.pixiv_id)
|
assert_equal(46304396, @post.pixiv_id)
|
||||||
@post.pixiv_id = nil
|
@post.pixiv_id = nil
|
||||||
|
|
||||||
@post.update(source: "http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip")
|
|
||||||
assert_equal(46323924, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@post.update(source: "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
|
|
||||||
@post.update(source: "http://www.pixiv.net/i/18557054")
|
|
||||||
assert_equal(18557054, @post.pixiv_id)
|
|
||||||
@post.pixiv_id = nil
|
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,17 @@ require 'test_helper'
|
|||||||
|
|
||||||
module Sources
|
module Sources
|
||||||
class PixivTest < ActiveSupport::TestCase
|
class PixivTest < ActiveSupport::TestCase
|
||||||
|
def assert_illust_id(illust_id, url)
|
||||||
|
site = Sources::Strategies.find(url)
|
||||||
|
assert_equal(illust_id, site.illust_id)
|
||||||
|
assert_nothing_raised { site.to_h }
|
||||||
|
end
|
||||||
|
|
||||||
|
def assert_nil_illust_id(url)
|
||||||
|
site = Sources::Strategies.find(url)
|
||||||
|
assert_nil(site.illust_id)
|
||||||
|
end
|
||||||
|
|
||||||
def get_source(source)
|
def get_source(source)
|
||||||
@site = Sources::Strategies.find(source)
|
@site = Sources::Strategies.find(source)
|
||||||
|
|
||||||
@@ -21,21 +32,6 @@ module Sources
|
|||||||
end
|
end
|
||||||
|
|
||||||
context "in all cases" do
|
context "in all cases" do
|
||||||
context "A touch page" do
|
|
||||||
setup do
|
|
||||||
@site = Sources::Strategies.find("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915")
|
|
||||||
@image_urls = @site.image_urls
|
|
||||||
end
|
|
||||||
|
|
||||||
should "get all the image urls" do
|
|
||||||
expected_urls = [
|
|
||||||
"https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png",
|
|
||||||
"https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p1.png"
|
|
||||||
].sort
|
|
||||||
assert_equal(expected_urls, @image_urls.sort)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
context "A gallery page" do
|
context "A gallery page" do
|
||||||
setup do
|
setup do
|
||||||
@site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482")
|
@site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482")
|
||||||
@@ -292,6 +288,51 @@ module Sources
|
|||||||
assert_includes(source.profile_urls, "https://www.pixiv.net/stacc/uroobnad")
|
assert_includes(source.profile_urls, "https://www.pixiv.net/stacc/uroobnad")
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
context "parsing illust ids" do
|
||||||
|
should "parse ids from illust urls" do
|
||||||
|
assert_illust_id(46324488, "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=46324488")
|
||||||
|
assert_illust_id(46324488, "https://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46324488&page=0")
|
||||||
|
assert_illust_id(46324488, "https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png")
|
||||||
|
assert_illust_id(46324488, "https://i.pximg.net/img-master/img/2014/10/03/18/10/20/46324488_p0_master1200.jpg")
|
||||||
|
|
||||||
|
assert_illust_id(46785915, "https://i.pximg.net/c/250x250_80_a2/img-master/img/2014/10/29/09/27/19/46785915_p0_square1200.jpg")
|
||||||
|
|
||||||
|
assert_illust_id(46323924, "http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip")
|
||||||
|
assert_illust_id(46304396, "http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png")
|
||||||
|
assert_illust_id(46304396, "http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg")
|
||||||
|
|
||||||
|
assert_illust_id(14901720, "http://img18.pixiv.net/img/evazion/14901720.png")
|
||||||
|
assert_illust_id(14901720, "http://i2.pixiv.net/img18/img/evazion/14901720.png")
|
||||||
|
assert_illust_id(14901720, "http://i2.pixiv.net/img18/img/evazion/14901720_m.png")
|
||||||
|
assert_illust_id(14901720, "http://i2.pixiv.net/img18/img/evazion/14901720_s.png")
|
||||||
|
|
||||||
|
assert_illust_id(18557054, "http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png")
|
||||||
|
assert_illust_id(18557054, "http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png")
|
||||||
|
assert_illust_id(18557054, "http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_64x64.jpg")
|
||||||
|
assert_illust_id(18557054, "http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png")
|
||||||
|
assert_illust_id(18557054, "http://www.pixiv.net/i/18557054")
|
||||||
|
end
|
||||||
|
|
||||||
|
should "not misparse ids from fanbox urls" do
|
||||||
|
assert_nil_illust_id("https://fanbox.pixiv.net/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png")
|
||||||
|
assert_nil_illust_id("https://pixiv.pximg.net/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg")
|
||||||
|
assert_nil_illust_id("https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg")
|
||||||
|
assert_nil_illust_id("https://pixiv.pximg.net/c/1200x630_90_a2_g5/fanbox/public/images/post/186919/cover/VCI1Mcs2rbmWPg0mmiTisovn.jpeg")
|
||||||
|
end
|
||||||
|
|
||||||
|
should "not misparse ids from sketch urls" do
|
||||||
|
assert_nil_illust_id("https://img-sketch.pixiv.net/uploads/medium/file/4463372/8906921629213362989.jpg")
|
||||||
|
assert_nil_illust_id("https://img-sketch.pximg.net/c!/w=540,f=webp:jpeg/uploads/medium/file/4463372/8906921629213362989.jpg")
|
||||||
|
assert_nil_illust_id("https://sketch.pixiv.net/items/1588346448904706151")
|
||||||
|
end
|
||||||
|
|
||||||
|
should "not misparse ids from novel urls" do
|
||||||
|
assert_nil_illust_id("https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg")
|
||||||
|
assert_nil_illust_id("https://i.pximg.net/c/600x600/novel-cover-master/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42_master1200.jpg")
|
||||||
|
assert_nil_illust_id("https://www.pixiv.net/novel/show.php?id=10617324")
|
||||||
|
end
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user