nijie: fix failure to handle certain image urls.

Fix IMAGE_URL regex not matching urls of this form:

* https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png

Because the regex did not match, the illust id was not parsed from the url, so
`#image_url` returned nil and uploads failed because there was no url to download.
This commit is contained in:
evazion
2018-11-12 17:58:36 -06:00
parent 8f6c710c6b
commit bea8c2a4b8
2 changed files with 24 additions and 4 deletions

View File

@@ -5,6 +5,7 @@
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png (page: http://nijie.info/view.php?id=218856)
# * https://pic01.nijie.info/nijie_picture/diff/main/218856_1_236014_20170620101330.png
# * https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png (page: http://nijie.info/view_popup.php?id=265428#diff_2)
# * https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png (page: http://nijie.info/view_popup.php?id=287736#diff_2)
#
# Unhandled:
#
@@ -47,11 +48,17 @@ module Sources
# Patterns for recognizing nijie image urls. The filename patterns expose the
# ids through named captures: every pattern captures artist_id and timestamp
# (a 14-digit run); FILENAME2, FILENAME3 and FILENAME can also capture illust_id.
#
# FILENAME1: <artist_id>_<timestamp>, optionally followed by _<digits>.
# https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg
# https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# https://pic05.nijie.info/nijie_picture/diff/main/559053_20180604023346_1.png
FILENAME1 = %r!(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?!i
# FILENAME2: <illust_id>_<digits>_<artist_id>_<timestamp>.
# https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png
FILENAME2 = %r!(?<illust_id>\d+)_\d+_(?<artist_id>\d+)_(?<timestamp>\d{14})!i
# FILENAME3: <illust_id>_<artist_id>_<timestamp>_<digits> (the trailing _<digits> is required).
# https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png
FILENAME3 = %r!(?<illust_id>\d+)_(?<artist_id>\d+)_(?<timestamp>\d{14})_\d+!i
# Directory part of the path: an optional "__rs_<word chars>/" prefix, then
# "nijie_picture", then an optional "/diff/main".
DIR = %r!(?:__rs_\w+/)?nijie_picture(?:/diff/main)?!
# Single-pattern filename: an optional "<illust_id>_<page>" prefix (the page
# capture includes its trailing underscore), then <artist_id>_<timestamp>,
# optionally followed by _<digits>.
FILENAME = %r!(?:(?<illust_id>\d+)_(?<page>\d+_))?(?<artist_id>\d+)_(?<timestamp>\d{14})(?:_\d+)?!i
# Whole-string match (\A ... \z) for an http(s) url on a pic<digits>.nijie.info
# host whose path is DIR, a filename, and a ".<word chars>" extension.
# NOTE(review): IMAGE_URL is assigned twice in this view. Presumably the first
# assignment (built from FILENAME) is the pre-change line and the second (the
# union of FILENAME1..FILENAME3) is its replacement -- confirm against the
# actual file, since Ruby warns when a constant is reassigned.
IMAGE_URL = %r!\Ahttps?://pic\d+\.nijie\.info/#{DIR}/#{FILENAME}\.\w+\z!i
IMAGE_URL = %r!\Ahttps?://pic\d+\.nijie\.info/#{DIR}/#{Regexp.union(FILENAME1, FILENAME2, FILENAME3)}\.\w+\z!i
def domains
["nijie.info"]

View File

@@ -186,7 +186,7 @@ module Sources
end
end
context "An image url that contains the illust id" do
context "An image url that contains the illust id and artist id (format 1)" do
should "fetch all the data" do
site = Sources::Strategies.find("https://pic03.nijie.info/nijie_picture/diff/main/218856_4_236014_20170620101333.png")
@@ -200,6 +200,19 @@ module Sources
end
end
# Covers diff-image urls whose filename starts with the illust id followed by
# the artist id, then a timestamp and a trailing index.
context "An image url that contains the illust id and artist id (format 2)" do
  should "fetch all the data" do
    image_url = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png"
    post_url = "https://nijie.info/view.php?id=287736"
    strategy = Sources::Strategies.find(image_url)

    # The page and canonical urls are both expected to be the illust's view page.
    assert_equal(post_url, strategy.page_url)
    assert_equal(post_url, strategy.canonical_url)

    # Artist details come from the second number in the filename.
    assert_equal("https://nijie.info/members.php?id=161475", strategy.profile_url)
    assert_equal("みな本", strategy.artist_name)

    # The strategy's own url is expected back as the image url, and the post has three images.
    assert_equal(strategy.url, strategy.image_url)
    assert_equal(3, strategy.image_urls.size)
  end
end
context "An artist profile url" do
should "not fail" do
site = Sources::Strategies.find("https://nijie.info/members_illust.php?id=236014")