diff --git a/app/logical/source/extractor/pixiv.rb b/app/logical/source/extractor/pixiv.rb index 482632c0b..16e8bd23e 100644 --- a/app/logical/source/extractor/pixiv.rb +++ b/app/logical/source/extractor/pixiv.rb @@ -39,8 +39,15 @@ module Source def image_urls if is_ugoira? [api_ugoira[:originalSrc]] + # If it's a full image URL, then use it as-is instead of looking it up in the API, because it could be the + # original version of an image that has since been revised. + elsif parsed_url.full_image_url? + [parsed_url.to_s] + # If it's a sample image URL, then try to find the full image URL in the API if possible. elsif parsed_url.image_url? && parsed_url.page && original_urls.present? [original_urls[parsed_url.page]] + # Otherwise if it's a sample image and we can't get the full image from the API (presumably because the post + # has been deleted), just use the sample version as is. elsif parsed_url.image_url? [parsed_url.to_s] else diff --git a/app/logical/source/url/pixiv.rb b/app/logical/source/url/pixiv.rb index 0f059746f..c6d91fd55 100644 --- a/app/logical/source/url/pixiv.rb +++ b/app/logical/source/url/pixiv.rb @@ -2,7 +2,7 @@ module Source class URL::Pixiv < Source::URL - attr_reader :work_id, :page, :username, :user_id + attr_reader :work_id, :image_type, :page, :username, :user_id def self.match?(url) return false if Source::URL::Fanbox.match?(url) || Source::URL::PixivSketch.match?(url) || Source::URL::Booth.match?(url) @@ -27,7 +27,8 @@ module Source # but not: # # https://i.pximg.net/novel-cover-original/img/2019/01/14/01/15/05/10617324_d84daae89092d96bbe66efafec136e42.jpg - in *, ("img-original" | "img-master" | "img-zip-ugoira" | "img-inf" | "custom-thumb"), "img", year, month, day, hour, min, sec, file if image_url? + in *, ("img-original" | "img-master" | "img-zip-ugoira" | "img-inf" | "custom-thumb") => type, "img", year, month, day, hour, min, sec, file if image_url? + @image_type = type parse_filename # http://img18.pixiv.net/img/evazion/14901720.png @@ -128,6 +129,10 @@ module Source host.in?(["i.pximg.net", "i-f.pximg.net", "tc-pximg01.techorus-cdn.com"]) || host.match?(/\A(i\d+|img\d+)\.pixiv\.net\z/) end + def full_image_url? + image_type.in?(%w[img-original img-zip-ugoira]) + end + def is_ugoira? @ugoira.present? end diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb index abab8eb83..4c6f035ad 100644 --- a/test/unit/sources/pixiv_test.rb +++ b/test/unit/sources/pixiv_test.rb @@ -166,6 +166,35 @@ module Sources end end + context "A raw image URL that has been revised should get the unrevised image URL" do + strategy_should_work( + "https://i.pximg.net/img-original/img/2022/08/14/19/23/06/100474393_p0.png", + deleted: true, + image_urls: ["https://i.pximg.net/img-original/img/2022/08/14/19/23/06/100474393_p0.png"], + artist_commentary_title: "シャイリリー", + artist_name: "影おじ (隠れエリア)", + profile_url: "https://www.pixiv.net/users/6570768", + profile_urls: %w[https://www.pixiv.net/stacc/haku3490 https://www.pixiv.net/users/6570768], + tags: %w[r-18 shylily シャイリリー バーチャルyoutuber 両手に茎 乱交], + ) + end + + context "A post has been revised should get the revised image URLs" do + strategy_should_work( + "https://www.pixiv.net/en/artworks/100474393", + image_urls: %w[ + https://i.pximg.net/img-original/img/2022/08/14/21/21/24/100474393_p0.png + https://i.pximg.net/img-original/img/2022/08/14/21/21/24/100474393_p1.png + https://i.pximg.net/img-original/img/2022/08/14/21/21/24/100474393_p2.png + ], + artist_commentary_title: "シャイリリー", + artist_name: "影おじ (隠れエリア)", + profile_url: "https://www.pixiv.net/users/6570768", + profile_urls: %w[https://www.pixiv.net/stacc/haku3490 https://www.pixiv.net/users/6570768], + tags: %w[r-18 shylily シャイリリー バーチャルyoutuber 両手に茎 乱交], + ) + end + context "fetching the commentary" do should "work when the description is blank" do get_source("https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981746")