From 606b31184189b8344c5fe120aa0839c5c759b643 Mon Sep 17 00:00:00 2001
From: nonamethanks
Date: Wed, 21 Jul 2021 14:07:45 +0200
Subject: [PATCH] Fix skeb strategy

---
 app/logical/sources/strategies/skeb.rb |  8 +++++++-
 test/unit/sources/skeb_test.rb         | 28 +++++++++++++++++++++++++-
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/app/logical/sources/strategies/skeb.rb b/app/logical/sources/strategies/skeb.rb
index 41200483d..12a9ef877 100644
--- a/app/logical/sources/strategies/skeb.rb
+++ b/app/logical/sources/strategies/skeb.rb
@@ -49,7 +49,13 @@ module Sources
           # Heavy heuristic to extract the uncropped image among the nighmare that is the skeb minified json
           candidates = page&.css("script")&.map { |script| script.text&.scan(/(https:\\u002F\\u002Fskeb\.imgix\.net.*?)(?:"|,|\s)/) }
           candidates = candidates.to_a.flatten.compact.uniq.reject { |match| match.include? "crop=" }
-          candidates.map { |img| img.gsub("\\u002F", "/") }
+          # sometimes skeb offers a slightly-smaller, non-watermarked version picture
+          unwatermarked = candidates.reject { |match| match.include? "=SAMPLE" }
+          unsampled = unwatermarked.reject { |match| match.include? "q=" }
+
+          # first is nil when every list is empty; plain to_a (not &.to_a) turns that nil into []
+          final_candidates = [unsampled, unwatermarked, candidates].reject(&:empty?).first.to_a
+          final_candidates.map { |img| img.gsub("\\u002F", "/") }
         else
           []
         end
diff --git a/test/unit/sources/skeb_test.rb b/test/unit/sources/skeb_test.rb
index f23d49bfc..a076f12e5 100644
--- a/test/unit/sources/skeb_test.rb
+++ b/test/unit/sources/skeb_test.rb
@@ -1,4 +1,4 @@
-require 'test_helper'
+require "test_helper"
 
 module Sources
   class SkebTest < ActiveSupport::TestCase
@@ -65,6 +65,32 @@ module Sources
       end
     end
 
+    context "A post with a smaller unwatermarked version" do
+      should "get the smaller but clean picture" do
+        site = Sources::Strategies.find("https://skeb.jp/@2gi0gi_/works/13")
+        assert_equal(["https://skeb.imgix.net/requests/191942_0?bg=%23fff&fm=jpg&q=45&w=696&s=5783ee951cc55d183713395926389453"], site.image_urls)
+      end
+    end
+
+    context "A post with both the small and large version clean" do
+      should "just get the bigger image" do
+        site = Sources::Strategies.find("https://skeb.jp/@LambOic029/works/149")
+        assert_equal(["https://skeb.imgix.net/uploads/origins/ebe94108-7ca7-4b3d-b80c-b37759ffd695?bg=%23fff&auto=format&w=800&s=25a889a808e6062d03985f7408201a4d"], site.image_urls)
+      end
+    end
+
+    context "A post with two images" do
+      should "get both correctly" do
+        site = Sources::Strategies.find("https://skeb.jp/@LambOic029/works/146")
+        image_urls = %w[
+          https://skeb.imgix.net/uploads/origins/e888bb27-e1a6-48ec-a317-7615252ff818?bg=%23fff&auto=format&w=800&s=7c518083d3fb19c8d5e7376f628f0fb0
+          https://skeb.imgix.net/uploads/origins/3fc062c5-231d-400f-921f-22d77cde54df?bg=%23fff&auto=format&w=800&s=f20697609ca2923f96fc49ca7eba22b6
+        ]
+
+        assert_equal(image_urls, site.image_urls)
+      end
+    end
+
     context "normalizing for source" do
       should "avoid normalizing unnormalizable urls" do
         bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a"