Fix skeb strategy
This commit is contained in:
@@ -49,7 +49,12 @@ module Sources
|
||||
# Heavy heuristic to extract the uncropped image among the nightmare that is the skeb minified json
|
||||
candidates = page&.css("script")&.map { |script| script.text&.scan(/(https:\\u002F\\u002Fskeb\.imgix\.net.*?)(?:"|,|\s)/) }
|
||||
candidates = candidates.to_a.flatten.compact.uniq.reject { |match| match.include? "crop=" }
|
||||
candidates.map { |img| img.gsub("\\u002F", "/") }
|
||||
# sometimes skeb offers a slightly smaller, non-watermarked version of the picture
|
||||
unwatermarked = candidates.reject { |match| match.include? "=SAMPLE" }
|
||||
unsampled = unwatermarked.reject { |match| match.include? "q=" }
|
||||
|
||||
final_candidates = [unsampled, unwatermarked, candidates].reject(&:empty?).first&.to_a
|
||||
final_candidates.map { |img| img.gsub("\\u002F", "/") }
|
||||
else
|
||||
[]
|
||||
end
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
require 'test_helper'
|
||||
require "test_helper"
|
||||
|
||||
module Sources
|
||||
class SkebTest < ActiveSupport::TestCase
|
||||
@@ -65,6 +65,32 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
context "A post with a smaller unwatermarked version" do
|
||||
should "get the smaller but clean picture" do
|
||||
site = Sources::Strategies.find("https://skeb.jp/@2gi0gi_/works/13")
|
||||
assert_equal(["https://skeb.imgix.net/requests/191942_0?bg=%23fff&fm=jpg&q=45&w=696&s=5783ee951cc55d183713395926389453"], site.image_urls)
|
||||
end
|
||||
end
|
||||
|
||||
context "A post with both the small and large version clean" do
|
||||
should "just get the bigger image" do
|
||||
site = Sources::Strategies.find("https://skeb.jp/@LambOic029/works/149")
|
||||
assert_equal(["https://skeb.imgix.net/uploads/origins/ebe94108-7ca7-4b3d-b80c-b37759ffd695?bg=%23fff&auto=format&w=800&s=25a889a808e6062d03985f7408201a4d"], site.image_urls)
|
||||
end
|
||||
end
|
||||
|
||||
context "A post with two images" do
|
||||
should "get both correctly" do
|
||||
site = Sources::Strategies.find("https://skeb.jp/@LambOic029/works/146")
|
||||
image_urls = %w[
|
||||
https://skeb.imgix.net/uploads/origins/e888bb27-e1a6-48ec-a317-7615252ff818?bg=%23fff&auto=format&w=800&s=7c518083d3fb19c8d5e7376f628f0fb0
|
||||
https://skeb.imgix.net/uploads/origins/3fc062c5-231d-400f-921f-22d77cde54df?bg=%23fff&auto=format&w=800&s=f20697609ca2923f96fc49ca7eba22b6
|
||||
]
|
||||
|
||||
assert_equal(image_urls, site.image_urls)
|
||||
end
|
||||
end
|
||||
|
||||
context "normalizing for source" do
|
||||
should "avoid normalizing unnormalizable urls" do
|
||||
bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a"
|
||||
|
||||
Reference in New Issue
Block a user