diff --git a/app/logical/danbooru/url.rb b/app/logical/danbooru/url.rb
index d9f379906..28d255db1 100644
--- a/app/logical/danbooru/url.rb
+++ b/app/logical/danbooru/url.rb
@@ -43,8 +43,13 @@ module Danbooru
       nil
     end
 
-    # @return [String] the URL in normalized form
+    # @return [String] the URL in unnormalized form
     def to_s
+      original_url
+    end
+
+    # @return [String] the URL in normalized form
+    def to_normalized_s
       url.to_str
     end
 
diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb
index adf235e5d..d2dc04edc 100644
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -19,6 +19,7 @@ module Source
   class URL < Danbooru::URL
     SUBCLASSES = [
       Source::URL::Twitter,
+      Source::URL::Skeb,
       Source::URL::TwitPic,
       Source::URL::Foundation,
     ]
diff --git a/app/logical/source/url/skeb.rb b/app/logical/source/url/skeb.rb
new file mode 100644
index 000000000..1fdb5bb45
--- /dev/null
+++ b/app/logical/source/url/skeb.rb
@@ -0,0 +1,100 @@
+# frozen_string_literal: true
+
+# Image URLs
+#
+## Non-watermarked:
+#
+# # Page: https://skeb.jp/@OrvMZ/works/3
+# * https://skeb.imgix.net/requests/199886_0?bg=%23fff&auto=format&w=800&s=5a6a908ab964fcdfc4713fad179fe715
+#
+## Watermarked:
+#
+# * https://skeb.imgix.net/requests/73290_0?bg=%23fff&auto=format&txtfont=bold&txtshad=70&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150&txt=SAMPLE&w=800&s=4843435cff85d623b1f657209d131526
+# * https://skeb.imgix.net/uploads/origins/04d62c2f-e396-46f9-903a-3ca8bd69fc7c?bg=%23fff&auto=format&w=800&s=966c5d0389c3b94dc36ac970f812bef4 (new format)
+#
+## Full Size (found in commissioner_upload):
+#
+# # Page: https://skeb.jp/@matsuda_Toki/works/101
+# * https://skeb.imgix.net/requests/53269_1?bg=%23fff&fm=png&dl=53269.png&w=1.0&h=1.0&s=44588ea9c41881049e392adb1df21cce
+#
+# The signature is required and tied to the parameters. Doesn't seem like it's possible to reverse engineer it to remove the watermark, unfortunately.
+#
+# Video URLs
+#
+# # Page: https://skeb.jp/@kaisouafuro/works/112
+# * https://skeb-production.s3.ap-northeast-1.amazonaws.com/uploads/outputs/20f9d68f-50ec-44ae-8630-173fc38a2d6a?response-content-disposition=attachment%3B%20filename%3D%22458093-1.output.mp4%22%3B%20filename%2A%3DUTF-8%27%27458093-1.output.mp4&response-content-type=video%2Fmp4&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIVPUTFQBBL7UDSUA%2F20220221%2Fap-northeast-1%2Fs3%2Faws4_request&X-Amz-Date=20220221T200057Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=7f028cfd9a56344cf1d42410063fad3ef30a1e47b83cef047247e0c37df01df0
+#
+# Page URLs
+#
+# * https://skeb.jp/@OrvMZ/works/3 (non-watermarked)
+# * https://skeb.jp/@OrvMZ/works/1 (separated request and client's message after delivery. We can't get the latter)
+# * https://skeb.jp/@asanagi/works/16 (age-restricted, watermarked)
+# * https://skeb.jp/@asanagi/works/6 (private, returns 404)
+# * https://skeb.jp/@nasuno42/works/30 (multi-image post)
+#
+# Profile URLs
+#
+# Since skeb forces login through twitter, usernames are the same as twitter
+#
+# * https://skeb.jp/@asanagi
+#
+# API URLs
+#
+## Must send "Authorization: Bearer null"
+#
+# * https://skeb.jp/api/users/kaisouafuro
+# * https://skeb.jp/api/users/kaisouafuro/works/112
+#
+class Source::URL::Skeb < Source::URL
+  attr_reader :username, :work_id, :image_id, :image_uuid
+
+  def self.match?(url)
+    url.host.in?(%w[skeb.jp skeb.imgix.net skeb-production.s3.ap-northeast-1.amazonaws.com])
+  end
+
+  def parse
+    case [domain, *path_segments]
+
+    # https://skeb.jp/@asanagi
+    in "skeb.jp", /^@/ => username
+      @username = username.delete_prefix("@")
+
+    # https://skeb.jp/@OrvMZ/works/3
+    in "skeb.jp", /^@/ => username, "works", work_id
+      @username = username.delete_prefix("@")
+      @work_id = work_id
+
+    # https://skeb.imgix.net/requests/199886_0?bg=%23fff&auto=format&w=800&s=5a6a908ab964fcdfc4713fad179fe715
+    # https://skeb.imgix.net/requests/73290_0?bg=%23fff&auto=format&txtfont=bold&txtshad=70&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150&txt=SAMPLE&w=800&s=4843435cff85d623b1f657209d131526
+    # https://skeb.imgix.net/requests/53269_1?bg=%23fff&fm=png&dl=53269.png&w=1.0&h=1.0&s=44588ea9c41881049e392adb1df21cce
+    in "imgix.net", "requests", image_id
+      @image_id = image_id
+
+    # https://skeb.imgix.net/uploads/origins/04d62c2f-e396-46f9-903a-3ca8bd69fc7c?bg=%23fff&auto=format&w=800&s=966c5d0389c3b94dc36ac970f812bef4
+    in "imgix.net", "uploads", "origins", image_uuid
+      @image_uuid = image_uuid
+
+    # Page: https://skeb.jp/@kaisouafuro/works/112
+    # https://skeb-production.s3.ap-northeast-1.amazonaws.com/uploads/outputs/20f9d68f-50ec-44ae-8630-173fc38a2d6a?response-content-disposition=attachment%3B%20filename%3D%22458093-1.output.mp4%22%3B%20filename%2A%3DUTF-8%27%27458093-1.output.mp4&response-content-type=video%2Fmp4&X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIVPUTFQBBL7UDSUA%2F20220221%2Fap-northeast-1%2Fs3%2Faws4_request&X-Amz-Date=20220221T200057Z&X-Amz-Expires=604800&X-Amz-SignedHeaders=host&X-Amz-Signature=7f028cfd9a56344cf1d42410063fad3ef30a1e47b83cef047247e0c37df01df0
+    in "amazonaws.com", "uploads", "outputs", image_uuid
+      @image_uuid = image_uuid
+
+    else
+    end
+  end
+
+  # True if the URL's domain is exactly imgix.net or amazonaws.com (the hosts serving the image/video files).
+  def image_url?
+    domain.in?(%w[imgix.net amazonaws.com])
+  end
+
+  # True if this is an image URL whose `fm` (format) param is gif or mp4.
+  def animated?
+    image_url? && params[:fm].in?(["gif", "mp4"])
+  end
+
+  # True if this is an image URL carrying a `txt` param (e.g. the `txt=SAMPLE` overlay).
+  def watermarked?
+    image_url? && params[:txt].present?
+  end
+end
diff --git a/app/logical/sources/strategies/skeb.rb b/app/logical/sources/strategies/skeb.rb
index 8351a5b06..ba774d41c 100644
--- a/app/logical/sources/strategies/skeb.rb
+++ b/app/logical/sources/strategies/skeb.rb
@@ -1,72 +1,54 @@
 # frozen_string_literal: true
 
-# Image URLS
-## Non-watermarked:
-# * https://skeb.imgix.net/requests/199886_0?bg=%23fff&auto=format&w=800&s=5a6a908ab964fcdfc4713fad179fe715
-## Watermarked:
-# * https://skeb.imgix.net/requests/73290_0?bg=%23fff&auto=format&txtfont=bold&txtshad=70&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150&txt=SAMPLE&w=800&s=4843435cff85d623b1f657209d131526
-# * https://skeb.imgix.net/uploads/origins/04d62c2f-e396-46f9-903a-3ca8bd69fc7c?bg=%23fff&auto=format&w=800&s=966c5d0389c3b94dc36ac970f812bef4 (new format)
-## Full Size (found in commissioner_upload):
-# * https://skeb.imgix.net/requests/53269_1?bg=%23fff&fm=png&dl=53269.png&w=1.0&h=1.0&s=44588ea9c41881049e392adb1df21cce
-#
-# The signature is required and tied to the parameters. Doesn't seem like it's possible to reverse engineer it to remove the watermark, unfortunately.
-#
-# Page URLS
-# * https://skeb.jp/@OrvMZ/works/3 (non-watermarked)
-# * https://skeb.jp/@OrvMZ/works/1 (separated request and client's message after delivery. We can't get the latter)
-# * https://skeb.jp/@asanagi/works/16 (age-restricted, watermarked)
-# * https://skeb.jp/@asanagi/works/6 (private, returns 404)
-# * https://skeb.jp/@nasuno42/works/30 (multi-image post)
-#
-# Profile URLS
-# Since skeb forces login through twitter, usernames are the same as twitter
-# * https://skeb.jp/@asanagi
-
+# @see Source::URL::Skeb
 module Sources
   module Strategies
     class Skeb < Base
-      PROFILE_URL = %r{https?://(?:www\.)?skeb\.jp/@(?<artist_name>\w+)}i
-      PAGE_URL = %r{#{PROFILE_URL}/works/(?<illust_id>\d+)}i
-      IMAGE_URL = %r{https?://(?:(?:www\.)?skeb\.imgix\.net|skeb-production.s3.ap-northeast-1.amazonaws.com/)/.+}i
-      UUID_REGEX = %r{/(?<uuid>(?:(?:\w+-)+\w+|(?:\d+_\d+))).*(?:fm=(?<type>\w+))?.*}
-
-      def domains
-        ["skeb.jp"]
-      end
-
-      def image_domains
-        ["skeb.imgix.net", "skeb-production.s3.ap-northeast-1.amazonaws.com"]
-      end
+      extend Memoist
 
       def match?
-        return false if parsed_url.nil?
-        parsed_url.domain.in?(domains) || parsed_url.host.in?(image_domains)
+        parsed_url&.site_name == "Skeb"
       end
 
       def site_name
-        "Skeb"
+        parsed_url.site_name
       end
 
       def image_urls
-        if url =~ IMAGE_URL
+        if parsed_url.image_url?
           [url]
-        elsif api_response.present?
-          previews = api_response["previews"].to_a.map { |preview| preview&.dig("url") }.compact.uniq
-
-          unwatermarked = api_response["article_image_url"]
-          return previews unless unwatermarked.present?
-          previews.map do |p|
-            next p unless p[UUID_REGEX, :uuid].present? && p[UUID_REGEX, :uuid] == unwatermarked[UUID_REGEX, :uuid]
-            next p if p[/fm=(\w+)/, 1].in?(["gif", "mp4"])
-            next p unless p.include?("&txt=")
-
-            unwatermarked
-          end
+        elsif unwatermarked_url.present?
+          # If the unwatermarked URL is present, then find and replace the watermarked URL
+          # with the unwatermarked version (unless the watermarked version is a video or
+          # gif, in which case the unwatermarked URL is not used because it's a still image).
+          #
+          # https://skeb.jp/@goma_feet/works/1: https://skeb.imgix.net/uploads/origins/78ca23dc-a053-4ebe-894f-d5a06e228af8?bg=%23fff&auto=format&w=800&s=3de55b04236059113659f99fd6900d7d
+          # https://skeb.jp/@2gi0gi_/works/13: https://skeb.imgix.net/requests/191942_0?bg=%23fff&fm=jpg&q=45&w=696&s=5783ee951cc55d183713395926389453
+          # https://skeb.jp/@tontaro_/works/316: https://skeb.imgix.net/uploads/origins/5097b1e1-18ce-418e-82f0-e7e2cdab1cea?bg=%23fff&auto=format&txtfont=bold&txtshad=70&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150&txt=SAMPLE&fm=mp4&w=800&s=fcff06871e114b3dbf505c04f27b5ed1
+          sample_urls.map do |sample_url|
+            if sample_url.path == unwatermarked_url.path && sample_url.watermarked? && !sample_url.animated?
+              unwatermarked_url
+            else
+              sample_url
+            end
+          end.map(&:to_s)
         else
-          []
+          sample_urls.map(&:to_s)
         end
       end
 
+      def sample_urls
+        api_response["previews"].to_a.pluck("url").compact.map { |url| Source::URL.parse(url) }
+      end
+
+      # Some posts have an unwatermarked version of the image. Usually it's lower
+      # resolution and lower JPEG quality than the watermarked image. Multi-image posts
+      # will have only one unwatermarked URL.
+      def unwatermarked_url
+        return nil if api_response["article_image_url"].nil?
+        Source::URL.parse(api_response["article_image_url"])
+      end
+
       def page_url
         return unless artist_name.present? && illust_id.present?
         "https://skeb.jp/@#{artist_name}/works/#{illust_id}"
@@ -76,13 +58,19 @@ module Sources
         page_url
       end
 
+      def api_url
+        return nil unless artist_name.present? && illust_id.present?
+        "https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"
+      end
+
       def api_response
-        return {} unless artist_name.present? && illust_id.present?
+        return {} unless api_url.present?
+
         headers = {
           Referer: profile_url,
           Authorization: "Bearer null",
         }
-        api_url = "https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"
+
         response = http.cache(1.minute).headers(headers).get(api_url)
         return {} unless response.status == 200
         # The status check is required for private commissions, which return 404
@@ -96,7 +84,7 @@ module Sources
       end
 
       def artist_name
-        urls.map { |u| u[PROFILE_URL, :artist_name] }.compact.first
+        parsed_url.username || parsed_referer&.username
       end
 
       def display_name
@@ -104,7 +92,7 @@ module Sources
       end
 
       def illust_id
-        urls.map { |u| u[PAGE_URL, :illust_id] }.compact.first
+        parsed_url.work_id || parsed_referer&.work_id
       end
 
       def other_names
@@ -127,6 +115,8 @@ module Sources
           artist_commentary_desc
         end
       end
+
+      memoize :api_response
     end
   end
 end
diff --git a/app/models/artist_url.rb b/app/models/artist_url.rb
index 7ca9d44fa..ba0dfb48c 100644
--- a/app/models/artist_url.rb
+++ b/app/models/artist_url.rb
@@ -118,7 +118,7 @@ class ArtistURL < ApplicationRecord
   end
 
   def self.normalize_url(url)
-    Danbooru::URL.parse(url)&.to_s.presence || url
+    Danbooru::URL.parse(url)&.to_normalized_s.presence || url
   end
 
   def url=(url)
diff --git a/app/models/upload.rb b/app/models/upload.rb
index 1a3e21b32..511177edc 100644
--- a/app/models/upload.rb
+++ b/app/models/upload.rb
@@ -70,7 +70,7 @@ class Upload < ApplicationRecord
     class_methods do
       # percent-encode unicode characters in the URL
       def normalize_source(url)
-        Danbooru::URL.parse(url)&.to_s.presence || url
+        Danbooru::URL.parse(url)&.to_normalized_s.presence || url
       end
     end
   end
diff --git a/test/functional/uploads_controller_test.rb b/test/functional/uploads_controller_test.rb
index 47e478f4f..c2f5ea4e3 100644
--- a/test/functional/uploads_controller_test.rb
+++ b/test/functional/uploads_controller_test.rb
@@ -338,6 +338,10 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
       should_upload_successfully("https://foundation.app/@mochiiimo/~/97376")
       should_upload_successfully("https://foundation.app/@mochiiimo/foundation/97376")
       should_upload_successfully("https://foundation.app/@KILLERGF/kgfgen/4")
+
+      should_upload_successfully("https://skeb.jp/@kokuzou593/works/45")
+      should_upload_successfully("https://skeb.jp/@LambOic029/works/146")
+      should_upload_successfully("https://skeb.imgix.net/uploads/origins/307941e9-dbe0-4e4b-93d4-94accdaff9a0?bg=%23fff&auto=format&w=800&s=e0ddfb1fa0d9f23797b338598aae78fa")
     end
   end
 end