From 9205c32424d3fd74bdb3175733fec460dd7bfe63 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 18:30:10 +0200 Subject: [PATCH 01/16] deviantart: revert to 7f482dc35bc7b77bce580ec06b4b708d49d3a29a that's the latest commit made to deviantart files before switching from the developer API to the Javascript backend from the new "Eclipse" frontend. This is necessary because it's basically impossible to download posts now with the JS backend without being logged in, i.e. having the cookies from a logged in user, which can't be used for very long even if exporting them from a browser. You would have to save the cookies deviantart sends you back via the "Set-Cookie" header in a database somewhere in addition to the other added complexity. also * (temporarily) replace HttpartyCache with HTTParty as it's long been removed * fix one case of "last argument as keyword parameter" * change repository url (5d1a1cc87ef5d7409fa81b27e86c0763d8ad0ff6) * remove self-explanatory comment --- app/logical/deviant_art_api_client.rb | 90 ++++++----- app/logical/sources/strategies.rb | 1 + app/logical/sources/strategies/deviant_art.rb | 148 ++++++++++++------ app/logical/sources/strategies/stash.rb | 55 +++++++ config/danbooru_default_config.rb | 9 ++ test/unit/artist_test.rb | 1 + test/unit/sources/deviant_art_test.rb | 5 + test/unit/sources/stash_test.rb | 58 +++++++ 8 files changed, 283 insertions(+), 84 deletions(-) create mode 100644 app/logical/sources/strategies/stash.rb create mode 100644 test/unit/sources/stash_test.rb diff --git a/app/logical/deviant_art_api_client.rb b/app/logical/deviant_art_api_client.rb index 3b8c35734..07bcf50a2 100644 --- a/app/logical/deviant_art_api_client.rb +++ b/app/logical/deviant_art_api_client.rb @@ -1,59 +1,73 @@ -# https://github.com/danbooru/danbooru/issues/4144 +# Authentication is via OAuth2 with the client credentials grant. 
Register a +# new app at https://www.deviantart.com/developers/ to obtain a client_id and +# client_secret. The app doesn't need to be published. # # API requests must send a user agent and must use gzip compression, otherwise # 403 errors will be returned. +# +# API calls operate on UUIDs. The deviation ID in the URL is not the UUID. UUIDs +# are obtained by scraping the HTML page for the <meta property="da:appurl"> element. +# +# * https://www.deviantart.com/developers/ +# * https://www.deviantart.com/developers/authentication +# * https://www.deviantart.com/developers/errors +# * https://www.deviantart.com/developers/http/v1/20160316 -DeviantArtApiClient = Struct.new(:deviation_id) do - extend Memoist +class DeviantArtApiClient + class Error < StandardError; end + BASE_URL = "https://www.deviantart.com/api/v1/oauth2" - def extended_fetch - params = { deviationid: deviation_id, type: "art", include_session: false } - get("https://www.deviantart.com/_napi/da-deviation/shared_api/deviation/extended_fetch", params: params) + attr_reader :client_id, :client_secret, :httparty_options + + def initialize(client_id, client_secret, httparty_options = {}) + @client_id, @client_secret, @httparty_options = client_id, client_secret, httparty_options end - def extended_fetch_json - JSON.parse(extended_fetch.body).with_indifferent_access + # https://www.deviantart.com/developers/http/v1/20160316/deviation_single/bcc296bdf3b5e40636825a942a514816 + def deviation(uuid) + request("/deviation/#{uuid}") end - def download_url - url = extended_fetch_json.dig(:deviation, :extended, :download, :url) - response = get(url) - response.headers[:location] + # https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd + def download(uuid, mature_content: "1") + request("/deviation/download/#{uuid}", mature_content: mature_content) end - def get(url, retries: 1, **options) - response = http.cookies(cookies).get(url, **options) + # 
https://www.deviantart.com/developers/http/v1/20160316/deviation_metadata/7824fc14d6fba6acbacca1cf38c24158 + def metadata(*uuids, mature_content: "1", ext_submission: "1", ext_camera: "1", ext_stats: "1") + params = { + deviationids: uuids.flatten, + mature_content: mature_content, + ext_submission: ext_submission, + ext_camera: ext_camera, + ext_stats: ext_stats, + } - new_cookies = response.cookies.cookies.map { |cookie| { cookie.name => cookie.value } }.reduce(&:merge) - new_cookies = new_cookies.slice(:userinfo, :auth, :authsecure) - if new_cookies.present? - DanbooruLogger.info("DeviantArt: updating cookies", url: url, new_cookies: new_cookies, old_cookies: cookies) - self.cookies = new_cookies - end - - # If the old auth cookie expired we may get a 404 with a new auth cookie - # set. Try again with the new cookie. - if response.code == 404 && retries > 0 - DanbooruLogger.info("DeviantArt: retrying", url: url, cookies: cookies) - response = get(url, retries: retries - 1, **options) - end - - response + request("/deviation/metadata", **params) end - def cookies - Cache.get("deviantart_cookies", 10.years.to_i) do - JSON.parse(Danbooru.config.deviantart_cookies) - end + def request(url, **params) + options = { + base_uri: BASE_URL, + params: { access_token: access_token.token, **params }, + headers: { "Accept-Encoding" => "gzip" }, + format: :plain, + } + + body, code = HTTParty.get(url, **options) + JSON.parse(Zlib.gunzip(body), symbolize_names: true) end - def cookies=(new_cookies) - Cache.put("deviantart_cookies", new_cookies, 10.years.to_i) + def oauth + OAuth2::Client.new(client_id, client_secret, site: "https://www.deviantart.com", token_url: "/oauth2/token") end - def http - HTTP.use(:auto_inflate).headers(Danbooru.config.http_headers.merge("Accept-Encoding" => "gzip")) + def access_token + @access_token = oauth.client_credentials.get_token if @access_token.nil? || @access_token.expired? 
+ @access_token end - memoize :extended_fetch, :extended_fetch_json, :download_url + def access_token=(hash) + @access_token = OAuth2::AccessToken.from_hash(oauth, hash) + end end diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 2c0982f2c..1c4f03d62 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -5,6 +5,7 @@ module Sources Strategies::Pixiv, Strategies::NicoSeiga, Strategies::Twitter, + Strategies::Stash, # must come before DeviantArt Strategies::DeviantArt, Strategies::Tumblr, Strategies::ArtStation, diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index f8e3306e9..1557586af 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -79,45 +79,36 @@ module Sources end def image_urls - [image_url] - end - - def image_url # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user. if api_deviation.blank? - url - elsif api_deviation[:isDownloadable] - api_client.download_url + [url] + elsif api_deviation[:is_downloadable] + src = api_download[:src] + src.sub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://") + src.sub!(/\?.*\z/, "") # strip s3 query params + src.sub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work + [src] + elsif api_deviation.present? 
+ src = api_deviation.dig(:content, :src) + if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ + src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') + src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") + end + src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") + src = src.sub(%r!q_\d+!, "q_100") + [src] else - media = api_deviation[:media] - token = media[:token].first - fullview = media[:types].find { |data| data[:t] == "fullview" && data[:c].present? } - - if fullview.present? - op = fullview[:c].gsub('', media[:prettyName]) - src = "#{media[:baseUri]}/#{op}?token=#{token}" - else - src = "#{media[:baseUri]}?token=#{token}" - end - - if deviation_id && deviation_id.to_i <= 790677560 && src =~ /\Ahttps:\/\/images-wixmp-/i - src = src.gsub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') - src = src.gsub(%r!/v1/(fit|fill)/.*\z!i, "") - end - - src = src.gsub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") - src = src.gsub(%r!q_\d+,strp!, "q_100") - src + raise "Couldn't find image url" # this should never happen end end def page_url - if api_deviation[:url].present? + if api_deviation.present? api_deviation[:url] - elsif deviation_id.present? - page_url_from_image_url + elsif api_url.present? + api_url else - nil + "" end end @@ -134,7 +125,7 @@ module Sources end def profile_url - return nil if artist_name.blank? + return "" if artist_name.blank? "https://www.deviantart.com/#{artist_name.downcase}" end @@ -143,20 +134,19 @@ module Sources def artist_name if artist_name_from_url.present? artist_name_from_url - elsif api_deviation.dig(:author, :username).present? + elsif api_metadata.present? api_metadata.dig(:author, :username) else - nil + "" end end def artist_commentary_title - api_deviation[:title] + api_metadata[:title] end def artist_commentary_desc - return nil unless api_deviation.dig(:extended, :description).present? 
- api_deviation.dig(:extended, :description) + api_metadata[:description] end def normalized_for_artist_finder? @@ -172,10 +162,12 @@ module Sources end def tags - return [] unless api_deviation.dig(:extended, :tags).present? + if api_metadata.blank? + return [] + end - api_deviation.dig(:extended, :tags).map do |tag| - [tag[:name], tag[:url]] + api_metadata[:tags].map do |tag| + [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"] end end @@ -209,6 +201,8 @@ module Sources end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end + public + def self.deviation_id_from_url(url) if url =~ ASSET $~[:base36_deviation_id].try(:to_i, 36) @@ -251,19 +245,81 @@ module Sources self.class.title_from_url(url) || self.class.title_from_url(referer_url) end - def api_client - @api_client ||= DeviantArtApiClient.new(deviation_id) + def api_url + return nil if deviation_id.blank? + "https://www.deviantart.com/deviation/#{deviation_id}" end - def api_deviation - api_client.extended_fetch_json[:deviation] || {} + def page + return nil if api_url.blank? + + options = Danbooru.config.httparty_options.deep_merge( + format: :plain, + headers: { "Accept-Encoding" => "gzip" } + ) + resp = HTTParty.get(api_url, **options) + + if resp.success? + body = Zlib.gunzip(resp.body) + Nokogiri::HTML(body) + # the work was deleted + elsif resp.code == 404 + nil + else + raise HTTParty::ResponseError.new(resp) + end end + memoize :page + + # Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/UUID"> + # For hidden or deleted works the UUID will be nil. + def uuid + return nil if page.nil? + meta = page.search('meta[property="da:appurl"]').first + return nil if meta.nil? 
+ + appurl = meta["content"] + uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] + uuid + end + memoize :uuid + + def api_client + api_client = DeviantArtApiClient.new( + Danbooru.config.deviantart_client_id, + Danbooru.config.deviantart_client_secret, + Danbooru.config.httparty_options + ) + api_client.access_token = Cache.get("da-access-token", 55.minutes) do + api_client.access_token.to_hash + end + api_client + end + memoize :api_client + + def api_deviation + return {} if uuid.nil? + api_client.deviation(uuid) + end + memoize :api_deviation + + def api_metadata + return {} if uuid.nil? + api_client.metadata(uuid)[:metadata].first + end + memoize :api_metadata + + def api_download + return {} if uuid.nil? + api_client.download(uuid) + end + memoize :api_download def api_response { - code: api_client.extended_fetch.code, - headers: api_client.extended_fetch.headers.to_h, - body: api_client.extended_fetch_json + deviation: api_deviation, + metadata: api_metadata, + download: api_download, } end end diff --git a/app/logical/sources/strategies/stash.rb b/app/logical/sources/strategies/stash.rb new file mode 100644 index 000000000..ebb79255a --- /dev/null +++ b/app/logical/sources/strategies/stash.rb @@ -0,0 +1,55 @@ +# Page URLs: +# * https://sta.sh/0wxs31o7nn2 (single image) +# * https://sta.sh/21leo8mz87ue (folder) +# +# Image URLs: +# * https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png +# +# Ref: +# * https://github.com/danbooru/danbooru/issues/3877 +# * https://www.deviantartsupport.com/en/article/what-is-stash-3391708 +# * https://www.deviantart.com/developers/http/v1/20160316/stash_item/4662dd8b10e336486ea9a0b14da62b74 +# +module Sources + module Strategies + class Stash < DeviantArt + STASH = %r{\Ahttps?://sta\.sh/(?[0-9a-zA-Z]+)}i + + def domains + ["deviantart.net", "sta.sh"] + end + + def match? 
+ parsed_urls.map(&:domain).any?("sta.sh") + end + + def site_name + "Sta.sh" + end + + def canonical_url + page_url + end + + def page_url + "https://sta.sh/#{stash_id}" + end + + def api_url + page_url + end + + def self.stash_id_from_url(url) + if url =~ STASH + $~[:post_id].downcase + else + nil + end + end + + def stash_id + [url, referer_url].map{ |x| self.class.stash_id_from_url(x) }.compact.first + end + end + end +end diff --git a/config/danbooru_default_config.rb b/config/danbooru_default_config.rb index 634dde958..2f4ced182 100644 --- a/config/danbooru_default_config.rb +++ b/config/danbooru_default_config.rb @@ -333,6 +333,15 @@ module Danbooru nil end + # Register at https://www.deviantart.com/developers/ + def deviantart_client_id + nil + end + + def deviantart_client_secret + nil + end + # http://tinysubversions.com/notes/mastodon-bot/ def pawoo_client_id nil diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 8f632b512..b9dc3a90c 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -205,6 +205,7 @@ class ArtistTest < ActiveSupport::TestCase context "when finding deviantart artists" do setup do + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? FactoryBot.create(:artist, :name => "artgerm", :url_string => "http://artgerm.deviantart.com/") FactoryBot.create(:artist, :name => "trixia", :url_string => "http://trixdraws.deviantart.com/") end diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index bda6bef2b..dbc084106 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -2,6 +2,11 @@ require 'test_helper' module Sources class DeviantArtTest < ActiveSupport::TestCase + def setup + super + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? 
+ end + context "A page url" do setup do @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") diff --git a/test/unit/sources/stash_test.rb b/test/unit/sources/stash_test.rb new file mode 100644 index 000000000..b005bb9c8 --- /dev/null +++ b/test/unit/sources/stash_test.rb @@ -0,0 +1,58 @@ +require 'test_helper' + +module Sources + class StashTest < ActiveSupport::TestCase + def setup + super + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? + end + + context "A https://sta.sh/:id page url" do + should "work" do + @site = Sources::Strategies.find("https://sta.sh/0wxs31o7nn2") + + assert_equal("noizave", @site.artist_name) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) + + assert_equal("A pepe", @site.artist_commentary_title) + assert_equal("This is a test.", @site.artist_commentary_desc) + + assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) + assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) + assert_equal("http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", @site.image_url) + assert_equal(["http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"], @site.image_urls) + end + end + + context "A https://orig00.deviantart.net/* image url" do + context "with a https://sta.sh/:id referer" do + should "work" do + @site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", "https://sta.sh/0wxs31o7nn2") + + assert_equal("noizave", @site.artist_name) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) + + assert_equal("A pepe", @site.artist_commentary_title) + assert_equal("This is a test.", @site.artist_commentary_desc) + + assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) + assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) + 
assert_equal("http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", @site.image_url) + assert_equal(["http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"], @site.image_urls) + end + end + + context "without a referer" do + should "use the base deviantart strategy" do + @site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png") + + # if all we have is the image url, then we can't tell that this is really a sta.sh image. + assert_equal("Deviant Art", @site.site_name) + + # this is the wrong page, but there's no way to know the correct sta.sh page without the referer. + assert_equal("https://www.deviantart.com/deviation/763305148", @site.page_url) + end + end + end + end +end From f58564a71f3958eb88ecda97b14908e3ad866670 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 20:34:36 +0200 Subject: [PATCH 02/16] deviantart: don't rewrite download url it's all handled through something like https://api-da.wixmp.com/_api/download/file?downloadToken=$TOKEN now so those modifications aren't necessary anymore. In fact, the one to "strip s3 query params" removes the token, breaking the download url. --- app/logical/sources/strategies/deviant_art.rb | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 1557586af..b4b785074 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -83,11 +83,7 @@ module Sources if api_deviation.blank? 
[url] elsif api_deviation[:is_downloadable] - src = api_download[:src] - src.sub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://") - src.sub!(/\?.*\z/, "") # strip s3 query params - src.sub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work - [src] + [api_download[:src]] elsif api_deviation.present? src = api_deviation.dig(:content, :src) if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ From 1a49ef46f990964b6b8ed1677c28e8a1f25b7425 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 21:01:10 +0200 Subject: [PATCH 03/16] deviantart: cache refresh token for 11 weeks it's valid for 3 months according to this: https://www.deviantart.com/developers/authentication#refresh use 11 weeks instead to be safe --- app/logical/sources/strategies/deviant_art.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index b4b785074..a66da90b1 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -286,7 +286,7 @@ module Sources Danbooru.config.deviantart_client_secret, Danbooru.config.httparty_options ) - api_client.access_token = Cache.get("da-access-token", 55.minutes) do + api_client.access_token = Cache.get("da-access-token", 11.weeks) do api_client.access_token.to_hash end api_client From c2e86385a360fed11aa77e94253154d04ac06a46 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 21:17:10 +0200 Subject: [PATCH 04/16] deviantart: don't strip metadata was also part of eba6440b8ba440b4007c4eab2d7840419ada5a85 --- app/logical/sources/strategies/deviant_art.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/logical/sources/strategies/deviant_art.rb 
b/app/logical/sources/strategies/deviant_art.rb index a66da90b1..00f4f48f7 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -91,7 +91,7 @@ module Sources src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") end src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") - src = src.sub(%r!q_\d+!, "q_100") + src = src.gsub(%r!q_\d+,strp!, "q_100") [src] else raise "Couldn't find image url" # this should never happen From 769bf87a4a93805c0ef70bef884d596a6906f38d Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 21:33:43 +0200 Subject: [PATCH 05/16] deviantart: don't apply /intermediary/ hack for gifs gifs are always stored as original anyways so the /intermediary/ url doesn't actually exist for gifs example: https://www.deviantart.com/heartgear/art/Silent-Night-579982816 --- app/logical/sources/strategies/deviant_art.rb | 2 +- test/unit/sources/deviant_art_test.rb | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 00f4f48f7..bcb3a3fb7 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -86,7 +86,7 @@ module Sources [api_download[:src]] elsif api_deviation.present? 
src = api_deviation.dig(:content, :src) - if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ + if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") end diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index dbc084106..d1de694be 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -244,6 +244,16 @@ module Sources end end + context "The source for a non-downloadable animated gif with id<=790677560" do + should "return working image url" do + @site = Sources::Strategies.find("https://www.deviantart.com/heartgear/art/Silent-Night-579982816") + + # md5: 62caac1863aa264a56d548b4b7607097 + assert_match(%r!\Ahttps://images-wixmp-ed30a86b8c4ca887773594c2\.wixmp\.com/f/ea95be00-c5aa-4063-bd55-f5a9183912f7/d9lb1ls-7d625444-0003-4123-bf00-274737ca7fdd.gif\?token=!, @site.image_url) + assert_downloaded(350_156, @site.image_url) + end + end + context "The source for an DeviantArt artwork page" do setup do @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") From f4b4e12235f965c03c7d0b101a0ab3de829d38c6 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 21:39:43 +0200 Subject: [PATCH 06/16] deviantart: use image_url as it's a single image --- app/logical/sources/strategies/deviant_art.rb | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index bcb3a3fb7..1f55c66cb 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -79,11 +79,15 @@ module Sources end def image_urls + [image_url] + end + + def image_url # 
work is private, deleted, or the url didn't contain a deviation id; use image url as given by user. if api_deviation.blank? - [url] + url elsif api_deviation[:is_downloadable] - [api_download[:src]] + api_download[:src] elsif api_deviation.present? src = api_deviation.dig(:content, :src) if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ @@ -92,7 +96,7 @@ module Sources end src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") src = src.gsub(%r!q_\d+,strp!, "q_100") - [src] + src else raise "Couldn't find image url" # this should never happen end From c4a403afca7dd4fa3567c78efdcfb78b718cf97f Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 21:42:00 +0200 Subject: [PATCH 07/16] deviantart: remove unreachable else api_deviation is either #blank? (if condition) or #present? was also part of eba6440b8ba440b4007c4eab2d7840419ada5a85 --- app/logical/sources/strategies/deviant_art.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 1f55c66cb..663da15b0 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -88,7 +88,7 @@ module Sources url elsif api_deviation[:is_downloadable] api_download[:src] - elsif api_deviation.present? 
+ else src = api_deviation.dig(:content, :src) if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') @@ -97,8 +97,6 @@ module Sources src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") src = src.gsub(%r!q_\d+,strp!, "q_100") src - else - raise "Couldn't find image url" # this should never happen end end From 413227e7de858c21cd137af3d732bc012bdac574 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 23:58:13 +0200 Subject: [PATCH 08/16] deviantart: remove #api_url similar change in eba6440b8ba440b4007c4eab2d7840419ada5a85 in case of #page it may get rid of the redirect if artist and title are found --- app/logical/sources/strategies/deviant_art.rb | 13 ++++--------- app/logical/sources/strategies/stash.rb | 6 +++--- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 663da15b0..30aaf91ef 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -103,8 +103,8 @@ module Sources def page_url if api_deviation.present? api_deviation[:url] - elsif api_url.present? - api_url + elsif deviation_id.present? + page_url_from_image_url else "" end @@ -243,19 +243,14 @@ module Sources self.class.title_from_url(url) || self.class.title_from_url(referer_url) end - def api_url - return nil if deviation_id.blank? - "https://www.deviantart.com/deviation/#{deviation_id}" - end - def page - return nil if api_url.blank? + return nil if page_url_from_image_url.blank? options = Danbooru.config.httparty_options.deep_merge( format: :plain, headers: { "Accept-Encoding" => "gzip" } ) - resp = HTTParty.get(api_url, **options) + resp = HTTParty.get(page_url_from_image_url, **options) if resp.success? 
body = Zlib.gunzip(resp.body) diff --git a/app/logical/sources/strategies/stash.rb b/app/logical/sources/strategies/stash.rb index ebb79255a..d27fb89ad 100644 --- a/app/logical/sources/strategies/stash.rb +++ b/app/logical/sources/strategies/stash.rb @@ -32,11 +32,11 @@ module Sources end def page_url - "https://sta.sh/#{stash_id}" + page_url_from_image_url end - def api_url - page_url + def page_url_from_image_url + "https://sta.sh/#{stash_id}" end def self.stash_id_from_url(url) From 2794cd254d42ddef4366c21f94bf04b18fbd7e76 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 00:07:36 +0200 Subject: [PATCH 09/16] deviantart: return nil on failure instead of "" was also part of eba6440b8ba440b4007c4eab2d7840419ada5a85 --- app/logical/sources/strategies/deviant_art.rb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 30aaf91ef..d0dda62cf 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -106,7 +106,7 @@ module Sources elsif deviation_id.present? page_url_from_image_url else - "" + nil end end @@ -123,7 +123,7 @@ module Sources end def profile_url - return "" if artist_name.blank? + return nil if artist_name.blank? "https://www.deviantart.com/#{artist_name.downcase}" end @@ -135,7 +135,7 @@ module Sources elsif api_metadata.present? 
api_metadata.dig(:author, :username) else - "" + nil end end From 46e9f2dede1b86bb16a55fb884b891ee780a4449 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 19:04:24 +0200 Subject: [PATCH 10/16] deviantart: switch to Danbooru::Http httprb doesn't seem to support a base_uri parameter so use URI.join with a relative path instead --- app/logical/deviant_art_api_client.rb | 26 ++++++++----------- app/logical/sources/strategies/deviant_art.rb | 16 ++++-------- 2 files changed, 16 insertions(+), 26 deletions(-) diff --git a/app/logical/deviant_art_api_client.rb b/app/logical/deviant_art_api_client.rb index 07bcf50a2..769690942 100644 --- a/app/logical/deviant_art_api_client.rb +++ b/app/logical/deviant_art_api_client.rb @@ -15,22 +15,22 @@ class DeviantArtApiClient class Error < StandardError; end - BASE_URL = "https://www.deviantart.com/api/v1/oauth2" + BASE_URL = "https://www.deviantart.com/api/v1/oauth2/" - attr_reader :client_id, :client_secret, :httparty_options + attr_reader :client_id, :client_secret - def initialize(client_id, client_secret, httparty_options = {}) - @client_id, @client_secret, @httparty_options = client_id, client_secret, httparty_options + def initialize(client_id, client_secret) + @client_id, @client_secret = client_id, client_secret end # https://www.deviantart.com/developers/http/v1/20160316/deviation_single/bcc296bdf3b5e40636825a942a514816 def deviation(uuid) - request("/deviation/#{uuid}") + request("deviation/#{uuid}") end # https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd def download(uuid, mature_content: "1") - request("/deviation/download/#{uuid}", mature_content: mature_content) + request("deviation/download/#{uuid}", mature_content: mature_content) end # https://www.deviantart.com/developers/http/v1/20160316/deviation_metadata/7824fc14d6fba6acbacca1cf38c24158 @@ -43,19 +43,15 @@ class DeviantArtApiClient 
ext_stats: ext_stats, } - request("/deviation/metadata", **params) + request("deviation/metadata", **params) end def request(url, **params) - options = { - base_uri: BASE_URL, - params: { access_token: access_token.token, **params }, - headers: { "Accept-Encoding" => "gzip" }, - format: :plain, - } + params = { access_token: access_token.token, **params } - body, code = HTTParty.get(url, **options) - JSON.parse(Zlib.gunzip(body), symbolize_names: true) + url = URI.join(BASE_URL, url).to_s + response = Danbooru::Http.cache(1.minute).get(url, params: params) + response.parse.with_indifferent_access end def oauth diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index d0dda62cf..6f39eea33 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -246,20 +246,15 @@ module Sources def page return nil if page_url_from_image_url.blank? - options = Danbooru.config.httparty_options.deep_merge( - format: :plain, - headers: { "Accept-Encoding" => "gzip" } - ) - resp = HTTParty.get(page_url_from_image_url, **options) + resp = Danbooru::Http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1}) - if resp.success? - body = Zlib.gunzip(resp.body) - Nokogiri::HTML(body) + if resp.status.success? 
+ Nokogiri::HTML(resp.body.to_s) # the work was deleted elsif resp.code == 404 nil else - raise HTTParty::ResponseError.new(resp) + raise "failed to fetch page (got code #{resp.code})" end end memoize :page @@ -280,8 +275,7 @@ module Sources def api_client api_client = DeviantArtApiClient.new( Danbooru.config.deviantart_client_id, - Danbooru.config.deviantart_client_secret, - Danbooru.config.httparty_options + Danbooru.config.deviantart_client_secret ) api_client.access_token = Cache.get("da-access-token", 11.weeks) do api_client.access_token.to_hash From 0d5e31868feecdf28cbd3c44db5c9d0f6cfebe19 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 00:55:06 +0200 Subject: [PATCH 11/16] deviantart: fix non-downloadable flash files --- app/logical/sources/strategies/deviant_art.rb | 2 ++ test/unit/sources/deviant_art_test.rb | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 6f39eea33..e69ee913c 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -88,6 +88,8 @@ module Sources url elsif api_deviation[:is_downloadable] api_download[:src] + elsif api_deviation[:flash].present? 
+ api_deviation.dig(:flash, :src) else src = api_deviation.dig(:content, :src) if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index d1de694be..884007616 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -254,6 +254,16 @@ module Sources end end + context "The source for a non-downloadable flash file" do + should "return working image url" do + @site = Sources::Strategies.find("https://www.deviantart.com/heartgear/art/SL-40v3-522007633") + + # md5: 6adf1a3d532f898f44cf9948cbc7db7d + assert_match(%r!\Ahttps://api-da\.wixmp\.com/_api/download/file\?downloadToken=!, @site.image_url) + assert_downloaded(3_496_110, @site.image_url) + end + end + context "The source for an DeviantArt artwork page" do setup do @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") From 70beb7288d297a897f1cf4a2182d677aa137cad6 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 01:08:18 +0200 Subject: [PATCH 12/16] rubocop: fix various issues --- app/logical/deviant_art_api_client.rb | 2 +- app/logical/sources/strategies/deviant_art.rb | 6 ++---- app/logical/sources/strategies/stash.rb | 14 +++++++------- 3 files changed, 10 insertions(+), 12 deletions(-) diff --git a/app/logical/deviant_art_api_client.rb b/app/logical/deviant_art_api_client.rb index 769690942..53abf1c8a 100644 --- a/app/logical/deviant_art_api_client.rb +++ b/app/logical/deviant_art_api_client.rb @@ -40,7 +40,7 @@ class DeviantArtApiClient mature_content: mature_content, ext_submission: ext_submission, ext_camera: ext_camera, - ext_stats: ext_stats, + ext_stats: ext_stats } request("deviation/metadata", **params) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb 
index e69ee913c..c11c69e61 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -201,8 +201,6 @@ module Sources end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end - public - def self.deviation_id_from_url(url) if url =~ ASSET $~[:base36_deviation_id].try(:to_i, 36) @@ -276,7 +274,7 @@ module Sources def api_client api_client = DeviantArtApiClient.new( - Danbooru.config.deviantart_client_id, + Danbooru.config.deviantart_client_id, Danbooru.config.deviantart_client_secret ) api_client.access_token = Cache.get("da-access-token", 11.weeks) do @@ -308,7 +306,7 @@ module Sources { deviation: api_deviation, metadata: api_metadata, - download: api_download, + download: api_download } end end diff --git a/app/logical/sources/strategies/stash.rb b/app/logical/sources/strategies/stash.rb index d27fb89ad..76b5cf2c7 100644 --- a/app/logical/sources/strategies/stash.rb +++ b/app/logical/sources/strategies/stash.rb @@ -40,15 +40,15 @@ module Sources end def self.stash_id_from_url(url) - if url =~ STASH - $~[:post_id].downcase - else - nil - end - end + if url =~ STASH + $~[:post_id].downcase + else + nil + end + end def stash_id - [url, referer_url].map{ |x| self.class.stash_id_from_url(x) }.compact.first + [url, referer_url].map { |x| self.class.stash_id_from_url(x) }.compact.first end end end From 0c180b521c402574c6424bd8a5723df0ec4ddf3a Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 12:09:44 +0200 Subject: [PATCH 13/16] deviantart: avoid download api call if not downloadable because it's included in api_response which is part of /source.json --- app/logical/sources/strategies/deviant_art.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index c11c69e61..788f7fa36 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ 
b/app/logical/sources/strategies/deviant_art.rb @@ -297,7 +297,7 @@ module Sources memoize :api_metadata def api_download - return {} if uuid.nil? + return {} unless uuid.present? && api_deviation[:is_downloadable] api_client.download(uuid) end memoize :api_download From 9dccee1d5e53f886e25f35662bc538b060c49949 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 12:36:06 +0200 Subject: [PATCH 14/16] stash: fix tests checking image_urls isn't necessary as it's [image_url] per definition --- test/unit/sources/stash_test.rb | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/test/unit/sources/stash_test.rb b/test/unit/sources/stash_test.rb index b005bb9c8..4889869c0 100644 --- a/test/unit/sources/stash_test.rb +++ b/test/unit/sources/stash_test.rb @@ -19,8 +19,7 @@ module Sources assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) - assert_equal("http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", @site.image_url) - assert_equal(["http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"], @site.image_urls) + assert_match(%r!\Ahttps://api-da\.wixmp\.com/_api/download/file\?downloadToken=!, @site.image_url) end end @@ -37,8 +36,7 @@ module Sources assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) - assert_equal("http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", @site.image_url) - assert_equal(["http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"], @site.image_urls) + assert_match(%r!\Ahttps://api-da\.wixmp\.com/_api/download/file\?downloadToken=!, @site.image_url) end end @@ -50,7 +48,7 @@ module Sources assert_equal("Deviant Art", @site.site_name) # this is the wrong page, but there's no way to know the correct 
sta.sh page without the referer. - assert_equal("https://www.deviantart.com/deviation/763305148", @site.page_url) + assert_equal("https://www.deviantart.com/noizave/art/A-Pepe-763305148", @site.page_url) end end end From 40fa985e2655c09cf9d597f8db45fb7871a4e530 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 16:17:37 +0200 Subject: [PATCH 15/16] deviantart: use #at_css instead of #search only one result needed, query is css --- app/logical/sources/strategies/deviant_art.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 788f7fa36..bc71cae28 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -263,7 +263,7 @@ module Sources # For hidden or deleted works the UUID will be nil. def uuid return nil if page.nil? - meta = page.search('meta[property="da:appurl"]').first + meta = page.at_css('meta[property="da:appurl"]') return nil if meta.nil? appurl = meta["content"] From 45ae8bfb6f8d8b183b87ccc7b76ee7bde57857cc Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Mon, 11 May 2020 16:35:44 +0200 Subject: [PATCH 16/16] deviantart: support non-downloadable videos --- app/logical/sources/strategies/deviant_art.rb | 2 ++ test/unit/sources/deviant_art_test.rb | 10 ++++++++++ 2 files changed, 12 insertions(+) diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index bc71cae28..eb60440b7 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -90,6 +90,8 @@ module Sources api_download[:src] elsif api_deviation[:flash].present? api_deviation.dig(:flash, :src) + elsif api_deviation[:videos].present? 
+ api_deviation[:videos].max_by { |x| x[:filesize] }[:src] else src = api_deviation.dig(:content, :src) if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index 884007616..9b321f6d2 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -264,6 +264,16 @@ module Sources end end + context "The source for a non-downloadable video file" do + should "return working image url" do + @site = Sources::Strategies.find("https://www.deviantart.com/gs-mantis/art/Chen-Goes-Fishing-505847233") + + # md5: 344ac2b9fd5a87982af4b648aa2b2b0d + assert_equal("https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/v/mp4/fe046bc7-4d68-4699-96c1-19aa464edff6/d8d6281-91959e92-214f-4b2d-a138-ace09f4b6d09.1080p.8e57939eba634743a9fa41185e398d00.mp4", @site.image_url) + assert_downloaded(9_739_947, @site.image_url) + end + end + context "The source for an DeviantArt artwork page" do setup do @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408")