From 9205c32424d3fd74bdb3175733fec460dd7bfe63 Mon Sep 17 00:00:00 2001 From: lllusion3469 <31420484+lllusion3469@users.noreply.github.com> Date: Sun, 10 May 2020 18:30:10 +0200 Subject: [PATCH] deviantart: revert to 7f482dc35bc7b77bce580ec06b4b708d49d3a29a that's the latest commit made to deviantart files before switching from the developer API to the Javascript backend from the new "Eclipse" frontend. This is necessary because it's basically impossible to download posts now with the JS backend without being logged in, i.e. having the cookies from a logged in user, which can't be used for very long even if exporting them from a browser. You would have to save the cookies deviantart sends you back via the "Set-Cookie" header in a database somewhere in addition to the other added complexity. also * (temporarily) replace HttpartyCache with HTTParty as it's long been removed * fix one case of "last argument as keyword parameter" * change repository url (5d1a1cc87ef5d7409fa81b27e86c0763d8ad0ff6) * remove self-explanatory comment --- app/logical/deviant_art_api_client.rb | 90 ++++++----- app/logical/sources/strategies.rb | 1 + app/logical/sources/strategies/deviant_art.rb | 148 ++++++++++++------ app/logical/sources/strategies/stash.rb | 55 +++++++ config/danbooru_default_config.rb | 9 ++ test/unit/artist_test.rb | 1 + test/unit/sources/deviant_art_test.rb | 5 + test/unit/sources/stash_test.rb | 58 +++++++ 8 files changed, 283 insertions(+), 84 deletions(-) create mode 100644 app/logical/sources/strategies/stash.rb create mode 100644 test/unit/sources/stash_test.rb diff --git a/app/logical/deviant_art_api_client.rb b/app/logical/deviant_art_api_client.rb index 3b8c35734..07bcf50a2 100644 --- a/app/logical/deviant_art_api_client.rb +++ b/app/logical/deviant_art_api_client.rb @@ -1,59 +1,73 @@ -# https://github.com/danbooru/danbooru/issues/4144 +# Authentication is via OAuth2 with the client credentials grant. Register a +# new app at https://www.deviantart.com/developers/ to obtain a client_id and +# client_secret. The app doesn't need to be published. # # API requests must send a user agent and must use gzip compression, otherwise # 403 errors will be returned. +# +# API calls operate on UUIDs. The deviation ID in the URL is not the UUID. UUIDs +# are obtained by scraping the HTML page for the element. +# +# * https://www.deviantart.com/developers/ +# * https://www.deviantart.com/developers/authentication +# * https://www.deviantart.com/developers/errors +# * https://www.deviantart.com/developers/http/v1/20160316 -DeviantArtApiClient = Struct.new(:deviation_id) do - extend Memoist +class DeviantArtApiClient + class Error < StandardError; end + BASE_URL = "https://www.deviantart.com/api/v1/oauth2" - def extended_fetch - params = { deviationid: deviation_id, type: "art", include_session: false } - get("https://www.deviantart.com/_napi/da-deviation/shared_api/deviation/extended_fetch", params: params) + attr_reader :client_id, :client_secret, :httparty_options + + def initialize(client_id, client_secret, httparty_options = {}) + @client_id, @client_secret, @httparty_options = client_id, client_secret, httparty_options end - def extended_fetch_json - JSON.parse(extended_fetch.body).with_indifferent_access + # https://www.deviantart.com/developers/http/v1/20160316/deviation_single/bcc296bdf3b5e40636825a942a514816 + def deviation(uuid) + request("/deviation/#{uuid}") end - def download_url - url = extended_fetch_json.dig(:deviation, :extended, :download, :url) - response = get(url) - response.headers[:location] + # https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd + def download(uuid, mature_content: "1") + request("/deviation/download/#{uuid}", mature_content: mature_content) end - def get(url, retries: 1, **options) - response = http.cookies(cookies).get(url, **options) + # https://www.deviantart.com/developers/http/v1/20160316/deviation_metadata/7824fc14d6fba6acbacca1cf38c24158 + def metadata(*uuids, mature_content: "1", ext_submission: "1", ext_camera: "1", ext_stats: "1") + params = { + deviationids: uuids.flatten, + mature_content: mature_content, + ext_submission: ext_submission, + ext_camera: ext_camera, + ext_stats: ext_stats, + } - new_cookies = response.cookies.cookies.map { |cookie| { cookie.name => cookie.value } }.reduce(&:merge) - new_cookies = new_cookies.slice(:userinfo, :auth, :authsecure) - if new_cookies.present? - DanbooruLogger.info("DeviantArt: updating cookies", url: url, new_cookies: new_cookies, old_cookies: cookies) - self.cookies = new_cookies - end - - # If the old auth cookie expired we may get a 404 with a new auth cookie - # set. Try again with the new cookie. - if response.code == 404 && retries > 0 - DanbooruLogger.info("DeviantArt: retrying", url: url, cookies: cookies) - response = get(url, retries: retries - 1, **options) - end - - response + request("/deviation/metadata", **params) end - def cookies - Cache.get("deviantart_cookies", 10.years.to_i) do - JSON.parse(Danbooru.config.deviantart_cookies) - end + def request(url, **params) + options = { + base_uri: BASE_URL, + params: { access_token: access_token.token, **params }, + headers: { "Accept-Encoding" => "gzip" }, + format: :plain, + } + + body, code = HTTParty.get(url, **options) + JSON.parse(Zlib.gunzip(body), symbolize_names: true) end - def cookies=(new_cookies) - Cache.put("deviantart_cookies", new_cookies, 10.years.to_i) + def oauth + OAuth2::Client.new(client_id, client_secret, site: "https://www.deviantart.com", token_url: "/oauth2/token") end - def http - HTTP.use(:auto_inflate).headers(Danbooru.config.http_headers.merge("Accept-Encoding" => "gzip")) + def access_token + @access_token = oauth.client_credentials.get_token if @access_token.nil? || @access_token.expired? + @access_token end - memoize :extended_fetch, :extended_fetch_json, :download_url + def access_token=(hash) + @access_token = OAuth2::AccessToken.from_hash(oauth, hash) + end end diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 2c0982f2c..1c4f03d62 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -5,6 +5,7 @@ module Sources Strategies::Pixiv, Strategies::NicoSeiga, Strategies::Twitter, + Strategies::Stash, # must come before DeviantArt Strategies::DeviantArt, Strategies::Tumblr, Strategies::ArtStation, diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index f8e3306e9..1557586af 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -79,45 +79,36 @@ module Sources end def image_urls - [image_url] - end - - def image_url # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user. if api_deviation.blank? - url - elsif api_deviation[:isDownloadable] - api_client.download_url + [url] + elsif api_deviation[:is_downloadable] + src = api_download[:src] + src.sub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://") + src.sub!(/\?.*\z/, "") # strip s3 query params + src.sub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work + [src] + elsif api_deviation.present? + src = api_deviation.dig(:content, :src) + if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ + src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') + src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") + end + src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") + src = src.sub(%r!q_\d+!, "q_100") + [src] else - media = api_deviation[:media] - token = media[:token].first - fullview = media[:types].find { |data| data[:t] == "fullview" && data[:c].present? } - - if fullview.present? - op = fullview[:c].gsub('', media[:prettyName]) - src = "#{media[:baseUri]}/#{op}?token=#{token}" - else - src = "#{media[:baseUri]}?token=#{token}" - end - - if deviation_id && deviation_id.to_i <= 790677560 && src =~ /\Ahttps:\/\/images-wixmp-/i - src = src.gsub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') - src = src.gsub(%r!/v1/(fit|fill)/.*\z!i, "") - end - - src = src.gsub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") - src = src.gsub(%r!q_\d+,strp!, "q_100") - src + raise "Couldn't find image url" # this should never happen end end def page_url - if api_deviation[:url].present? + if api_deviation.present? api_deviation[:url] - elsif deviation_id.present? - page_url_from_image_url + elsif api_url.present? + api_url else - nil + "" end end @@ -134,7 +125,7 @@ module Sources end def profile_url - return nil if artist_name.blank? + return "" if artist_name.blank? "https://www.deviantart.com/#{artist_name.downcase}" end @@ -143,20 +134,19 @@ module Sources def artist_name if artist_name_from_url.present? artist_name_from_url - elsif api_deviation.dig(:author, :username).present? + elsif api_metadata.present? api_metadata.dig(:author, :username) else - nil + "" end end def artist_commentary_title - api_deviation[:title] + api_metadata[:title] end def artist_commentary_desc - return nil unless api_deviation.dig(:extended, :description).present? - api_deviation.dig(:extended, :description) + api_metadata[:description] end def normalized_for_artist_finder? @@ -172,10 +162,12 @@ module Sources end def tags - return [] unless api_deviation.dig(:extended, :tags).present? + if api_metadata.blank? + return [] + end - api_deviation.dig(:extended, :tags).map do |tag| - [tag[:name], tag[:url]] + api_metadata[:tags].map do |tag| + [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"] end end @@ -209,6 +201,8 @@ module Sources end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end + public + def self.deviation_id_from_url(url) if url =~ ASSET $~[:base36_deviation_id].try(:to_i, 36) @@ -251,19 +245,81 @@ module Sources self.class.title_from_url(url) || self.class.title_from_url(referer_url) end - def api_client - @api_client ||= DeviantArtApiClient.new(deviation_id) + def api_url + return nil if deviation_id.blank? + "https://www.deviantart.com/deviation/#{deviation_id}" end - def api_deviation - api_client.extended_fetch_json[:deviation] || {} + def page + return nil if api_url.blank? + + options = Danbooru.config.httparty_options.deep_merge( + format: :plain, + headers: { "Accept-Encoding" => "gzip" } + ) + resp = HTTParty.get(api_url, **options) + + if resp.success? + body = Zlib.gunzip(resp.body) + Nokogiri::HTML(body) + # the work was deleted + elsif resp.code == 404 + nil + else + raise HTTParty::ResponseError.new(resp) + end end + memoize :page + + # Scrape UUID from + # For hidden or deleted works the UUID will be nil. + def uuid + return nil if page.nil? + meta = page.search('meta[property="da:appurl"]').first + return nil if meta.nil? + + appurl = meta["content"] + uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] + uuid + end + memoize :uuid + + def api_client + api_client = DeviantArtApiClient.new( + Danbooru.config.deviantart_client_id, + Danbooru.config.deviantart_client_secret, + Danbooru.config.httparty_options + ) + api_client.access_token = Cache.get("da-access-token", 55.minutes) do + api_client.access_token.to_hash + end + api_client + end + memoize :api_client + + def api_deviation + return {} if uuid.nil? + api_client.deviation(uuid) + end + memoize :api_deviation + + def api_metadata + return {} if uuid.nil? + api_client.metadata(uuid)[:metadata].first + end + memoize :api_metadata + + def api_download + return {} if uuid.nil? + api_client.download(uuid) + end + memoize :api_download def api_response { - code: api_client.extended_fetch.code, - headers: api_client.extended_fetch.headers.to_h, - body: api_client.extended_fetch_json + deviation: api_deviation, + metadata: api_metadata, + download: api_download, } end end diff --git a/app/logical/sources/strategies/stash.rb b/app/logical/sources/strategies/stash.rb new file mode 100644 index 000000000..ebb79255a --- /dev/null +++ b/app/logical/sources/strategies/stash.rb @@ -0,0 +1,55 @@ +# Page URLs: +# * https://sta.sh/0wxs31o7nn2 (single image) +# * https://sta.sh/21leo8mz87ue (folder) +# +# Image URLs: +# * https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png +# +# Ref: +# * https://github.com/danbooru/danbooru/issues/3877 +# * https://www.deviantartsupport.com/en/article/what-is-stash-3391708 +# * https://www.deviantart.com/developers/http/v1/20160316/stash_item/4662dd8b10e336486ea9a0b14da62b74 +# +module Sources + module Strategies + class Stash < DeviantArt + STASH = %r{\Ahttps?://sta\.sh/(?[0-9a-zA-Z]+)}i + + def domains + ["deviantart.net", "sta.sh"] + end + + def match? + parsed_urls.map(&:domain).any?("sta.sh") + end + + def site_name + "Sta.sh" + end + + def canonical_url + page_url + end + + def page_url + "https://sta.sh/#{stash_id}" + end + + def api_url + page_url + end + + def self.stash_id_from_url(url) + if url =~ STASH + $~[:post_id].downcase + else + nil + end + end + + def stash_id + [url, referer_url].map{ |x| self.class.stash_id_from_url(x) }.compact.first + end + end + end +end diff --git a/config/danbooru_default_config.rb b/config/danbooru_default_config.rb index 634dde958..2f4ced182 100644 --- a/config/danbooru_default_config.rb +++ b/config/danbooru_default_config.rb @@ -333,6 +333,15 @@ module Danbooru nil end + # Register at https://www.deviantart.com/developers/ + def deviantart_client_id + nil + end + + def deviantart_client_secret + nil + end + # http://tinysubversions.com/notes/mastodon-bot/ def pawoo_client_id nil diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 8f632b512..b9dc3a90c 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -205,6 +205,7 @@ class ArtistTest < ActiveSupport::TestCase context "when finding deviantart artists" do setup do + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? FactoryBot.create(:artist, :name => "artgerm", :url_string => "http://artgerm.deviantart.com/") FactoryBot.create(:artist, :name => "trixia", :url_string => "http://trixdraws.deviantart.com/") end diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index bda6bef2b..dbc084106 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -2,6 +2,11 @@ require 'test_helper' module Sources class DeviantArtTest < ActiveSupport::TestCase + def setup + super + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? + end + context "A page url" do setup do @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") diff --git a/test/unit/sources/stash_test.rb b/test/unit/sources/stash_test.rb new file mode 100644 index 000000000..b005bb9c8 --- /dev/null +++ b/test/unit/sources/stash_test.rb @@ -0,0 +1,58 @@ +require 'test_helper' + +module Sources + class StashTest < ActiveSupport::TestCase + def setup + super + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? + end + + context "A https://sta.sh/:id page url" do + should "work" do + @site = Sources::Strategies.find("https://sta.sh/0wxs31o7nn2") + + assert_equal("noizave", @site.artist_name) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) + + assert_equal("A pepe", @site.artist_commentary_title) + assert_equal("This is a test.", @site.artist_commentary_desc) + + assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) + assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) + assert_equal("http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", @site.image_url) + assert_equal(["http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"], @site.image_urls) + end + end + + context "A https://orig00.deviantart.net/* image url" do + context "with a https://sta.sh/:id referer" do + should "work" do + @site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", "https://sta.sh/0wxs31o7nn2") + + assert_equal("noizave", @site.artist_name) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) + + assert_equal("A pepe", @site.artist_commentary_title) + assert_equal("This is a test.", @site.artist_commentary_desc) + + assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) + assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) + assert_equal("http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", @site.image_url) + assert_equal(["http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"], @site.image_urls) + end + end + + context "without a referer" do + should "use the base deviantart strategy" do + @site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png") + + # if all we have is the image url, then we can't tell that this is really a sta.sh image. + assert_equal("Deviant Art", @site.site_name) + + # this is the wrong page, but there's no way to know the correct sta.sh page without the referer. + assert_equal("https://www.deviantart.com/deviation/763305148", @site.page_url) + end + end + end + end +end