diff --git a/app/logical/deviant_art_api_client.rb b/app/logical/deviant_art_api_client.rb index 3b8c35734..53abf1c8a 100644 --- a/app/logical/deviant_art_api_client.rb +++ b/app/logical/deviant_art_api_client.rb @@ -1,59 +1,69 @@ -# https://github.com/danbooru/danbooru/issues/4144 +# Authentication is via OAuth2 with the client credentials grant. Register a +# new app at https://www.deviantart.com/developers/ to obtain a client_id and +# client_secret. The app doesn't need to be published. # # API requests must send a user agent and must use gzip compression, otherwise # 403 errors will be returned. +# +# API calls operate on UUIDs. The deviation ID in the URL is not the UUID. UUIDs +# are obtained by scraping the HTML page for the element. +# +# * https://www.deviantart.com/developers/ +# * https://www.deviantart.com/developers/authentication +# * https://www.deviantart.com/developers/errors +# * https://www.deviantart.com/developers/http/v1/20160316 -DeviantArtApiClient = Struct.new(:deviation_id) do - extend Memoist +class DeviantArtApiClient + class Error < StandardError; end + BASE_URL = "https://www.deviantart.com/api/v1/oauth2/" - def extended_fetch - params = { deviationid: deviation_id, type: "art", include_session: false } - get("https://www.deviantart.com/_napi/da-deviation/shared_api/deviation/extended_fetch", params: params) + attr_reader :client_id, :client_secret + + def initialize(client_id, client_secret) + @client_id, @client_secret = client_id, client_secret end - def extended_fetch_json - JSON.parse(extended_fetch.body).with_indifferent_access + # https://www.deviantart.com/developers/http/v1/20160316/deviation_single/bcc296bdf3b5e40636825a942a514816 + def deviation(uuid) + request("deviation/#{uuid}") end - def download_url - url = extended_fetch_json.dig(:deviation, :extended, :download, :url) - response = get(url) - response.headers[:location] + # https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd + def download(uuid, mature_content: "1") + request("deviation/download/#{uuid}", mature_content: mature_content) end - def get(url, retries: 1, **options) - response = http.cookies(cookies).get(url, **options) + # https://www.deviantart.com/developers/http/v1/20160316/deviation_metadata/7824fc14d6fba6acbacca1cf38c24158 + def metadata(*uuids, mature_content: "1", ext_submission: "1", ext_camera: "1", ext_stats: "1") + params = { + deviationids: uuids.flatten, + mature_content: mature_content, + ext_submission: ext_submission, + ext_camera: ext_camera, + ext_stats: ext_stats + } - new_cookies = response.cookies.cookies.map { |cookie| { cookie.name => cookie.value } }.reduce(&:merge) - new_cookies = new_cookies.slice(:userinfo, :auth, :authsecure) - if new_cookies.present? - DanbooruLogger.info("DeviantArt: updating cookies", url: url, new_cookies: new_cookies, old_cookies: cookies) - self.cookies = new_cookies - end - - # If the old auth cookie expired we may get a 404 with a new auth cookie - # set. Try again with the new cookie. - if response.code == 404 && retries > 0 - DanbooruLogger.info("DeviantArt: retrying", url: url, cookies: cookies) - response = get(url, retries: retries - 1, **options) - end - - response + request("deviation/metadata", **params) end - def cookies - Cache.get("deviantart_cookies", 10.years.to_i) do - JSON.parse(Danbooru.config.deviantart_cookies) - end + def request(url, **params) + params = { access_token: access_token.token, **params } + + url = URI.join(BASE_URL, url).to_s + response = Danbooru::Http.cache(1.minute).get(url, params: params) + response.parse.with_indifferent_access end - def cookies=(new_cookies) - Cache.put("deviantart_cookies", new_cookies, 10.years.to_i) + def oauth + OAuth2::Client.new(client_id, client_secret, site: "https://www.deviantart.com", token_url: "/oauth2/token") end - def http - HTTP.use(:auto_inflate).headers(Danbooru.config.http_headers.merge("Accept-Encoding" => "gzip")) + def access_token + @access_token = oauth.client_credentials.get_token if @access_token.nil? || @access_token.expired? + @access_token end - memoize :extended_fetch, :extended_fetch_json, :download_url + def access_token=(hash) + @access_token = OAuth2::AccessToken.from_hash(oauth, hash) + end end diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 5f68e7153..7f2b3be98 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -5,6 +5,7 @@ module Sources Strategies::Pixiv, Strategies::NicoSeiga, Strategies::Twitter, + Strategies::Stash, # must come before DeviantArt Strategies::DeviantArt, Strategies::Tumblr, Strategies::ArtStation, diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index f8e3306e9..eb60440b7 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -86,33 +86,26 @@ module Sources # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user. if api_deviation.blank? url - elsif api_deviation[:isDownloadable] - api_client.download_url + elsif api_deviation[:is_downloadable] + api_download[:src] + elsif api_deviation[:flash].present? + api_deviation.dig(:flash, :src) + elsif api_deviation[:videos].present? + api_deviation[:videos].max_by { |x| x[:filesize] }[:src] else - media = api_deviation[:media] - token = media[:token].first - fullview = media[:types].find { |data| data[:t] == "fullview" && data[:c].present? } - - if fullview.present? - op = fullview[:c].gsub('', media[:prettyName]) - src = "#{media[:baseUri]}/#{op}?token=#{token}" - else - src = "#{media[:baseUri]}?token=#{token}" + src = api_deviation.dig(:content, :src) + if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/ && src !~ /\.gif\?/ + src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') + src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "") end - - if deviation_id && deviation_id.to_i <= 790677560 && src =~ /\Ahttps:\/\/images-wixmp-/i - src = src.gsub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1') - src = src.gsub(%r!/v1/(fit|fill)/.*\z!i, "") - end - - src = src.gsub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") + src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net") src = src.gsub(%r!q_\d+,strp!, "q_100") src end end def page_url - if api_deviation[:url].present? + if api_deviation.present? api_deviation[:url] elsif deviation_id.present? page_url_from_image_url @@ -143,7 +136,7 @@ module Sources def artist_name if artist_name_from_url.present? artist_name_from_url - elsif api_deviation.dig(:author, :username).present? + elsif api_metadata.present? api_metadata.dig(:author, :username) else nil @@ -151,12 +144,11 @@ module Sources end def artist_commentary_title - api_deviation[:title] + api_metadata[:title] end def artist_commentary_desc - return nil unless api_deviation.dig(:extended, :description).present? - api_deviation.dig(:extended, :description) + api_metadata[:description] end def normalized_for_artist_finder? @@ -172,10 +164,12 @@ module Sources end def tags - return [] unless api_deviation.dig(:extended, :tags).present? + if api_metadata.blank? + return [] + end - api_deviation.dig(:extended, :tags).map do |tag| - [tag[:name], tag[:url]] + api_metadata[:tags].map do |tag| + [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"] end end @@ -251,19 +245,70 @@ module Sources self.class.title_from_url(url) || self.class.title_from_url(referer_url) end - def api_client - @api_client ||= DeviantArtApiClient.new(deviation_id) + def page + return nil if page_url_from_image_url.blank? + + resp = Danbooru::Http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1}) + + if resp.status.success? + Nokogiri::HTML(resp.body.to_s) + # the work was deleted + elsif resp.code == 404 + nil + else + raise "failed to fetch page (got code #{resp.code})" + end end + memoize :page + + # Scrape UUID from + # For hidden or deleted works the UUID will be nil. + def uuid + return nil if page.nil? + meta = page.at_css('meta[property="da:appurl"]') + return nil if meta.nil? + + appurl = meta["content"] + uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1] + uuid + end + memoize :uuid + + def api_client + api_client = DeviantArtApiClient.new( + Danbooru.config.deviantart_client_id, + Danbooru.config.deviantart_client_secret + ) + api_client.access_token = Cache.get("da-access-token", 11.weeks) do + api_client.access_token.to_hash + end + api_client + end + memoize :api_client def api_deviation - api_client.extended_fetch_json[:deviation] || {} + return {} if uuid.nil? + api_client.deviation(uuid) end + memoize :api_deviation + + def api_metadata + return {} if uuid.nil? + api_client.metadata(uuid)[:metadata].first + end + memoize :api_metadata + + def api_download + return {} unless uuid.present? && api_deviation[:is_downloadable] + api_client.download(uuid) + end + memoize :api_download def api_response { - code: api_client.extended_fetch.code, - headers: api_client.extended_fetch.headers.to_h, - body: api_client.extended_fetch_json + deviation: api_deviation, + metadata: api_metadata, + download: api_download } end end diff --git a/app/logical/sources/strategies/stash.rb b/app/logical/sources/strategies/stash.rb new file mode 100644 index 000000000..76b5cf2c7 --- /dev/null +++ b/app/logical/sources/strategies/stash.rb @@ -0,0 +1,55 @@ +# Page URLs: +# * https://sta.sh/0wxs31o7nn2 (single image) +# * https://sta.sh/21leo8mz87ue (folder) +# +# Image URLs: +# * https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png +# +# Ref: +# * https://github.com/danbooru/danbooru/issues/3877 +# * https://www.deviantartsupport.com/en/article/what-is-stash-3391708 +# * https://www.deviantart.com/developers/http/v1/20160316/stash_item/4662dd8b10e336486ea9a0b14da62b74 +# +module Sources + module Strategies + class Stash < DeviantArt + STASH = %r{\Ahttps?://sta\.sh/(?[0-9a-zA-Z]+)}i + + def domains + ["deviantart.net", "sta.sh"] + end + + def match? + parsed_urls.map(&:domain).any?("sta.sh") + end + + def site_name + "Sta.sh" + end + + def canonical_url + page_url + end + + def page_url + page_url_from_image_url + end + + def page_url_from_image_url + "https://sta.sh/#{stash_id}" + end + + def self.stash_id_from_url(url) + if url =~ STASH + $~[:post_id].downcase + else + nil + end + end + + def stash_id + [url, referer_url].map { |x| self.class.stash_id_from_url(x) }.compact.first + end + end + end +end diff --git a/config/danbooru_default_config.rb b/config/danbooru_default_config.rb index 634dde958..2f4ced182 100644 --- a/config/danbooru_default_config.rb +++ b/config/danbooru_default_config.rb @@ -333,6 +333,15 @@ module Danbooru nil end + # Register at https://www.deviantart.com/developers/ + def deviantart_client_id + nil + end + + def deviantart_client_secret + nil + end + # http://tinysubversions.com/notes/mastodon-bot/ def pawoo_client_id nil diff --git a/test/unit/artist_test.rb b/test/unit/artist_test.rb index 8f632b512..b9dc3a90c 100644 --- a/test/unit/artist_test.rb +++ b/test/unit/artist_test.rb @@ -205,6 +205,7 @@ class ArtistTest < ActiveSupport::TestCase context "when finding deviantart artists" do setup do + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? FactoryBot.create(:artist, :name => "artgerm", :url_string => "http://artgerm.deviantart.com/") FactoryBot.create(:artist, :name => "trixia", :url_string => "http://trixdraws.deviantart.com/") end diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index bda6bef2b..9b321f6d2 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -2,6 +2,11 @@ require 'test_helper' module Sources class DeviantArtTest < ActiveSupport::TestCase + def setup + super + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? + end + context "A page url" do setup do @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") @@ -239,6 +244,36 @@ module Sources end end + context "The source for a non-downloadable animated gif with id<=790677560" do + should "return working image url" do + @site = Sources::Strategies.find("https://www.deviantart.com/heartgear/art/Silent-Night-579982816") + + # md5: 62caac1863aa264a56d548b4b7607097 + assert_match(%r!\Ahttps://images-wixmp-ed30a86b8c4ca887773594c2\.wixmp\.com/f/ea95be00-c5aa-4063-bd55-f5a9183912f7/d9lb1ls-7d625444-0003-4123-bf00-274737ca7fdd.gif\?token=!, @site.image_url) + assert_downloaded(350_156, @site.image_url) + end + end + + context "The source for a non-downloadable flash file" do + should "return working image url" do + @site = Sources::Strategies.find("https://www.deviantart.com/heartgear/art/SL-40v3-522007633") + + # md5: 6adf1a3d532f898f44cf9948cbc7db7d + assert_match(%r!\Ahttps://api-da\.wixmp\.com/_api/download/file\?downloadToken=!, @site.image_url) + assert_downloaded(3_496_110, @site.image_url) + end + end + + context "The source for a non-downloadable video file" do + should "return working image url" do + @site = Sources::Strategies.find("https://www.deviantart.com/gs-mantis/art/Chen-Goes-Fishing-505847233") + + # md5: 344ac2b9fd5a87982af4b648aa2b2b0d + assert_equal("https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/v/mp4/fe046bc7-4d68-4699-96c1-19aa464edff6/d8d6281-91959e92-214f-4b2d-a138-ace09f4b6d09.1080p.8e57939eba634743a9fa41185e398d00.mp4", @site.image_url) + assert_downloaded(9_739_947, @site.image_url) + end + end + context "The source for an DeviantArt artwork page" do setup do @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408") diff --git a/test/unit/sources/stash_test.rb b/test/unit/sources/stash_test.rb new file mode 100644 index 000000000..4889869c0 --- /dev/null +++ b/test/unit/sources/stash_test.rb @@ -0,0 +1,56 @@ +require 'test_helper' + +module Sources + class StashTest < ActiveSupport::TestCase + def setup + super + skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present? + end + + context "A https://sta.sh/:id page url" do + should "work" do + @site = Sources::Strategies.find("https://sta.sh/0wxs31o7nn2") + + assert_equal("noizave", @site.artist_name) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) + + assert_equal("A pepe", @site.artist_commentary_title) + assert_equal("This is a test.", @site.artist_commentary_desc) + + assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) + assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) + assert_match(%r!\Ahttps://api-da\.wixmp\.com/_api/download/file\?downloadToken=!, @site.image_url) + end + end + + context "A https://orig00.deviantart.net/* image url" do + context "with a https://sta.sh/:id referer" do + should "work" do + @site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png", "https://sta.sh/0wxs31o7nn2") + + assert_equal("noizave", @site.artist_name) + assert_equal("https://www.deviantart.com/noizave", @site.profile_url) + + assert_equal("A pepe", @site.artist_commentary_title) + assert_equal("This is a test.", @site.artist_commentary_desc) + + assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) + assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) + assert_match(%r!\Ahttps://api-da\.wixmp\.com/_api/download/file\?downloadToken=!, @site.image_url) + end + end + + context "without a referer" do + should "use the base deviantart strategy" do + @site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png") + + # if all we have is the image url, then we can't tell that this is really a sta.sh image. + assert_equal("Deviant Art", @site.site_name) + + # this is the wrong page, but there's no way to know the correct sta.sh page without the referer. + assert_equal("https://www.deviantart.com/noizave/art/A-Pepe-763305148", @site.page_url) + end + end + end + end +end