deviantart: revert to 7f482dc35b

That's the latest commit made to the DeviantArt files before switching
from the developer API to the JavaScript backend of the new "Eclipse"
frontend.
This is necessary because it's now basically impossible to download
posts with the JS backend without being logged in, i.e. without having
the cookies of a logged-in user, and those cookies can't be used for very
long even when exported from a browser. You would have to save the
cookies DeviantArt sends back via the "Set-Cookie" header in a database
somewhere, in addition to the other added complexity.

Also:
* (temporarily) replace HttpartyCache with HTTParty, as HttpartyCache has
  long been removed
* fix one case of "last argument as keyword parameter"
* change repository url (5d1a1cc87e)
* remove self-explanatory comment
This commit is contained in:
lllusion3469
2020-05-10 18:30:10 +02:00
parent d136a12a65
commit 9205c32424
8 changed files with 283 additions and 84 deletions

View File

@@ -1,59 +1,73 @@
# https://github.com/danbooru/danbooru/issues/4144 # Authentication is via OAuth2 with the client credentials grant. Register a
# new app at https://www.deviantart.com/developers/ to obtain a client_id and
# client_secret. The app doesn't need to be published.
# #
# API requests must send a user agent and must use gzip compression, otherwise # API requests must send a user agent and must use gzip compression, otherwise
# 403 errors will be returned. # 403 errors will be returned.
#
# API calls operate on UUIDs. The deviation ID in the URL is not the UUID. UUIDs
# are obtained by scraping the HTML page for the <meta property="da:appurl"> element.
#
# * https://www.deviantart.com/developers/
# * https://www.deviantart.com/developers/authentication
# * https://www.deviantart.com/developers/errors
# * https://www.deviantart.com/developers/http/v1/20160316
DeviantArtApiClient = Struct.new(:deviation_id) do class DeviantArtApiClient
extend Memoist class Error < StandardError; end
BASE_URL = "https://www.deviantart.com/api/v1/oauth2"
def extended_fetch attr_reader :client_id, :client_secret, :httparty_options
params = { deviationid: deviation_id, type: "art", include_session: false }
get("https://www.deviantart.com/_napi/da-deviation/shared_api/deviation/extended_fetch", params: params) def initialize(client_id, client_secret, httparty_options = {})
@client_id, @client_secret, @httparty_options = client_id, client_secret, httparty_options
end end
def extended_fetch_json # https://www.deviantart.com/developers/http/v1/20160316/deviation_single/bcc296bdf3b5e40636825a942a514816
JSON.parse(extended_fetch.body).with_indifferent_access def deviation(uuid)
request("/deviation/#{uuid}")
end end
def download_url # https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd
url = extended_fetch_json.dig(:deviation, :extended, :download, :url) def download(uuid, mature_content: "1")
response = get(url) request("/deviation/download/#{uuid}", mature_content: mature_content)
response.headers[:location]
end end
def get(url, retries: 1, **options) # https://www.deviantart.com/developers/http/v1/20160316/deviation_metadata/7824fc14d6fba6acbacca1cf38c24158
response = http.cookies(cookies).get(url, **options) def metadata(*uuids, mature_content: "1", ext_submission: "1", ext_camera: "1", ext_stats: "1")
params = {
deviationids: uuids.flatten,
mature_content: mature_content,
ext_submission: ext_submission,
ext_camera: ext_camera,
ext_stats: ext_stats,
}
new_cookies = response.cookies.cookies.map { |cookie| { cookie.name => cookie.value } }.reduce(&:merge) request("/deviation/metadata", **params)
new_cookies = new_cookies.slice(:userinfo, :auth, :authsecure)
if new_cookies.present?
DanbooruLogger.info("DeviantArt: updating cookies", url: url, new_cookies: new_cookies, old_cookies: cookies)
self.cookies = new_cookies
end
# If the old auth cookie expired we may get a 404 with a new auth cookie
# set. Try again with the new cookie.
if response.code == 404 && retries > 0
DanbooruLogger.info("DeviantArt: retrying", url: url, cookies: cookies)
response = get(url, retries: retries - 1, **options)
end
response
end end
def cookies def request(url, **params)
Cache.get("deviantart_cookies", 10.years.to_i) do options = {
JSON.parse(Danbooru.config.deviantart_cookies) base_uri: BASE_URL,
end params: { access_token: access_token.token, **params },
headers: { "Accept-Encoding" => "gzip" },
format: :plain,
}
body, code = HTTParty.get(url, **options)
JSON.parse(Zlib.gunzip(body), symbolize_names: true)
end end
def cookies=(new_cookies) def oauth
Cache.put("deviantart_cookies", new_cookies, 10.years.to_i) OAuth2::Client.new(client_id, client_secret, site: "https://www.deviantart.com", token_url: "/oauth2/token")
end end
def http def access_token
HTTP.use(:auto_inflate).headers(Danbooru.config.http_headers.merge("Accept-Encoding" => "gzip")) @access_token = oauth.client_credentials.get_token if @access_token.nil? || @access_token.expired?
@access_token
end end
memoize :extended_fetch, :extended_fetch_json, :download_url def access_token=(hash)
@access_token = OAuth2::AccessToken.from_hash(oauth, hash)
end
end end

View File

@@ -5,6 +5,7 @@ module Sources
Strategies::Pixiv, Strategies::Pixiv,
Strategies::NicoSeiga, Strategies::NicoSeiga,
Strategies::Twitter, Strategies::Twitter,
Strategies::Stash, # must come before DeviantArt
Strategies::DeviantArt, Strategies::DeviantArt,
Strategies::Tumblr, Strategies::Tumblr,
Strategies::ArtStation, Strategies::ArtStation,

View File

@@ -79,45 +79,36 @@ module Sources
end end
def image_urls def image_urls
[image_url]
end
def image_url
# work is private, deleted, or the url didn't contain a deviation id; use image url as given by user. # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
if api_deviation.blank? if api_deviation.blank?
url [url]
elsif api_deviation[:isDownloadable] elsif api_deviation[:is_downloadable]
api_client.download_url src = api_download[:src]
src.sub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.sub!(/\?.*\z/, "") # strip s3 query params
src.sub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
[src]
elsif api_deviation.present?
src = api_deviation.dig(:content, :src)
if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/
src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1')
src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "")
end
src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net")
src = src.sub(%r!q_\d+!, "q_100")
[src]
else else
media = api_deviation[:media] raise "Couldn't find image url" # this should never happen
token = media[:token].first
fullview = media[:types].find { |data| data[:t] == "fullview" && data[:c].present? }
if fullview.present?
op = fullview[:c].gsub('<prettyName>', media[:prettyName])
src = "#{media[:baseUri]}/#{op}?token=#{token}"
else
src = "#{media[:baseUri]}?token=#{token}"
end
if deviation_id && deviation_id.to_i <= 790677560 && src =~ /\Ahttps:\/\/images-wixmp-/i
src = src.gsub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1')
src = src.gsub(%r!/v1/(fit|fill)/.*\z!i, "")
end
src = src.gsub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net")
src = src.gsub(%r!q_\d+,strp!, "q_100")
src
end end
end end
def page_url def page_url
if api_deviation[:url].present? if api_deviation.present?
api_deviation[:url] api_deviation[:url]
elsif deviation_id.present? elsif api_url.present?
page_url_from_image_url api_url
else else
nil ""
end end
end end
@@ -134,7 +125,7 @@ module Sources
end end
def profile_url def profile_url
return nil if artist_name.blank? return "" if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}" "https://www.deviantart.com/#{artist_name.downcase}"
end end
@@ -143,20 +134,19 @@ module Sources
def artist_name def artist_name
if artist_name_from_url.present? if artist_name_from_url.present?
artist_name_from_url artist_name_from_url
elsif api_deviation.dig(:author, :username).present? elsif api_metadata.present?
api_metadata.dig(:author, :username) api_metadata.dig(:author, :username)
else else
nil ""
end end
end end
def artist_commentary_title def artist_commentary_title
api_deviation[:title] api_metadata[:title]
end end
def artist_commentary_desc def artist_commentary_desc
return nil unless api_deviation.dig(:extended, :description).present? api_metadata[:description]
api_deviation.dig(:extended, :description)
end end
def normalized_for_artist_finder? def normalized_for_artist_finder?
@@ -172,10 +162,12 @@ module Sources
end end
def tags def tags
return [] unless api_deviation.dig(:extended, :tags).present? if api_metadata.blank?
return []
end
api_deviation.dig(:extended, :tags).map do |tag| api_metadata[:tags].map do |tag|
[tag[:name], tag[:url]] [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end end
end end
@@ -209,6 +201,8 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end end
public
def self.deviation_id_from_url(url) def self.deviation_id_from_url(url)
if url =~ ASSET if url =~ ASSET
$~[:base36_deviation_id].try(:to_i, 36) $~[:base36_deviation_id].try(:to_i, 36)
@@ -251,19 +245,81 @@ module Sources
self.class.title_from_url(url) || self.class.title_from_url(referer_url) self.class.title_from_url(url) || self.class.title_from_url(referer_url)
end end
def api_client def api_url
@api_client ||= DeviantArtApiClient.new(deviation_id) return nil if deviation_id.blank?
"https://www.deviantart.com/deviation/#{deviation_id}"
end end
def api_deviation def page
api_client.extended_fetch_json[:deviation] || {} return nil if api_url.blank?
options = Danbooru.config.httparty_options.deep_merge(
format: :plain,
headers: { "Accept-Encoding" => "gzip" }
)
resp = HTTParty.get(api_url, **options)
if resp.success?
body = Zlib.gunzip(resp.body)
Nokogiri::HTML(body)
# the work was deleted
elsif resp.code == 404
nil
else
raise HTTParty::ResponseError.new(resp)
end
end end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For hidden or deleted works the UUID will be nil.
def uuid
return nil if page.nil?
meta = page.search('meta[property="da:appurl"]').first
return nil if meta.nil?
appurl = meta["content"]
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
uuid
end
memoize :uuid
def api_client
api_client = DeviantArtApiClient.new(
Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret,
Danbooru.config.httparty_options
)
api_client.access_token = Cache.get("da-access-token", 55.minutes) do
api_client.access_token.to_hash
end
api_client
end
memoize :api_client
def api_deviation
return {} if uuid.nil?
api_client.deviation(uuid)
end
memoize :api_deviation
def api_metadata
return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first
end
memoize :api_metadata
def api_download
return {} if uuid.nil?
api_client.download(uuid)
end
memoize :api_download
def api_response def api_response
{ {
code: api_client.extended_fetch.code, deviation: api_deviation,
headers: api_client.extended_fetch.headers.to_h, metadata: api_metadata,
body: api_client.extended_fetch_json download: api_download,
} }
end end
end end

View File

@@ -0,0 +1,55 @@
# Page URLs:
# * https://sta.sh/0wxs31o7nn2 (single image)
# * https://sta.sh/21leo8mz87ue (folder)
#
# Image URLs:
# * https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png
#
# Ref:
# * https://github.com/danbooru/danbooru/issues/3877
# * https://www.deviantartsupport.com/en/article/what-is-stash-3391708
# * https://www.deviantart.com/developers/http/v1/20160316/stash_item/4662dd8b10e336486ea9a0b14da62b74
#
module Sources
  module Strategies
    # Source strategy for sta.sh pages. It subclasses the DeviantArt strategy
    # and overrides only the URL-related methods defined here; everything else
    # is inherited from the parent class.
    class Stash < DeviantArt
      # Matches a sta.sh page URL and captures the id that follows the host.
      STASH = %r{\Ahttps?://sta\.sh/(?<post_id>[0-9a-zA-Z]+)}i

      # Extracts the sta.sh id from a URL.
      #
      # @param url [String, nil] candidate page URL
      # @return [String, nil] the lowercased id, or nil when url does not match STASH
      def self.stash_id_from_url(url)
        return nil unless url =~ STASH

        Regexp.last_match(:post_id).downcase
      end

      # @return [Array<String>] the domains associated with this strategy
      def domains
        %w[deviantart.net sta.sh]
      end

      # @return [Boolean] true when any entry of parsed_urls has the domain "sta.sh"
      def match?
        parsed_urls.map(&:domain).include?("sta.sh")
      end

      # @return [String] display name of the site
      def site_name
        "Sta.sh"
      end

      # The canonical URL is the sta.sh page URL.
      #
      # @return [String]
      def canonical_url
        page_url
      end

      # @return [String] the sta.sh page URL built from stash_id
      def page_url
        "https://sta.sh/#{stash_id}"
      end

      # The URL exposed as api_url is the sta.sh page itself.
      #
      # @return [String]
      def api_url
        page_url
      end

      # The first sta.sh id found in url, falling back to referer_url.
      #
      # @return [String, nil] nil when neither URL matches STASH
      def stash_id
        candidates = [url, referer_url].map { |candidate| self.class.stash_id_from_url(candidate) }
        candidates.compact.first
      end
    end
  end
end

View File

@@ -333,6 +333,15 @@ module Danbooru
nil nil
end end
# Register at https://www.deviantart.com/developers/
def deviantart_client_id
nil
end
def deviantart_client_secret
nil
end
# http://tinysubversions.com/notes/mastodon-bot/ # http://tinysubversions.com/notes/mastodon-bot/
def pawoo_client_id def pawoo_client_id
nil nil

View File

@@ -205,6 +205,7 @@ class ArtistTest < ActiveSupport::TestCase
context "when finding deviantart artists" do context "when finding deviantart artists" do
setup do setup do
skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present?
FactoryBot.create(:artist, :name => "artgerm", :url_string => "http://artgerm.deviantart.com/") FactoryBot.create(:artist, :name => "artgerm", :url_string => "http://artgerm.deviantart.com/")
FactoryBot.create(:artist, :name => "trixia", :url_string => "http://trixdraws.deviantart.com/") FactoryBot.create(:artist, :name => "trixia", :url_string => "http://trixdraws.deviantart.com/")
end end

View File

@@ -2,6 +2,11 @@ require 'test_helper'
module Sources module Sources
class DeviantArtTest < ActiveSupport::TestCase class DeviantArtTest < ActiveSupport::TestCase
def setup
super
skip "DeviantArt API keys not set" unless Danbooru.config.deviantart_client_id.present?
end
context "A page url" do context "A page url" do
setup do setup do
@site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484") @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")

View File

@@ -0,0 +1,58 @@
require 'test_helper'
module Sources
  # Tests for the Sta.sh source strategy. Every test is skipped when no
  # DeviantArt client id is configured.
  class StashTest < ActiveSupport::TestCase
    def setup
      super
      skip "DeviantArt API keys not set" if Danbooru.config.deviantart_client_id.blank?
    end

    context "A https://sta.sh/:id page url" do
      should "work" do
        site = Sources::Strategies.find("https://sta.sh/0wxs31o7nn2")

        assert_pepe_stash_post(site)
      end
    end

    context "A https://orig00.deviantart.net/* image url" do
      context "with a https://sta.sh/:id referer" do
        should "work" do
          site = Sources::Strategies.find(
            "https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png",
            "https://sta.sh/0wxs31o7nn2"
          )

          assert_pepe_stash_post(site)
        end
      end

      context "without a referer" do
        should "use the base deviantart strategy" do
          site = Sources::Strategies.find("https://orig00.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png")

          # With only the image url there is no way to tell that this is really a sta.sh image.
          assert_equal("Deviant Art", site.site_name)

          # This is the wrong page, but the correct sta.sh page can't be known without the referer.
          assert_equal("https://www.deviantart.com/deviation/763305148", site.page_url)
        end
      end
    end

    private

    # Shared expectations for the "A pepe" sta.sh post used by the tests above.
    def assert_pepe_stash_post(site)
      expected_image = "http://origin-orig.deviantart.net/0fd2/f/2018/252/9/c/a_pepe_by_noizave-dcmga0s.png"

      assert_equal("noizave", site.artist_name)
      assert_equal("https://www.deviantart.com/noizave", site.profile_url)
      assert_equal("A pepe", site.artist_commentary_title)
      assert_equal("This is a test.", site.artist_commentary_desc)
      assert_equal("https://sta.sh/0wxs31o7nn2", site.page_url)
      assert_equal("https://sta.sh/0wxs31o7nn2", site.canonical_url)
      assert_equal(expected_image, site.image_url)
      assert_equal([expected_image], site.image_urls)
    end
  end
end