Files
danbooru/app/logical/sources/strategies/deviant_art.rb
lllusion3469 9205c32424 deviantart: revert to 7f482dc35b
that's the latest commit made to deviantart files before switching from
the developer API to the Javascript backend from the new "Eclipse"
frontend.
This is necessary because it's basically impossible to download posts
now with the JS backend without being logged in, i.e. having the cookies
from a logged in user, which can't be used for very long even if
exporting them from a browser. You would have to save the cookies
deviantart sends you back via the "Set-Cookie" header in a database
somewhere in addition to the other added complexity.

also
* (temporarily) replace HttpartyCache with HTTParty as it's long been
  removed
* fix one case of "last argument as keyword parameter"
* change repository url (5d1a1cc87e)
* remove self-explanatory comment
2020-05-11 16:09:00 +02:00

328 lines
12 KiB
Ruby

# Asset URLs:
#
# * http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png
# * http://pre15.deviantart.net/81de/th/pre/f/2015/063/5/f/inha_by_inhaestudios-d8kfzm5.jpg
# * http://th00.deviantart.net/fs71/PRE/f/2014/065/3/b/goruto_by_xyelkiltrox-d797tit.png
#
# * http://th04.deviantart.net/fs70/300W/f/2009/364/4/d/Alphes_Mimic___Rika_by_Juriesute.png
# * http://fc02.deviantart.net/fs48/f/2009/186/2/c/Animation_by_epe_tohri.swf
# * http://fc08.deviantart.net/files/f/2007/120/c/9/Cool_Like_Me_by_47ness.jpg
#
# * http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg
# * http://img04.deviantart.net/720b/i/2003/37/9/6/princess_peach.jpg
#
# * http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg
# * http://fc00.deviantart.net/fs71/f/2013/234/d/8/d84e05f26f0695b1153e9dab3a962f16-d6j8jl9.jpg
# * http://th04.deviantart.net/fs71/PRE/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg
#
# * http://fc09.deviantart.net/fs22/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
# * http://pre06.deviantart.net/8497/th/pre/f/2009/173/c/c/cc9686111dcffffffb5fcfaf0cf069fb.jpg
#
# * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
# * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/fe7ab27f-7530-4252-99ef-2baaf81b36fd/dddf6pe-1a4a091c-768c-4395-9465-5d33899be1eb.png/v1/fill/w_800,h_1130,q_80,strp/stay_hydrated_and_in_the_shade_by_raikoart_dddf6pe-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7ImhlaWdodCI6Ijw9MTEzMCIsInBhdGgiOiJcL2ZcL2ZlN2FiMjdmLTc1MzAtNDI1Mi05OWVmLTJiYWFmODFiMzZmZFwvZGRkZjZwZS0xYTRhMDkxYy03NjhjLTQzOTUtOTQ2NS01ZDMzODk5YmUxZWIucG5nIiwid2lkdGgiOiI8PTgwMCJ9XV0sImF1ZCI6WyJ1cm46c2VydmljZTppbWFnZS5vcGVyYXRpb25zIl19.J0W4k-iV6Mg8Kt_5Lr_L_JbBq4lyr7aCausWWJ_Fsbw
#
# Page URLs:
#
# * https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408
# * https://noizave.deviantart.com/art/test-post-please-ignore-685436408
# * https://www.deviantart.com/deviation/685436408
# * https://fav.me/dbc3a48
#
# Profile URLs:
#
# * https://noizave.deviantart.com
# * https://www.deviantart.com/noizave
# * https://deviantart.com/noizave
module Sources
module Strategies
class DeviantArt < Base
ASSET_SUBDOMAINS = %r{(?:fc|th|pre|img|orig|origin-orig)\d*}i
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i
TITLE = %r{(?<title>[a-z0-9_-]+?)}i
ARTIST = %r{(?<artist>[a-z0-9_-]+?)}i
DEVIATION_ID = %r{(?<deviation_id>[0-9]+)}i
DA_FILENAME = %r{#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\.}i
WIX_FILENAME = %r{#{TITLE}_by_#{ARTIST}_d(?<base36_deviation_id>[a-z0-9]+)-[a-z0-9]+\.}i
DA_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/#{DA_FILENAME}}i
WIX_ASSET = %r{\Ahttps?://images-wixmp-ed30a86b8c4ca887773594c2\.wixmp\.com/.+/#{WIX_FILENAME}}i
ASSET = Regexp.union(DA_ASSET, WIX_ASSET)
PATH_ART = %r{\Ahttps?://www\.deviantart\.com/#{ARTIST}/art/#{TITLE}-#{DEVIATION_ID}\z}i
SUBDOMAIN_ART = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/art/#{TITLE}-#{DEVIATION_ID}\z}i
PATH_PROFILE = %r{\Ahttps?://(www\.)?deviantart\.com/#{ARTIST}/?\z}i
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i
def domains
["deviantart.net", "deviantart.com"]
end
def site_name
"Deviant Art"
end
def match?
return false if parsed_url.nil?
parsed_url.domain.in?(domains) || parsed_url.host == "images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com"
end
def canonical_url
if self.class.deviation_id_from_url(image_url).present? || page_url.blank?
image_url
else
page_url
end
end
def image_urls
# work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
if api_deviation.blank?
[url]
elsif api_deviation[:is_downloadable]
src = api_download[:src]
src.sub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.sub!(/\?.*\z/, "") # strip s3 query params
src.sub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
[src]
elsif api_deviation.present?
src = api_deviation.dig(:content, :src)
if deviation_id && deviation_id.to_i <= 790677560 && src =~ /^https:\/\/images-wixmp-/
src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1')
src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "")
end
src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net")
src = src.sub(%r!q_\d+!, "q_100")
[src]
else
raise "Couldn't find image url" # this should never happen
end
end
def page_url
if api_deviation.present?
api_deviation[:url]
elsif api_url.present?
api_url
else
""
end
end
def page_url_from_image_url
artist, title, id = artist_name_from_url, title_from_url, deviation_id
if artist.present? && title.present? && id.present?
"https://www.deviantart.com/#{artist}/art/#{title}-#{id}"
elsif id.present?
"https://deviantart.com/deviation/#{id}"
else
nil
end
end
def profile_url
return "" if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}"
end
# Prefer the name from the url because the api metadata won't be present when
# the input url doesn't contain a deviation id, or the deviation is private or deleted.
def artist_name
if artist_name_from_url.present?
artist_name_from_url
elsif api_metadata.present?
api_metadata.dig(:author, :username)
else
""
end
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalized_for_artist_finder?
url == normalize_for_artist_finder
end
def normalizable_for_artist_finder?
normalize_for_artist_finder.present?
end
def normalize_for_artist_finder
profile_url
end
def tags
if api_metadata.blank?
return []
end
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
# Convert embedded thumbnails of journal posts to 'deviantart #123'
# links. Strip embedded thumbnails of image posts. Example:
# https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
if element.name == "a" && element["data-sigil"] == "thumb"
element.name = "span"
# <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
if element["class"].split.include?("lit")
deviation_id = element["href"][%r!-(\d+)\z!, 1].to_i
element.content = "deviantart ##{deviation_id}"
else
element.content = ""
end
end
if element.name == "a" && element["href"].present?
element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
# href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
uri = Addressable::URI.heuristic_parse(element["href"]) rescue nil
if uri.present? && uri.path.present?
uri.scheme ||= "http"
element["href"] = uri.to_s
end
end
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
public
def self.deviation_id_from_url(url)
if url =~ ASSET
$~[:base36_deviation_id].try(:to_i, 36)
elsif url =~ PATH_ART || (url !~ RESERVED_SUBDOMAINS && url =~ SUBDOMAIN_ART)
$~[:deviation_id].to_i
else
nil
end
end
def self.artist_name_from_url(url)
if url =~ ASSET || url =~ PATH_ART || url =~ PATH_PROFILE
$~[:artist].try(:dasherize)
elsif url !~ RESERVED_SUBDOMAINS && (url =~ SUBDOMAIN_ART || url =~ SUBDOMAIN_PROFILE)
$~[:artist]
else
nil
end
end
def self.title_from_url(url)
if url =~ ASSET || url =~ PATH_ART || url =~ PATH_PROFILE
$~[:title].to_s.titleize.strip.squeeze(" ").tr(" ", "-").presence
elsif url !~ RESERVED_SUBDOMAINS && (url =~ SUBDOMAIN_ART || url =~ SUBDOMAIN_PROFILE)
$~[:title].to_s.titleize.strip.squeeze(" ").tr(" ", "-").presence
else
nil
end
end
def deviation_id
self.class.deviation_id_from_url(url) || self.class.deviation_id_from_url(referer_url)
end
def artist_name_from_url
self.class.artist_name_from_url(url) || self.class.artist_name_from_url(referer_url)
end
def title_from_url
self.class.title_from_url(url) || self.class.title_from_url(referer_url)
end
def api_url
return nil if deviation_id.blank?
"https://www.deviantart.com/deviation/#{deviation_id}"
end
def page
return nil if api_url.blank?
options = Danbooru.config.httparty_options.deep_merge(
format: :plain,
headers: { "Accept-Encoding" => "gzip" }
)
resp = HTTParty.get(api_url, **options)
if resp.success?
body = Zlib.gunzip(resp.body)
Nokogiri::HTML(body)
# the work was deleted
elsif resp.code == 404
nil
else
raise HTTParty::ResponseError.new(resp)
end
end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For hidden or deleted works the UUID will be nil.
def uuid
return nil if page.nil?
meta = page.search('meta[property="da:appurl"]').first
return nil if meta.nil?
appurl = meta["content"]
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
uuid
end
memoize :uuid
def api_client
api_client = DeviantArtApiClient.new(
Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret,
Danbooru.config.httparty_options
)
api_client.access_token = Cache.get("da-access-token", 55.minutes) do
api_client.access_token.to_hash
end
api_client
end
memoize :api_client
def api_deviation
return {} if uuid.nil?
api_client.deviation(uuid)
end
memoize :api_deviation
def api_metadata
return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first
end
memoize :api_metadata
def api_download
return {} if uuid.nil?
api_client.download(uuid)
end
memoize :api_download
def api_response
{
deviation: api_deviation,
metadata: api_metadata,
download: api_download,
}
end
end
end
end