Files
danbooru/app/logical/sources/strategies/deviant_art.rb
evazion e37844303d deviantart: take artist name from url when unavailable from API.
In some cases we can't get the artist name from the API, either because
we can't do the API call because the url doesn't contain a deviation id,
or because the work is deleted:

* http://fc08.deviantart.net/files/f/2007/120/c/9/cool_like_me_by_47ness.jpg (work: http://fav.me/dwcohb)
* https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png (work: http://fav.me/dbpxd58)

Switch to taking the artist name from the url (when present) to deal
with these cases. Fixes the artist finder and the artist url normalizer
to work in this situation.
2018-09-03 18:27:01 -05:00

264 lines
8.9 KiB
Ruby

# Asset URLs:
#
# * http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png
# * http://pre15.deviantart.net/81de/th/pre/f/2015/063/5/f/inha_by_inhaestudios-d8kfzm5.jpg
# * http://th00.deviantart.net/fs71/PRE/f/2014/065/3/b/goruto_by_xyelkiltrox-d797tit.png
#
# * http://th04.deviantart.net/fs70/300W/f/2009/364/4/d/Alphes_Mimic___Rika_by_Juriesute.png
# * http://fc02.deviantart.net/fs48/f/2009/186/2/c/Animation_by_epe_tohri.swf
# * http://fc08.deviantart.net/files/f/2007/120/c/9/Cool_Like_Me_by_47ness.jpg
#
# * http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg
# * http://img04.deviantart.net/720b/i/2003/37/9/6/princess_peach.jpg
#
# * http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg
# * http://fc00.deviantart.net/fs71/f/2013/234/d/8/d84e05f26f0695b1153e9dab3a962f16-d6j8jl9.jpg
# * http://th04.deviantart.net/fs71/PRE/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg
#
# * http://fc09.deviantart.net/fs22/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
# * http://pre06.deviantart.net/8497/th/pre/f/2009/173/c/c/cc9686111dcffffffb5fcfaf0cf069fb.jpg
#
# Page URLs:
#
# * https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408
# * https://noizave.deviantart.com/art/test-post-please-ignore-685436408
# * https://www.deviantart.com/deviation/685436408
# * https://fav.me/dbc3a48
module Sources
module Strategies
class DeviantArt < Base
ASSET_SUBDOMAINS = %r{(?:fc|th|pre|img|orig|origin-orig)\d*}i
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i
TITLE = %r{(?<title>[a-z0-9_-]+?)}i
ARTIST = %r{(?<artist>[a-z0-9_-]+?)}i
DEVIATION_ID = %r{(?<deviation_id>[0-9]+)}i
ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>\w+))?)?\.}i
PATH_ART = %r{\Ahttps?://www\.deviantart\.com/#{ARTIST}/art/#{TITLE}-#{DEVIATION_ID}\z}i
SUBDOMAIN_ART = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/art/#{TITLE}-#{DEVIATION_ID}\z}i
PATH_PROFILE = %r{\Ahttps?://www\.deviantart\.com/#{ARTIST}/?\z}i
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) }
end
def site_name
"Deviant Art"
end
def canonical_url
if self.class.deviation_id_from_url(image_url).present? || page_url.blank?
image_url
else
page_url
end
end
def image_urls
# work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
if api_deviation.blank?
[url]
# work is downloadable
elsif api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
[src]
# work isn't downloadable, or download size is same as regular size.
elsif api_deviation.present?
src = api_deviation.dig(:content, :src)
src = src.gsub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net")
[src]
else
raise "Couldn't find image url" # this should never happen
end
end
def page_url
return "" if api_deviation.blank?
api_deviation[:url]
end
def profile_url
return "" if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}"
end
# Prefer the name from the url because the api metadata won't be present when
# the input url doesn't contain a deviation id, or the deviation is private or deleted.
def artist_name
if artist_name_from_url.present?
artist_name_from_url
elsif api_metadata.present?
api_metadata.dig(:author, :username)
else
""
end
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalized_for_artist_finder?
url =~ PATH_PROFILE
end
def normalizable_for_artist_finder?
normalize_for_artist_finder.present?
end
def normalize_for_artist_finder
profile_url
end
def tags
if api_metadata.blank?
return []
end
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
# Convert embedded thumbnails of journal posts to 'deviantart #123'
# links. Strip embedded thumbnails of image posts. Example:
# https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
if element.name == "a" && element["data-sigil"] == "thumb"
element.name = "span"
# <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
if element["class"].split.include?("lit")
deviation_id = element["href"][%r!-(\d+)\z!, 1].to_i
element.content = "deviantart ##{deviation_id}"
else
element.content = ""
end
end
if element.name == "a" && element["href"].present?
element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")
# href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
uri = Addressable::URI.heuristic_parse(element["href"]) rescue nil
if uri.present?
uri.scheme ||= "http"
element["href"] = uri.to_s
end
end
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
public
def self.deviation_id_from_url(url)
if url =~ ASSET
$~[:base36_deviation_id].try(:to_i, 36)
elsif url =~ PATH_ART || (url !~ RESERVED_SUBDOMAINS && url =~ SUBDOMAIN_ART)
$~[:deviation_id].to_i
else
nil
end
end
def self.artist_name_from_url(url)
if url =~ ASSET || url =~ PATH_ART || url =~ PATH_PROFILE
$~[:artist]
elsif url !~ RESERVED_SUBDOMAINS && (url =~ SUBDOMAIN_ART || url =~ SUBDOMAIN_PROFILE)
$~[:artist]
else
nil
end
end
def deviation_id
self.class.deviation_id_from_url(url) || self.class.deviation_id_from_url(referer_url)
end
def artist_name_from_url
self.class.artist_name_from_url(url) || self.class.artist_name_from_url(referer_url)
end
def page
return nil if deviation_id.blank?
deviation_url = "https://www.deviantart.com/deviation/#{deviation_id}"
options = Danbooru.config.httparty_options.deep_merge(
format: :plain,
headers: { "Accept-Encoding" => "gzip" }
)
resp = HTTParty.get(deviation_url, **options)
if resp.success?
body = Zlib.gunzip(resp.body)
Nokogiri::HTML(body)
# the work was deleted
elsif resp.code == 404
nil
else
raise HTTParty::ResponseError.new(resp)
end
end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For hidden or deleted works the UUID will be nil.
def uuid
return nil if page.nil?
meta = page.search('meta[property="da:appurl"]').first
return nil if meta.nil?
appurl = meta["content"]
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
uuid
end
memoize :uuid
def api_client
api_client = DeviantArtApiClient.new(
Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret,
Danbooru.config.httparty_options
)
api_client.access_token = Cache.get("da-access-token", 55.minutes) do
api_client.access_token.to_hash
end
api_client
end
memoize :api_client
def api_deviation
return {} if uuid.nil?
api_client.deviation(uuid)
end
memoize :api_deviation
def api_metadata
return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first
end
memoize :api_metadata
def api_download
return {} if uuid.nil?
api_client.download(uuid)
end
memoize :api_download
end
end
end