303 lines
10 KiB
Ruby
303 lines
10 KiB
Ruby
# Asset URLs:
|
|
#
|
|
# * http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png
|
|
# * http://pre15.deviantart.net/81de/th/pre/f/2015/063/5/f/inha_by_inhaestudios-d8kfzm5.jpg
|
|
# * http://th00.deviantart.net/fs71/PRE/f/2014/065/3/b/goruto_by_xyelkiltrox-d797tit.png
|
|
#
|
|
# * http://th04.deviantart.net/fs70/300W/f/2009/364/4/d/Alphes_Mimic___Rika_by_Juriesute.png
|
|
# * http://fc02.deviantart.net/fs48/f/2009/186/2/c/Animation_by_epe_tohri.swf
|
|
# * http://fc08.deviantart.net/files/f/2007/120/c/9/Cool_Like_Me_by_47ness.jpg
|
|
#
|
|
# * http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg
|
|
# * http://img04.deviantart.net/720b/i/2003/37/9/6/princess_peach.jpg
|
|
#
|
|
# * http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg
|
|
# * http://fc00.deviantart.net/fs71/f/2013/234/d/8/d84e05f26f0695b1153e9dab3a962f16-d6j8jl9.jpg
|
|
# * http://th04.deviantart.net/fs71/PRE/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg
|
|
#
|
|
# * http://fc09.deviantart.net/fs22/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
|
|
# * http://pre06.deviantart.net/8497/th/pre/f/2009/173/c/c/cc9686111dcffffffb5fcfaf0cf069fb.jpg
|
|
#
|
|
# * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
|
|
#
|
|
# Page URLs:
|
|
#
|
|
# * https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408
|
|
# * https://noizave.deviantart.com/art/test-post-please-ignore-685436408
|
|
# * https://www.deviantart.com/deviation/685436408
|
|
# * https://fav.me/dbc3a48
|
|
#
|
|
# Profile URLs:
|
|
#
|
|
# * https://noizave.deviantart.com
|
|
# * https://www.deviantart.com/noizave
|
|
# * https://deviantart.com/noizave
|
|
|
|
module Sources
|
|
module Strategies
|
|
class DeviantArt < Base
|
|
# Host prefixes (optionally followed by digits) accepted for deviantart.net asset servers.
# NOTE(review): the header's example list also shows a `prnt00` host, which this
# pattern does not accept -- confirm whether that is intentional.
ASSET_SUBDOMAINS = %r{(?:fc|th|pre|img|orig|origin-orig)\d*}i
# URLs whose first host label is an asset prefix or `www`; used to avoid treating
# such a label as an artist subdomain.
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i

# Named building blocks. TITLE and ARTIST are lazy so that the literal text that
# follows them in the composed patterns decides where they stop.
TITLE = %r{(?<title>[a-z0-9_-]+?)}i
ARTIST = %r{(?<artist>[a-z0-9_-]+?)}i
DEVIATION_ID = %r{(?<deviation_id>[0-9]+)}i

# deviantart.net filenames: `title.`, `title_by_artist.` or `title_by_artist-d<base36 id>.`
DA_FILENAME = %r{#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\.}i
# wixmp.com filenames: `title_by_artist_d<base36 id>-<suffix>.`
WIX_FILENAME = %r{#{TITLE}_by_#{ARTIST}_d(?<base36_deviation_id>[a-z0-9]+)-[a-z0-9]+\.}i

# Full asset URLs on either host family.
DA_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/#{DA_FILENAME}}i
WIX_ASSET = %r{\Ahttps?://images-wixmp-ed30a86b8c4ca887773594c2\.wixmp\.com/.+/#{WIX_FILENAME}}i
ASSET = Regexp.union(DA_ASSET, WIX_ASSET)

# Deviation pages, with the artist in the path or in the subdomain.
# `www.` is optional here, matching PATH_PROFILE, so that
# https://deviantart.com/<artist>/art/<title>-<id> is recognised as well.
PATH_ART = %r{\Ahttps?://(?:www\.)?deviantart\.com/#{ARTIST}/art/#{TITLE}-#{DEVIATION_ID}\z}i
SUBDOMAIN_ART = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/art/#{TITLE}-#{DEVIATION_ID}\z}i

# Artist profile pages, with the artist in the path or in the subdomain.
PATH_PROFILE = %r{\Ahttps?://(www\.)?deviantart\.com/#{ARTIST}/?\z}i
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i
|
|
|
|
# Registered domains handled by this strategy (checked by #match?).
def domains
  %w[deviantart.net deviantart.com]
end
|
|
|
|
# Display name of the site.
def site_name
  name = "Deviant Art"
  name
end
|
|
|
|
# True when the parsed URL's domain is one of #domains, or its host is the
# wixmp.com image host. False when there is no parsed URL.
def match?
  return false if parsed_url.nil?

  wixmp_host = "images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com"
  domains.include?(parsed_url.domain) || parsed_url.host == wixmp_host
end
|
|
|
|
# Returns the image url when a deviation id can be parsed out of it, or when
# there is no page url to fall back on; otherwise returns the page url.
def canonical_url
  id_in_image_url = self.class.deviation_id_from_url(image_url).present?
  return image_url if id_in_image_url || page_url.blank?

  page_url
end
|
|
|
|
# Returns a one-element array holding the best image URL that can be determined.
#
# * No API data (work is private, deleted, or the url didn't contain a
#   deviation id): the url exactly as given by the user.
# * Work is downloadable: the API download url, cleaned up.
# * Otherwise: the API content url, rewritten towards the largest variant.
#
# Raises RuntimeError ("Couldn't find image url") when the API data has no
# content url. The strings returned by the API client are never modified in place.
def image_urls
  # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
  return [url] if api_deviation.blank?

  # work is downloadable
  if api_deviation[:is_downloadable]
    # Non-destructive `sub`: api_download is memoized, so an in-place `sub!`
    # would alter the cached response.
    src = api_download[:src]
    src = src.sub(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
    src = src.sub(/\?.*\z/, "") # strip s3 query params
    # https://origin-orig.deviantart.net doesn't work
    src = src.sub(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net")
    return [src]
  end

  # work isn't downloadable, or download size is same as regular size.
  src = api_deviation.dig(:content, :src)
  raise "Couldn't find image url" if src.blank?

  # NOTE(review): 790677560 presumably marks the last deviation id for which the
  # /intermediary/ rewrite is applicable -- confirm.
  if deviation_id && deviation_id.to_i <= 790677560 && src.match?(%r!\Ahttps://images-wixmp-!)
    src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1')
    src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "")
  end

  src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net")
  src = src.sub(%r!q_\d+!, "q_100")
  [src]
end
|
|
|
|
# Page url for the deviation: the url reported by the API when available,
# else the /deviation/<id> url, else an empty string.
def page_url
  return api_deviation[:url] if api_deviation.present?
  return api_url if api_url.present?

  ""
end
|
|
|
|
# Profile url built from the lowercased artist name; "" when no name is known.
def profile_url
  name = artist_name
  if name.blank?
    ""
  else
    "https://www.deviantart.com/#{name.downcase}"
  end
end
|
|
|
|
# The name parsed from the url wins over the API's author username, because the
# api metadata won't be present when the input url doesn't contain a deviation
# id, or the deviation is private or deleted. Returns "" when neither is available.
def artist_name
  return artist_name_from_url if artist_name_from_url.present?
  return api_metadata.dig(:author, :username) if api_metadata.present?

  ""
end
|
|
|
|
# Title of the deviation as found under :title in the API metadata.
def artist_commentary_title
  api_metadata.dig(:title)
end
|
|
|
|
# Description of the deviation as found under :description in the API metadata.
def artist_commentary_desc
  api_metadata.dig(:description)
end
|
|
|
|
# True when the input url already equals its artist-finder form.
def normalized_for_artist_finder?
  normalized = normalize_for_artist_finder
  url == normalized
end
|
|
|
|
# True when an artist-finder form can be produced for this url.
def normalizable_for_artist_finder?
  !normalize_for_artist_finder.blank?
end
|
|
|
|
# The artist-finder form of any DeviantArt url is the artist's profile url.
def normalize_for_artist_finder
  normalized = profile_url
  normalized
end
|
|
|
|
# Returns [tag_name, tag_url] pairs built from the API metadata's :tags list,
# or [] when there is no metadata.
def tags
  return [] if api_metadata.blank?

  api_metadata[:tags].map do |tag|
    name = tag[:tag_name]
    [name, "https://www.deviantart.com/tag/#{name}"]
  end
end
|
|
|
|
# Converts the HTML description to DText via DText.from_html, rewriting elements
# along the way, and strips leading/trailing whitespace from the result.
#
# * Embedded thumbnails (<a data-sigil="thumb">) become <span>s: journal thumbs
#   turn into 'deviantart #123' text, image thumbs are emptied.
# * Link hrefs lose the deviantart "outgoing" redirect prefix and get a scheme
#   when they have none.
#
# Thumbnail anchors lacking a `class` or `href` attribute, or whose href has no
# trailing numeric id, are emptied instead of raising or producing "deviantart #0".
def dtext_artist_commentary_desc
  DText.from_html(artist_commentary_desc) do |element|
    # Convert embedded thumbnails of journal posts to 'deviantart #123'
    # links. Strip embedded thumbnails of image posts. Example:
    # https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
    if element.name == "a" && element["data-sigil"] == "thumb"
      element.name = "span"

      # <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
      journal_thumb = element["class"].to_s.split.include?("lit")
      linked_id = element["href"].to_s[%r!-(\d+)\z!, 1]

      element.content = journal_thumb && linked_id ? "deviantart ##{linked_id.to_i}" : ""
    end

    # Thumbnails were renamed to "span" above, so this only touches ordinary links.
    if element.name == "a" && element["href"].present?
      element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")

      # href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
      uri = begin
        Addressable::URI.heuristic_parse(element["href"])
      rescue StandardError
        nil # best effort: an unparseable href is left as it is
      end

      if uri.present? && uri.path.present?
        uri.scheme ||= "http"
        element["href"] = uri.to_s
      end
    end
  end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
|
|
|
|
# Sets the default visibility of the instance methods defined after this point
# to public. NOTE(review): no `private`/`protected` marker is visible earlier in
# this class, so this looks redundant -- confirm before removing.
public
|
|
|
|
# Extracts the numeric deviation id from a url, or returns nil.
#
# * Asset urls: the base36 id embedded in the filename, converted to an
#   integer (nil when the filename carries no id).
# * Deviation page urls (artist in path, or in a non-reserved subdomain): the
#   decimal id at the end of the url.
def self.deviation_id_from_url(url)
  return Regexp.last_match(:base36_deviation_id).try(:to_i, 36) if url =~ ASSET

  art_page = url =~ PATH_ART || (url !~ RESERVED_SUBDOMAINS && url =~ SUBDOMAIN_ART)
  Regexp.last_match(:deviation_id).to_i if art_page
end
|
|
|
|
# Extracts the artist name from a url, or returns nil.
#
# Names taken from asset urls or from the url path are passed through
# `dasherize`; names taken from a subdomain are returned as matched. Reserved
# subdomains (asset hosts, `www`) are never treated as artist names.
def self.artist_name_from_url(url)
  if url =~ ASSET || url =~ PATH_ART || url =~ PATH_PROFILE
    return Regexp.last_match(:artist).try(:dasherize)
  end

  return nil if url =~ RESERVED_SUBDOMAINS

  Regexp.last_match(:artist) if url =~ SUBDOMAIN_ART || url =~ SUBDOMAIN_PROFILE
end
|
|
|
|
# Deviation id parsed from the url, falling back to the referer url.
def deviation_id
  from_url = self.class.deviation_id_from_url(url)
  return from_url if from_url

  self.class.deviation_id_from_url(referer_url)
end
|
|
|
|
# Artist name parsed from the url, falling back to the referer url.
def artist_name_from_url
  from_url = self.class.artist_name_from_url(url)
  return from_url if from_url

  self.class.artist_name_from_url(referer_url)
end
|
|
|
|
# The /deviation/<id> page url for the current deviation id; nil without an id.
def api_url
  id = deviation_id
  "https://www.deviantart.com/deviation/#{id}" unless id.blank?
end
|
|
|
|
# Fetches the deviation page (api_url) and returns it parsed by Nokogiri.
#
# Returns nil when there is no api_url or when the server answers 404 (the
# work was deleted). Raises HTTParty::ResponseError for any other
# unsuccessful response. The result is memoized.
def page
  return nil if api_url.blank?

  options = Danbooru.config.httparty_options.deep_merge(
    format: :plain,
    headers: { "Accept-Encoding" => "gzip" }
  )
  resp = HTTParty.get(api_url, **options)

  if resp.success?
    body = resp.body.to_s
    # Only inflate when the payload really is gzip (magic bytes 0x1f 0x8b).
    # The request merely advertises gzip support; a body that arrives
    # uncompressed or already decoded would make Zlib.gunzip raise
    # Zlib::GzipFile::Error.
    gzipped = body.getbyte(0) == 0x1f && body.getbyte(1) == 0x8b
    body = Zlib.gunzip(body) if gzipped
    Nokogiri::HTML(body)
  # the work was deleted
  elsif resp.code == 404
    nil
  else
    raise HTTParty::ResponseError.new(resp)
  end
end
memoize :page
|
|
|
|
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# Returns nil when there is no page or no such meta tag (for hidden or deleted
# works the UUID will be nil). The result is memoized.
def uuid
  doc = page
  return nil if doc.nil?

  appurl_meta = doc.search('meta[property="da:appurl"]').first
  return nil if appurl_meta.nil?

  appurl_meta["content"][%r!\ADeviantArt://deviation/(.*)\z!, 1]
end
memoize :uuid
|
|
|
|
# Builds a DeviantArtApiClient from the configured client id, secret and
# HTTParty options, then assigns it the access token obtained through
# Cache.get under "da-access-token" (55.minutes); the cache block supplies the
# client's own token as a hash. The client is memoized.
def api_client
  DeviantArtApiClient.new(
    Danbooru.config.deviantart_client_id,
    Danbooru.config.deviantart_client_secret,
    Danbooru.config.httparty_options
  ).tap do |client|
    client.access_token = Cache.get("da-access-token", 55.minutes) { client.access_token.to_hash }
  end
end
memoize :api_client
|
|
|
|
# Deviation record from the API for the scraped uuid; {} when there is no uuid.
# Memoized.
def api_deviation
  uuid.nil? ? {} : api_client.deviation(uuid)
end
memoize :api_deviation
|
|
|
|
# First entry of the API's :metadata list for the scraped uuid; {} when there
# is no uuid. Memoized.
def api_metadata
  if uuid.nil?
    {}
  else
    api_client.metadata(uuid)[:metadata].first
  end
end
memoize :api_metadata
|
|
|
|
# Download record from the API for the scraped uuid; {} when there is no uuid.
# Memoized.
def api_download
  uuid.nil? ? {} : api_client.download(uuid)
end
memoize :api_download
|
|
|
|
# Bundles the three API results into one hash keyed :deviation, :metadata
# and :download.
def api_response
  deviation = api_deviation
  metadata = api_metadata
  download = api_download

  { deviation: deviation, metadata: metadata, download: download }
end
|
|
end
|
|
end
|
|
end
|