Get rid of `normalized_for_artist_finder?` and `normalizable_for_artist_finder?`. This was legacy bullshit that was originally designed to avoid API calls when saving artist entries containing old Pixiv direct image urls that had already been normalized, or that couldn't be normalized because they had a bad id. Nowadays we store profile urls in artist entries instead of direct image urls, so we don't normally need to do any API calls to normalize the profile url. Strategies should take care to avoid triggering API calls inside `profile_url` when possible.
327 lines
13 KiB
Ruby
327 lines
13 KiB
Ruby
# Asset URLs:
|
|
#
|
|
## NORMALIZABLE
|
|
## * http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png
|
|
## * http://pre15.deviantart.net/81de/th/pre/f/2015/063/5/f/inha_by_inhaestudios-d8kfzm5.jpg
|
|
## * http://th00.deviantart.net/fs71/PRE/f/2014/065/3/b/goruto_by_xyelkiltrox-d797tit.png
|
|
##
|
|
## * http://fc00.deviantart.net/fs71/f/2013/234/d/8/d84e05f26f0695b1153e9dab3a962f16-d6j8jl9.jpg
|
|
## * http://th04.deviantart.net/fs71/PRE/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg
|
|
##
|
|
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/52c4a3ad-d416-42f0-90f6-570983e36797/dczr28f-bd255304-01bf-4765-8cd3-e53983d3f78a.jpg
|
|
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
|
|
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc
|
|
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/fe7ab27f-7530-4252-99ef-2baaf81b36fd/dddf6pe-1a4a091c-768c-4395-9465-5d33899be1eb.png/v1/fill/w_800,h_1130,q_80,strp/stay_hydrated_and_in_the_shade_by_raikoart_dddf6pe-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7ImhlaWdodCI6Ijw9MTEzMCIsInBhdGgiOiJcL2ZcL2ZlN2FiMjdmLTc1MzAtNDI1Mi05OWVmLTJiYWFmODFiMzZmZFwvZGRkZjZwZS0xYTRhMDkxYy03NjhjLTQzOTUtOTQ2NS01ZDMzODk5YmUxZWIucG5nIiwid2lkdGgiOiI8PTgwMCJ9XV0sImF1ZCI6WyJ1cm46c2VydmljZTppbWFnZS5vcGVyYXRpb25zIl19.J0W4k-iV6Mg8Kt_5Lr_L_JbBq4lyr7aCausWWJ_Fsbw
|
|
#
|
|
## * http://www.deviantart.com/download/135944599/Touhou___Suwako_Moriya_Colored_by_Turtle_Chibi.png
|
|
## * https://www.deviantart.com/download/549677536/countdown_to_midnight_by_kawacy-d939hwg.jpg?token=92090cd3910d52089b566661e8c2f749755ed5f8&ts=1438535525
|
|
#
|
|
## NOT NORMALIZABLE
|
|
## * http://th04.deviantart.net/fs70/300W/f/2009/364/4/d/Alphes_Mimic___Rika_by_Juriesute.png
|
|
## * http://fc02.deviantart.net/fs48/f/2009/186/2/c/Animation_by_epe_tohri.swf
|
|
## * http://fc08.deviantart.net/files/f/2007/120/c/9/Cool_Like_Me_by_47ness.jpg
|
|
##
|
|
## * http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg
|
|
## * http://img04.deviantart.net/720b/i/2003/37/9/6/princess_peach.jpg
|
|
##
|
|
## * http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg
|
|
## * http://other00.deviantart.net/8863/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
|
|
## * http://fc09.deviantart.net/fs22/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
|
|
## * http://pre06.deviantart.net/8497/th/pre/f/2009/173/c/c/cc9686111dcffffffb5fcfaf0cf069fb.jpg
|
|
#
|
|
########################
|
|
#
|
|
# Page URLs:
|
|
#
|
|
# * https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408
|
|
# * https://noizave.deviantart.com/art/test-post-please-ignore-685436408
|
|
# * https://www.deviantart.com/deviation/685436408
|
|
# * https://fav.me/dbc3a48
|
|
#
|
|
# Profile URLs:
|
|
#
|
|
# * https://noizave.deviantart.com
|
|
# * https://www.deviantart.com/noizave
|
|
# * https://deviantart.com/noizave
|
|
|
|
module Sources
|
|
module Strategies
|
|
class DeviantArt < Base
|
|
# Host prefixes that serve image assets (e.g. `orig12`, `th04`).
ASSET_SUBDOMAINS = %r{(?:fc|th|pre|img|orig|origin-orig)\d*}i
# URLs whose first host label is an asset host or `www`; such a label must not be read as an artist name.
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i
# The dot is escaped so that only the literal host `deviantart.com` matches.
MAIN_DOMAIN = %r{\Ahttps?://(?:www\.)?deviantart\.com}i

# Building blocks shared by the page, profile and download patterns below.
TITLE = %r{(?<title>[a-z0-9_-]+?)}i
ARTIST = %r{(?<artist>[a-z0-9_-]+?)}i
DEVIATION_ID = %r{(?<deviation_id>[0-9]+)}i

# `<32 hex chars>-d<base36 id>.` style filenames.
DA_FILENAME_1 = %r{[a-f0-9]{32}-d(?<base36_deviation_id>[a-z0-9]+)\.}i
# `<title>[_by_<artist>[-d<base36 id>]].` style filenames.
DA_FILENAME_2 = %r{#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\.}i
DA_FILENAME = Regexp.union(DA_FILENAME_1, DA_FILENAME_2)
# wixmp filenames: `d<base36 id>-<uuid>.<ext>`, optionally followed by a resize suffix carrying title and artist.
WIX_FILENAME = %r{d(?<base36_deviation_id>[a-z0-9]+)[0-9a-f-]+\.\w+(?:/\w+/\w+/[\w,]+/(?<title>[\w-]+)_by_(?<artist>[\w-]+)_d\w+-\w+\.\w+)?.+}i

# Asset URLs whose filename is a bare 32 character hex string; these carry no usable deviation id.
NOT_NORMALIZABLE_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/[0-9a-f]{32}(?:-[^d]\w+)?\.}i

DA_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/#{DA_FILENAME}}i
WIX_ASSET = %r{\Ahttps?://images-wixmp-ed30a86b8c4ca887773594c2\.wixmp\.com/(?:intermediary/)?\w/[0-9a-f-]+/#{WIX_FILENAME}}i
ASSET = Regexp.union(DA_ASSET, WIX_ASSET)

# `/download/<id>/<filename>` links; the filename part is optional.
DA_DOWNLOAD = %r{#{MAIN_DOMAIN}/download/#{DEVIATION_ID}/#{DA_FILENAME_2}?}i

# Page URLs.
DEVIATION_ART = %r{#{MAIN_DOMAIN}/deviation/#{DEVIATION_ID}\z}i
PATH_ART = %r{#{MAIN_DOMAIN}/#{ARTIST}/art/#{TITLE}-#{DEVIATION_ID}\z}i
SUBDOMAIN_ART = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/art/#{TITLE}-#{DEVIATION_ID}\z}i

# Profile URLs.
PATH_PROFILE = %r{#{MAIN_DOMAIN}/#{ARTIST}/?\z}i
SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i

# `fav.me` short links; the id is base36 encoded.
FAVME = %r{\Ahttps?://(www\.)?fav\.me/d(?<base36_deviation_id>[a-z0-9]+)\z}i
|
|
|
|
# Domains whose URLs this strategy handles.
#
# @return [Array<String>]
def domains
  %w[deviantart.net deviantart.com fav.me]
end
|
|
|
|
# Display name of the site handled by this strategy.
#
# @return [String]
def site_name
  'Deviant Art'
end
|
|
|
|
# Whether the parsed URL belongs to DeviantArt or to its wixmp image host.
#
# @return [Boolean] false when the URL could not be parsed
def match?
  target = parsed_url
  return false if target.nil?

  domains.include?(target.domain) || target.host == "images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com"
end
|
|
|
|
# All image URLs for the deviation: a one-element list wrapping `image_url`.
#
# @return [Array]
def image_urls
  primary = image_url
  [primary]
end
|
|
|
|
# Picks the best available direct media URL for the deviation.
#
# Order of preference: the API download link (when the deviation is
# downloadable), the flash source, the largest video by filesize, and
# finally the API content source with size/quality parameters adjusted.
#
# @return [String, nil] the media URL; the user-supplied url when no API data is available
def image_url
  # Work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
  return url if api_deviation.blank?
  return api_download[:src] if api_deviation[:is_downloadable]
  return api_deviation.dig(:flash, :src) if api_deviation[:flash].present?
  return api_deviation[:videos].max_by { |video| video[:filesize] }[:src] if api_deviation[:videos].present?

  src = api_deviation.dig(:content, :src)

  # NOTE(review): 790677560 is presumably the newest deviation id for which the
  # `/intermediary` rewrite still serves the full-size file -- confirm.
  # `\A` (not `^`) so that only a src that *starts* with the wixmp host qualifies.
  if deviation_id && deviation_id.to_i <= 790677560 && src =~ %r{\Ahttps://images-wixmp-} && src !~ /\.gif\?/
    src = src.sub(%r!(/f/[a-f0-9-]+/[a-f0-9-]+)!, '/intermediary\1')
    # Drop the resize suffix and everything after it.
    src = src.sub(%r!/v1/(fit|fill)/.*\z!i, "")
  end

  src = src.sub(%r!\Ahttps?://orig\d+\.deviantart\.net!i, "http://origin-orig.deviantart.net")
  # Request maximum JPEG quality when a quality parameter is still present.
  src.gsub(%r!q_\d+,strp!, "q_100")
end
|
|
|
|
# Canonical page URL of the deviation.
#
# Uses the URL reported by the API when API data is present; otherwise
# rebuilds one from the parts found in the input URL.
#
# @return [String, nil] nil when neither API data nor a deviation id is available
def page_url
  return api_deviation[:url] if api_deviation.present?

  page_url_from_image_url if deviation_id.present?
end
|
|
|
|
# Builds a page URL purely from what can be parsed out of the input URLs.
#
# @return [String, nil] the `/<artist>/art/<title>-<id>` form when artist and
#   title are known, the `/deviation/<id>` form when only the id is known,
#   nil when there is no id
def page_url_from_image_url
  artist = artist_name_from_url
  title = title_from_url
  id = deviation_id

  return nil if id.blank?
  return "https://www.deviantart.com/deviation/#{id}" if artist.blank? || title.blank?

  "https://www.deviantart.com/#{artist}/art/#{title}-#{id}"
end
|
|
|
|
# URL to store as the source: the page URL rebuilt from the input URL.
#
# @return [String, nil] whatever `page_url_from_image_url` returns
def normalize_for_source
  normalized = page_url_from_image_url
  normalized
end
|
|
|
|
# Profile URL of the artist, built from the lowercased artist name.
#
# @return [String, nil] nil when no artist name is known
def profile_url
  name = artist_name
  name.blank? ? nil : "https://www.deviantart.com/#{name.downcase}"
end
|
|
|
|
# The artist's name. The name parsed from the url wins, because the api
# metadata is missing whenever the input url has no deviation id or the
# deviation is private or deleted.
#
# @return [String, nil] nil when neither source yields a name
def artist_name
  from_url = artist_name_from_url
  return from_url if from_url.present?
  return nil if api_metadata.blank?

  api_metadata.dig(:author, :username)
end
|
|
|
|
# Title of the deviation as reported by the API metadata.
#
# `api_metadata` takes the first element of the API's metadata list, which is
# nil when that list is empty, so a missing metadata record is tolerated here.
#
# @return [Object, nil] the `:title` entry, or nil when there is no metadata
def artist_commentary_title
  metadata = api_metadata
  metadata && metadata[:title]
end
|
|
|
|
# Description of the deviation as reported by the API metadata.
#
# `api_metadata` takes the first element of the API's metadata list, which is
# nil when that list is empty, so a missing metadata record is tolerated here.
#
# @return [Object, nil] the `:description` entry, or nil when there is no metadata
def artist_commentary_desc
  metadata = api_metadata
  metadata && metadata[:description]
end
|
|
|
|
# Tags attached to the deviation, each paired with its tag page URL.
#
# @return [Array<Array>] `[name, url]` pairs; empty when there is no metadata
def tags
  return [] if api_metadata.blank?

  api_metadata[:tags].map do |tag|
    name = tag[:tag_name]
    [name, "https://www.deviantart.com/tag/#{name}"]
  end
end
|
|
|
|
# Converts the HTML description to DText, rewriting DeviantArt-specific markup
# on the way, and strips leading/trailing whitespace from the result.
#
# @return [String]
def dtext_artist_commentary_desc
  dtext = DText.from_html(artist_commentary_desc) do |element|
    # Convert embedded thumbnails of journal posts to 'deviantart #123'
    # links. Strip embedded thumbnails of image posts. Example:
    # https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
    if element.name == "a" && element["data-sigil"] == "thumb"
      element.name = "span"

      # <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
      # `to_s` keeps a thumb without a class or href attribute from raising.
      if element["class"].to_s.split.include?("lit")
        # Named `linked_id` so it does not shadow the `deviation_id` method.
        linked_id = element["href"].to_s[%r!-(\d+)\z!, 1].to_i
        element.content = "deviantart ##{linked_id}"
      else
        element.content = ""
      end
    end

    if element.name == "a" && element["href"].present?
      # Remove DeviantArt's outgoing-link redirect prefix.
      element["href"] = element["href"].gsub(%r!\Ahttps?://www\.deviantart\.com/users/outgoing\?!i, "")

      # href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
      # Parsing is best effort: an unparseable href is left as it is.
      uri =
        begin
          Addressable::URI.heuristic_parse(element["href"])
        rescue StandardError
          nil
        end

      if uri.present? && uri.path.present?
        uri.scheme ||= "http"
        element["href"] = uri.to_s
      end
    end
  end

  dtext.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
|
|
|
|
# Extracts the numeric deviation id from an asset, page, download or fav.me url.
#
# @param url [String, nil]
# @return [Integer, nil] nil when the url carries no usable id
def self.deviation_id_from_url(url)
  return nil if NOT_NORMALIZABLE_ASSET.match?(url)

  # Asset and fav.me urls encode the id in base36.
  base36_match = ASSET.match(url) || FAVME.match(url)
  return base36_match[:base36_deviation_id]&.to_i(36) if base36_match

  # Page and download urls carry the id in decimal. A subdomain-style url only
  # counts when the subdomain is not a reserved (asset or www) host.
  decimal_match =
    PATH_ART.match(url) ||
    (SUBDOMAIN_ART.match(url) unless RESERVED_SUBDOMAINS.match?(url)) ||
    DA_DOWNLOAD.match(url) ||
    DEVIATION_ART.match(url)

  decimal_match && decimal_match[:deviation_id].to_i
end
|
|
|
|
# Extracts the artist name from an asset, page, profile or download url.
#
# Names taken from path-style and asset urls are dasherized; names taken from
# a subdomain are returned as found.
#
# @param url [String, nil]
# @return [String, nil] nil when the url carries no artist name
def self.artist_name_from_url(url)
  return nil if NOT_NORMALIZABLE_ASSET.match?(url)

  path_match = ASSET.match(url) || PATH_ART.match(url) || PATH_PROFILE.match(url) || DA_DOWNLOAD.match(url)
  return path_match[:artist]&.dasherize if path_match

  # A reserved (asset or www) subdomain is never an artist name.
  return nil if RESERVED_SUBDOMAINS.match?(url)

  subdomain_match = SUBDOMAIN_ART.match(url) || SUBDOMAIN_PROFILE.match(url)
  subdomain_match && subdomain_match[:artist]
end
|
|
|
|
# Extracts the deviation title from an asset, page or download url and
# reformats it as dash-separated, titleized words.
#
# @param url [String, nil]
# @return [String, nil] nil when the url carries no title
def self.title_from_url(url)
  return nil if NOT_NORMALIZABLE_ASSET.match?(url)

  found = ASSET.match(url) || PATH_ART.match(url) || DA_DOWNLOAD.match(url)
  # A subdomain-style url only counts when the subdomain is not reserved.
  found ||= SUBDOMAIN_ART.match(url) unless RESERVED_SUBDOMAINS.match?(url)
  return nil if found.nil?

  found[:title].to_s.titleize.strip.squeeze(" ").tr(" ", "-").presence
end
|
|
|
|
# Deviation id parsed from the url, falling back to the referer url.
#
# @return [Integer, nil]
def deviation_id
  from_url = self.class.deviation_id_from_url(url)
  return from_url if from_url

  self.class.deviation_id_from_url(referer_url)
end
|
|
|
|
# Artist name parsed from the url, falling back to the referer url.
#
# @return [String, nil]
def artist_name_from_url
  from_url = self.class.artist_name_from_url(url)
  return from_url if from_url

  self.class.artist_name_from_url(referer_url)
end
|
|
|
|
# Title parsed from the url, falling back to the referer url.
#
# @return [String, nil]
def title_from_url
  from_url = self.class.title_from_url(url)
  return from_url if from_url

  self.class.title_from_url(referer_url)
end
|
|
|
|
# Fetches and parses the deviation's HTML page (memoized).
#
# @return [Nokogiri::HTML::Document, nil] nil when no page url can be built
#   or the server answers 404
# @raise [RuntimeError] on any other unsuccessful response
def page
  target = page_url_from_image_url
  return nil if target.blank?

  resp = Danbooru::Http.cache(1.minute).get(target, follow: {max_hops: 1})
  return Nokogiri::HTML(resp.body.to_s) if resp.status.success?

  # the work was deleted
  return nil if resp.code == 404

  raise "failed to fetch page (got code #{resp.code})"
end
memoize :page
|
|
|
|
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For hidden or deleted works the UUID will be nil.
#
# @return [String, nil]
def uuid
  return nil if page.nil?

  meta = page.at_css('meta[property="da:appurl"]')
  return nil if meta.nil?

  meta["content"][%r!\ADeviantArt://deviation/(.*)\z!, 1]
end
memoize :uuid
|
|
|
|
# Builds the DeviantArt API client (memoized). The access token is read from,
# or stored into, the cache under the "da-access-token" key.
#
# @return [DeviantArtApiClient]
def api_client
  client_id = Danbooru.config.deviantart_client_id
  client_secret = Danbooru.config.deviantart_client_secret

  DeviantArtApiClient.new(client_id, client_secret).tap do |client|
    client.access_token = Cache.get("da-access-token", 11.weeks) { client.access_token.to_hash }
  end
end
memoize :api_client
|
|
|
|
# Deviation record from the API (memoized).
#
# @return [Hash] an empty hash when no UUID could be found
def api_deviation
  uuid.nil? ? {} : api_client.deviation(uuid)
end
memoize :api_deviation
|
|
|
|
# First metadata record returned by the API for this deviation (memoized).
#
# @return [Hash, nil] an empty hash when no UUID could be found
def api_metadata
  return {} if uuid.nil?

  records = api_client.metadata(uuid)[:metadata]
  records.first
end
memoize :api_metadata
|
|
|
|
# Download record from the API (memoized). Only requested when a UUID is
# known and the deviation is flagged as downloadable.
#
# @return [Hash] an empty hash otherwise
def api_download
  return {} if uuid.blank? || !api_deviation[:is_downloadable]

  api_client.download(uuid)
end
memoize :api_download
|
|
|
|
# Bundles the three API results into one hash.
#
# @return [Hash] with the keys :deviation, :metadata and :download
def api_response
  response = {}
  response[:deviation] = api_deviation
  response[:metadata] = api_metadata
  response[:download] = api_download
  response
end
|
|
end
|
|
end
|
|
end
|