# Asset URLs:
#
## NORMALIZABLE
## * http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png
## * http://pre15.deviantart.net/81de/th/pre/f/2015/063/5/f/inha_by_inhaestudios-d8kfzm5.jpg
## * http://th00.deviantart.net/fs71/PRE/f/2014/065/3/b/goruto_by_xyelkiltrox-d797tit.png
##
## * http://fc00.deviantart.net/fs71/f/2013/234/d/8/d84e05f26f0695b1153e9dab3a962f16-d6j8jl9.jpg
## * http://th04.deviantart.net/fs71/PRE/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg
##
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/52c4a3ad-d416-42f0-90f6-570983e36797/dczr28f-bd255304-01bf-4765-8cd3-e53983d3f78a.jpg
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc
## * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/fe7ab27f-7530-4252-99ef-2baaf81b36fd/dddf6pe-1a4a091c-768c-4395-9465-5d33899be1eb.png/v1/fill/w_800,h_1130,q_80,strp/stay_hydrated_and_in_the_shade_by_raikoart_dddf6pe-fullview.jpg?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7ImhlaWdodCI6Ijw9MTEzMCIsInBhdGgiOiJcL2ZcL2ZlN2FiMjdmLTc1MzAtNDI1Mi05OWVmLTJiYWFmODFiMzZmZFwvZGRkZjZwZS0xYTRhMDkxYy03NjhjLTQzOTUtOTQ2NS01ZDMzODk5YmUxZWIucG5nIiwid2lkdGgiOiI8PTgwMCJ9XV0sImF1ZCI6WyJ1cm46c2VydmljZTppbWFnZS5vcGVyYXRpb25zIl19.J0W4k-iV6Mg8Kt_5Lr_L_JbBq4lyr7aCausWWJ_Fsbw
#
## * http://www.deviantart.com/download/135944599/Touhou___Suwako_Moriya_Colored_by_Turtle_Chibi.png
## * https://www.deviantart.com/download/549677536/countdown_to_midnight_by_kawacy-d939hwg.jpg?token=92090cd3910d52089b566661e8c2f749755ed5f8&ts=1438535525
#
## NOT NORMALIZABLE
## * http://th04.deviantart.net/fs70/300W/f/2009/364/4/d/Alphes_Mimic___Rika_by_Juriesute.png
## * http://fc02.deviantart.net/fs48/f/2009/186/2/c/Animation_by_epe_tohri.swf
## * http://fc08.deviantart.net/files/f/2007/120/c/9/Cool_Like_Me_by_47ness.jpg
##
## * http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg
## * http://img04.deviantart.net/720b/i/2003/37/9/6/princess_peach.jpg
##
## * http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg
## * http://other00.deviantart.net/8863/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
## * http://fc09.deviantart.net/fs22/o/2009/197/3/7/37ac79eaeef9fb32e6ae998e9a77d8dd.jpg
## * http://pre06.deviantart.net/8497/th/pre/f/2009/173/c/c/cc9686111dcffffffb5fcfaf0cf069fb.jpg
#
########################
#
# Page URLs:
#
# * https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408
# * https://noizave.deviantart.com/art/test-post-please-ignore-685436408
# * https://www.deviantart.com/deviation/685436408
# * https://fav.me/dbc3a48
#
# Profile URLs:
#
# * https://noizave.deviantart.com
# * https://www.deviantart.com/noizave
# * https://deviantart.com/noizave

module Sources
  module Strategies
    # Source strategy for DeviantArt. Recognises the asset, page and profile
    # URL shapes listed at the top of this file, extracts the deviation id,
    # artist and title from them where possible, and looks up the image url,
    # tags and artist commentary through the DeviantArt API.
    class DeviantArt < Base
      # Host prefixes that serve assets (and `www`); these must never be
      # mistaken for an artist's personal subdomain.
      ASSET_SUBDOMAINS = /(?:fc|th|pre|img|orig|origin-orig)\d*/i
      RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:#{ASSET_SUBDOMAINS}|www)\.}i
      MAIN_DOMAIN = %r{\Ahttps?://(?:www\.)?deviantart\.com}i

      # Named captures shared by the URL patterns below. The capture names are
      # read back through `$~[...]` in the `*_from_url` class methods.
      TITLE = /(?<title>[a-z0-9_-]+?)/i
      ARTIST = /(?<artist>[a-z0-9_-]+?)/i
      DEVIATION_ID = /(?<deviation_id>[0-9]+)/i

      # Filename shapes. DA_FILENAME_1 is a 32-digit hex name followed by
      # `-d<base36 id>`; DA_FILENAME_2 is `<title>[_by_<artist>[-d<base36 id>]]`.
      DA_FILENAME_1 = /[a-f0-9]{32}-d(?<base36_deviation_id>[a-z0-9]+)\./i
      DA_FILENAME_2 = /#{TITLE}(?:_by_#{ARTIST}(?:-d(?<base36_deviation_id>[a-z0-9]+))?)?\./i
      DA_FILENAME = Regexp.union(DA_FILENAME_1, DA_FILENAME_2)
      WIX_FILENAME = %r{d(?<base36_deviation_id>[a-z0-9]+)[0-9a-f-]+\.\w+(?:/\w+/\w+/[\w,]+/(?<title>[\w-]+)_by_(?<artist>[\w-]+)_d\w+-\w+\.\w+)?.+}i

      # Asset URLs. NOT_NORMALIZABLE_ASSET is checked first by the
      # `*_from_url` methods, which return nil for anything it matches.
      NOT_NORMALIZABLE_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/[0-9a-f]{32}(?:-[^d]\w+)?\.}i
      DA_ASSET = %r{\Ahttps?://#{ASSET_SUBDOMAINS}\.deviantart\.net/.+/#{DA_FILENAME}}i
      WIX_ASSET = %r{\Ahttps?://images-wixmp-ed30a86b8c4ca887773594c2\.wixmp\.com/(?:intermediary/)?\w/[0-9a-f-]+/#{WIX_FILENAME}}i
      ASSET = Regexp.union(DA_ASSET, WIX_ASSET)

      # Download, page and profile URLs.
      DA_DOWNLOAD = %r{#{MAIN_DOMAIN}/download/#{DEVIATION_ID}/#{DA_FILENAME_2}?}i
      DEVIATION_ART = %r{#{MAIN_DOMAIN}/deviation/#{DEVIATION_ID}\z}i
      PATH_ART = %r{#{MAIN_DOMAIN}/#{ARTIST}/art/#{TITLE}-#{DEVIATION_ID}\z}i
      SUBDOMAIN_ART = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/art/#{TITLE}-#{DEVIATION_ID}\z}i
      PATH_PROFILE = %r{#{MAIN_DOMAIN}/#{ARTIST}/?\z}i
      SUBDOMAIN_PROFILE = %r{\Ahttps?://#{ARTIST}\.deviantart\.com/?\z}i
      FAVME = %r{\Ahttps?://(?:www\.)?fav\.me/d(?<base36_deviation_id>[a-z0-9]+)\z}i

      # @return [Array<String>] registrable domains handled by this strategy
      def domains
        ["deviantart.net", "deviantart.com", "fav.me"]
      end

      # @return [Array<String>] exact wixmp hostnames handled by this strategy
      def wixmp_hosts
        ["images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com", "api-da.wixmp.com"]
      end

      # @return [String] display name of the site
      def site_name
        "Deviant Art"
      end

      # @return [Boolean] true when the parsed url's domain is one of
      #   {#domains} or its host is one of {#wixmp_hosts}
      def match?
        return false if parsed_url.nil?

        parsed_url.domain.in?(domains) || parsed_url.host.in?(wixmp_hosts)
      end

      # @return [Array] a single-element array wrapping {#image_url}
      def image_urls
        [image_url]
      end

      # Picks the image url from the API data. Falls back to the url as given
      # when there is no API deviation, then prefers the download source, the
      # flash source, the video with the largest filesize, and finally the
      # content source.
      def image_url
        # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
        if api_deviation.blank?
          url
        elsif api_deviation[:is_downloadable]
          api_download[:src]
        elsif api_deviation[:flash].present?
          api_deviation.dig(:flash, :src)
        elsif api_deviation[:videos].present?
          api_deviation[:videos].max_by { |x| x[:filesize] }[:src]
        else
          src = api_deviation.dig(:content, :src)

          # For non-gif wixmp sources of deviations up to this id, rewrite the
          # path to the `/intermediary` form and drop the `/v1/fit|fill/...`
          # resize suffix.
          if deviation_id && deviation_id.to_i <= 790_677_560 && src =~ %r{\Ahttps://images-wixmp-} && src !~ /\.gif\?/
            src = src.sub(%r{(/f/[a-f0-9-]+/[a-f0-9-]+)}, '/intermediary\1')
            src = src.sub(%r{/v1/(fit|fill)/.*\z}i, "")
          end

          src = src.sub(%r{\Ahttps?://orig\d+\.deviantart\.net}i, "http://origin-orig.deviantart.net")
          src = src.gsub(/q_\d+,strp/, "q_100")
          src
        end
      end

      # @return [String, nil] the url reported by the API when available,
      #   otherwise a page url rebuilt from the input url, otherwise nil
      def page_url
        if api_deviation.present?
          api_deviation[:url]
        elsif deviation_id.present?
          page_url_from_image_url
        end
      end

      # Builds a page url purely from what the input urls reveal.
      #
      # @return [String, nil] `/<artist>/art/<title>-<id>` when all three are
      #   known, `/deviation/<id>` when only the id is known, nil otherwise
      def page_url_from_image_url
        artist = artist_name_from_url
        title = title_from_url
        id = deviation_id

        if artist.present? && title.present? && id.present?
          "https://www.deviantart.com/#{artist}/art/#{title}-#{id}"
        elsif id.present?
          "https://www.deviantart.com/deviation/#{id}"
        end
      end

      # @return [String, nil] same value as {#page_url_from_image_url}
      def normalize_for_source
        page_url_from_image_url
      end

      # @return [String, nil] profile url for the downcased artist name, or
      #   nil when no artist name is known
      def profile_url
        return nil if artist_name.blank?

        "https://www.deviantart.com/#{artist_name.downcase}"
      end

      # Prefer the name from the url because the api metadata won't be present when
      # the input url doesn't contain a deviation id, or the deviation is private or deleted.
      def artist_name
        if artist_name_from_url.present?
          artist_name_from_url
        elsif api_metadata.present?
          api_metadata.dig(:author, :username)
        end
      end

      # @return the `:title` entry of the API metadata
      def artist_commentary_title
        api_metadata[:title]
      end

      # @return the `:description` entry of the API metadata
      def artist_commentary_desc
        api_metadata[:description]
      end

      # @return [Array<Array(String, String)>] `[tag name, tag url]` pairs
      #   from the API metadata; empty when there is no metadata
      def tags
        return [] if api_metadata.blank?

        api_metadata[:tags].map do |tag|
          [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
        end
      end

      # Converts the commentary description to DText, rewriting anchors along
      # the way, and strips leading/trailing whitespace from the result.
      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc) do |element|
          # Convert embedded thumbnails of journal posts to 'deviantart #123'
          # links. Strip embedded thumbnails of image posts. Example:
          # https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
          if element.name == "a" && element["data-sigil"] == "thumb"
            element.name = "span"

            # <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
            # `to_s` guards against anchors that lack a class or href attribute.
            if element["class"].to_s.split.include?("lit")
              journal_id = element["href"].to_s[/-(\d+)\z/, 1].to_i
              element.content = "deviantart ##{journal_id}"
            else
              element.content = ""
            end
          end

          if element.name == "a" && element["href"].present?
            element["href"] = element["href"].gsub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "")

            # href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
            # An unparseable href is left untouched.
            uri =
              begin
                Addressable::URI.heuristic_parse(element["href"])
              rescue StandardError
                nil
              end

            if uri.present? && uri.path.present?
              uri.scheme ||= "http"
              element["href"] = uri.to_s
            end
          end
        end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
      end

      # Extracts the numeric deviation id from a url.
      #
      # @param url [String, nil]
      # @return [Integer, nil] the id (decoded from base 36 for asset and
      #   fav.me urls), or nil when the url is not normalizable or unrecognised
      def self.deviation_id_from_url(url)
        if url =~ NOT_NORMALIZABLE_ASSET
          nil
        elsif url =~ ASSET || url =~ FAVME
          $~[:base36_deviation_id].try(:to_i, 36)
        elsif url =~ PATH_ART || (url !~ RESERVED_SUBDOMAINS && url =~ SUBDOMAIN_ART) || url =~ DA_DOWNLOAD || url =~ DEVIATION_ART
          $~[:deviation_id].to_i
        end
      end

      # Extracts the artist name from a url.
      #
      # @param url [String, nil]
      # @return [String, nil] the artist (dasherized when taken from a path
      #   or filename, verbatim when taken from a subdomain), or nil
      def self.artist_name_from_url(url)
        if url =~ NOT_NORMALIZABLE_ASSET
          nil
        elsif url =~ ASSET || url =~ PATH_ART || url =~ PATH_PROFILE || url =~ DA_DOWNLOAD
          $~[:artist].try(:dasherize)
        elsif url !~ RESERVED_SUBDOMAINS && (url =~ SUBDOMAIN_ART || url =~ SUBDOMAIN_PROFILE)
          $~[:artist]
        end
      end

      # Extracts the title from a url and normalises it to the titleized,
      # dash-separated form used when rebuilding page urls.
      #
      # @param url [String, nil]
      # @return [String, nil] the normalised title, or nil when absent
      def self.title_from_url(url)
        if url =~ NOT_NORMALIZABLE_ASSET
          nil
        elsif url =~ ASSET || url =~ PATH_ART || url =~ DA_DOWNLOAD || (url !~ RESERVED_SUBDOMAINS && url =~ SUBDOMAIN_ART)
          $~[:title].to_s.titleize.strip.squeeze(" ").tr(" ", "-").presence
        end
      end

      # @return [Integer, nil] deviation id from the url, else from the referer
      def deviation_id
        self.class.deviation_id_from_url(url) || self.class.deviation_id_from_url(referer_url)
      end

      # @return [String, nil] artist name from the url, else from the referer
      def artist_name_from_url
        self.class.artist_name_from_url(url) || self.class.artist_name_from_url(referer_url)
      end

      # @return [String, nil] title from the url, else from the referer
      def title_from_url
        self.class.title_from_url(url) || self.class.title_from_url(referer_url)
      end

      # Fetches and parses the deviation's html page.
      #
      # @return [Nokogiri::HTML::Document, nil] the parsed page, or nil when
      #   no page url can be built or the server answers 404
      # @raise [RuntimeError] on any other unsuccessful response
      def page
        return nil if page_url_from_image_url.blank?

        resp = Danbooru::Http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1})

        if resp.status.success?
          Nokogiri::HTML(resp.body.to_s)
        # the work was deleted
        elsif resp.code == 404
          nil
        else
          raise "failed to fetch page (got code #{resp.code})"
        end
      end
      memoize :page

      # Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
      # For hidden or deleted works the UUID will be nil.
      def uuid
        return nil if page.nil?

        meta = page.at_css('meta[property="da:appurl"]')
        return nil if meta.nil?

        meta["content"][%r{\ADeviantArt://deviation/(.*)\z}, 1]
      end
      memoize :uuid

      # Builds the API client from the configured client id and secret, and
      # assigns it an access token obtained through `Cache.get` under the
      # "da-access-token" key.
      def api_client
        api_client = DeviantArtApiClient.new(
          Danbooru.config.deviantart_client_id,
          Danbooru.config.deviantart_client_secret
        )

        api_client.access_token = Cache.get("da-access-token", 11.weeks) do
          api_client.access_token.to_hash
        end

        api_client
      end
      memoize :api_client

      # @return the API deviation for {#uuid}, or an empty hash without a uuid
      def api_deviation
        return {} if uuid.nil?

        api_client.deviation(uuid)
      end
      memoize :api_deviation

      # @return the first `:metadata` entry returned by the API for {#uuid},
      #   or an empty hash without a uuid
      def api_metadata
        return {} if uuid.nil?

        api_client.metadata(uuid)[:metadata].first
      end
      memoize :api_metadata

      # @return the API download data, or an empty hash when there is no uuid
      #   or the deviation is not downloadable
      def api_download
        return {} unless uuid.present? && api_deviation[:is_downloadable]

        api_client.download(uuid)
      end
      memoize :api_download

      # @return [Hash] the three API payloads keyed by :deviation, :metadata
      #   and :download
      def api_response
        {
          deviation: api_deviation,
          metadata: api_metadata,
          download: api_download
        }
      end
    end
  end
end