# frozen_string_literal: true

module Source
  class Extractor
    # Source extractor for DeviantArt (and Sta.sh) URLs.
    #
    # Given an image or page URL it resolves the largest available image URL, the
    # page URL, the artist's name and profile, the tags, and the artist commentary.
    # Most of the data comes from the DeviantArt API, which is keyed by a UUID that
    # is scraped from the deviation's HTML page.
    class DeviantArt < Source::Extractor
      # Prefix shared by image URLs hosted on the wixmp CDN.
      WIXMP_IMAGE_URL = %r{\Ahttps://images-wixmp-}

      # @return [Boolean] true if both DeviantArt API credentials are configured
      def self.enabled?
        Danbooru.config.deviantart_client_id.present? && Danbooru.config.deviantart_client_secret.present?
      end

      # @return [Boolean] true if the parsed URL is a DeviantArt URL
      def match?
        Source::URL::DeviantArt === parsed_url
      end

      # @return [Array<String>] a one-element list containing {#image_url}
      def image_urls
        [image_url]
      end

      # Picks the best image URL: the official download if the work is downloadable,
      # otherwise the flash file, the largest video, or the upscaled content image.
      #
      # @return [String, nil]
      def image_url
        if api_deviation.blank?
          # work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
          WIXMP_IMAGE_URL.match?(url) ? extract_largest(url) : url
        elsif api_deviation[:is_downloadable]
          api_download[:src]
        elsif api_deviation[:flash].present?
          api_deviation.dig(:flash, :src)
        elsif api_deviation[:videos].present?
          api_deviation[:videos].max_by { |video| video[:filesize] }[:src]
        else
          extract_largest(api_deviation.dig(:content, :src))
        end
      end

      # Rewrites a sample image URL into the URL of the largest available version.
      #
      # @param src [String] the image URL to rewrite
      # @return [String] the rewritten URL, or `src` itself when no rewrite applies
      def extract_largest(src)
        if WIXMP_IMAGE_URL.match?(src)
          sample, separator, _rest = src.partition("/v1/")

          if separator.present?
            # :^) https://i.imgur.com/KG5bVRU.png
            # shamelessly aped from:
            # https://github.com/mikf/gallery-dl/blob/7990fe84f11271bc8e4079db6b0248dbeb79474a/gallery_dl/extractor/deviantart.py#L293
            f_value = sample.split("/f/").last
            data = {sub: "urn:app:", iss: "urn:app:", obj: [[{path: "/f/#{f_value}"}]], aud: ["urn:service:file.download"]}

            # Unpadded, single-line base64 of the payload. The fixed prefix below is
            # the base64 of the JWT header `{"typ":"JWT","alg":"none"}`.
            token = Base64.strict_encode64(JSON.generate(data)).delete("=")
            "#{sample}?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJub25lIn0.#{token}."
          elsif deviation_id && deviation_id.to_i <= 790_677_560 && !/\.gif\?/.match?(src)
            # NOTE(review): presumably only deviations up to this id expose the
            # `/intermediary` path -- confirm before changing the threshold.
            src = src.sub(%r{(/f/[a-f0-9-]+/[a-f0-9-]+)}, '/intermediary\1')
            src.sub(%r{/v1/(fit|fill)/.*\z}i, "")
          else
            src
          end
        else
          src = src.sub(%r{\Ahttps?://orig\d+\.deviantart\.net}i, "http://origin-orig.deviantart.net")
          src.gsub(/q_\d+,strp/, "q_100")
        end
      end

      # Prefers the Sta.sh page, then the URL reported by the API, then a page URL
      # derived from the input URL or referer.
      #
      # @return [String, nil]
      def page_url
        if stash_page.present?
          stash_page
        elsif api_deviation.present?
          api_deviation[:url]
        elsif deviation_id.present?
          page_url_from_image_url
        end
      end

      # The page URL derived purely from the input URL and referer (no API call).
      #
      # @return [String, nil]
      def page_url_from_image_url
        stash_page || parsed_url.page_url || parsed_referer&.page_url
      end

      # Sta.sh posts have the same image URLs as DeviantArt but different page URLs. We use the Sta.sh page if we have one.
      #
      # Image: https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/83d3eb4d-13e5-4aea-a08f-8d4331d033c4/dcmjs1s-389a7505-142d-4b34-a790-ab4ea1ec9eaa.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7InBhdGgiOiJcL2ZcLzgzZDNlYjRkLTEzZTUtNGFlYS1hMDhmLThkNDMzMWQwMzNjNFwvZGNtanMxcy0zODlhNzUwNS0xNDJkLTRiMzQtYTc5MC1hYjRlYTFlYzllYWEucG5nIn1dXSwiYXVkIjpbInVybjpzZXJ2aWNlOmZpbGUuZG93bmxvYWQiXX0.pIddc32BoLpAJt6D8YcRFonoVy9nC8RgROlYwMp3huo
      # Page: https://sta.sh/01pwva4zzf98
      #
      # @return [String, nil]
      def stash_page
        if parsed_url.stash_id.present?
          parsed_url.page_url
        elsif parsed_referer&.stash_id.present?
          parsed_referer.page_url
        end
      end

      # @return [String, nil] the artist's profile URL, or nil if the artist name is unknown
      def profile_url
        return nil if artist_name.blank?

        "https://www.deviantart.com/#{artist_name.downcase}"
      end

      # Prefer the name from the url because the api metadata won't be present when
      # the input url doesn't contain a deviation id, or the deviation is private or deleted.
      #
      # @return [String, nil]
      def artist_name
        if artist_name_from_url.present?
          artist_name_from_url
        elsif api_metadata.present?
          api_metadata.dig(:author, :username)
        end
      end

      # @return [String, nil] the deviation title from the API metadata
      def artist_commentary_title
        api_metadata[:title]
      end

      # @return [String, nil] the deviation description (HTML) from the API metadata
      def artist_commentary_desc
        api_metadata[:description]
      end

      # @return [Array<Array(String, String)>] pairs of tag name and tag page URL
      def tags
        return [] if api_metadata.blank?

        # `to_a` tolerates a metadata entry that has no :tags key.
        api_metadata[:tags].to_a.map do |tag|
          [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
        end
      end

      # Converts the HTML description to DText, rewriting embedded thumbnails and
      # outgoing-link redirects along the way.
      #
      # @return [String] the DText commentary with surrounding whitespace removed
      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc) do |element|
          # Convert embedded thumbnails of journal posts to 'deviantart #123'
          # links. Strip embedded thumbnails of image posts. Example:
          # https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
          if element.name == "a" && element["data-sigil"] == "thumb"
            element.name = "span"

            # Thumbnails carrying the `lit` class are the ones converted to links.
            # `to_s` keeps an anchor without a class attribute from raising; it is
            # stripped like any other non-`lit` thumbnail.
            if element["class"].to_s.split.include?("lit")
              linked_id = element["href"][/-(\d+)\z/, 1].to_i
              element.content = "deviantart ##{linked_id}"
            else
              element.content = ""
            end
          end

          if element.name == "a" && element["href"].present?
            element["href"] = element["href"].gsub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "")

            # href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
            # An unparseable href is deliberately left untouched.
            uri =
              begin
                Addressable::URI.heuristic_parse(element["href"])
              rescue StandardError
                nil
              end

            if uri.present? && uri.path.present?
              uri.scheme ||= "http"
              element["href"] = uri.to_s
            end
          end
        end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
      end

      # @return [String, Integer, nil] the deviation id from the URL, falling back to the referer
      def deviation_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      # @return [String, nil] the username from the URL, falling back to the referer
      def artist_name_from_url
        parsed_url.username || parsed_referer&.username
      end

      # Fetches and parses the deviation's HTML page.
      #
      # @return [Object, nil] the parsed page, or nil if there is no page URL or the page returned 404
      # @raise [RuntimeError] if the request fails with any status other than 404
      def page
        return nil if page_url_from_image_url.blank?

        resp = http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1})

        if resp.status.success?
          resp.parse
        elsif resp.code == 404
          # the work was deleted
          nil
        else
          raise "failed to fetch page (got code #{resp.code})"
        end
      end

      memoize :page

      # Scrape the UUID from the page's `da:appurl` meta tag, whose content has the
      # form `DeviantArt://deviation/<uuid>`.
      # For hidden or deleted works the UUID will be nil.
      #
      # @return [String, nil]
      def uuid
        return nil if page.nil?

        meta = page.at_css('meta[property="da:appurl"]')
        return nil if meta.nil?

        # `to_s` so that a meta tag without a content attribute yields nil instead of raising.
        meta["content"].to_s[%r{\ADeviantArt://deviation/(.*)\z}, 1]
      end

      memoize :uuid

      # Builds an API client, reusing an access token cached under "da-access-token".
      #
      # @return [DeviantArtApiClient]
      def api_client
        client = DeviantArtApiClient.new(
          Danbooru.config.deviantart_client_id,
          Danbooru.config.deviantart_client_secret
        )

        client.access_token = Cache.get("da-access-token", 11.weeks) do
          client.access_token.to_hash
        end

        client
      end

      memoize :api_client

      # @return [Hash] the API's deviation record, or an empty hash if the UUID is unknown
      def api_deviation
        return {} if uuid.nil?

        api_client.deviation(uuid)
      end

      memoize :api_deviation

      # @return [Hash] the first metadata entry from the API, or an empty hash if unavailable
      def api_metadata
        return {} if uuid.nil?

        # Fall back to {} when the metadata list is empty, so callers that index
        # into the result (e.g. artist_commentary_title) don't hit nil.
        api_client.metadata(uuid)[:metadata].first || {}
      end

      memoize :api_metadata

      # @return [Hash] the API's download record, or an empty hash if the work is not downloadable
      def api_download
        return {} unless uuid.present? && api_deviation[:is_downloadable]

        api_client.download(uuid)
      end

      memoize :api_download

      # @return [Hash] the raw API responses, keyed by :deviation, :metadata and :download
      def api_response
        {
          deviation: api_deviation,
          metadata: api_metadata,
          download: api_download
        }
      end
    end
  end
end