sources: rename Sources::Strategies to Source::Extractor.

Rename Sources::Strategies to Source::Extractor. A Source::Extractor
represents a thing that extracts information from a given URL.
This commit is contained in:
evazion
2022-03-24 03:05:10 -05:00
parent 34aa22f90b
commit d9d3c1dfe4
63 changed files with 622 additions and 606 deletions

View File

@@ -0,0 +1,101 @@
# frozen_string_literal: true
# @see Source::URL::ArtStation
class Source::Extractor
  # Extractor for ArtStation (artstation.com) projects and direct image URLs.
  class ArtStation < Source::Extractor
    def match?
      Source::URL::ArtStation === parsed_url
    end

    # If the input URL is already a direct image URL, upgrade it to the best
    # available size; otherwise list the images from the project API.
    def image_urls
      if parsed_url.image_url?
        [asset_url(url)]
      else
        image_urls_from_api
      end
    end

    def page_url
      return nil if project_id.blank?

      if artist_name.present?
        "https://#{artist_name}.artstation.com/projects/#{project_id}"
      else
        "https://www.artstation.com/artwork/#{project_id}"
      end
    end

    def profile_url
      return nil if artist_name.blank?
      "https://www.artstation.com/#{artist_name}"
    end

    def artist_name
      artist_name_from_url || api_response.dig(:user, :username)
    end

    def artist_commentary_title
      api_response[:title]
    end

    def artist_commentary_desc
      api_response[:description]
    end

    def dtext_artist_commentary_desc
      ActionView::Base.full_sanitizer.sanitize(artist_commentary_desc)
    end

    def tags
      api_response[:tags].to_a.map do |tag|
        [tag, "https://www.artstation.com/search?q=" + CGI.escape(tag)]
      end
    end

    def image_urls_from_api
      api_response[:assets].to_a.map do |asset|
        if asset[:asset_type] == "image"
          asset_url(asset[:image_url])
        elsif asset[:asset_type] == "video_clip"
          next # XXX Skip for now; actually downloading these videos requires bypassing a Cloudflare captcha.

          # Dead code below is kept deliberately: it documents how the video
          # URL would be extracted once the captcha issue is solved.
          url = Nokogiri::HTML5.parse(asset[:player_embedded]).at("iframe").attr("src")
          next if url.nil?

          response = http.cache(1.minute).get(url)
          next if response.status != 200
          response.parse.at("video source").attr("src")
        end
      end.compact
    end

    def artist_name_from_url
      parsed_url.username || parsed_referer&.username
    end

    def project_id
      parsed_url.work_id || parsed_referer&.work_id
    end

    # The project's JSON API response, or {} when the project id is unknown
    # or the request fails.
    def api_response
      return {} if project_id.blank?

      resp = http.cache(1.minute).get("https://www.artstation.com/projects/#{project_id}.json")
      return {} if resp.code != 200

      resp.parse.with_indifferent_access
    end
    memoize :api_response

    # Given an image URL, probe each size from largest to smallest and return
    # the first variant that exists, falling back to the URL as given.
    #
    # Renamed the internals: the previous version shadowed both the
    # `parsed_url` method (with a local) and its own `url` parameter (with a
    # block parameter), which was confusing and warning-prone.
    def asset_url(image_url)
      parsed_asset = Source::URL.parse(image_url)
      image_sizes = %w[original 4k large medium small]
      candidates = image_sizes.map { |size| parsed_asset.full_image_url(size) }

      candidates.find { |candidate| http_exists?(candidate) } || image_url
    end
  end
end

View File

@@ -0,0 +1,209 @@
# frozen_string_literal: true
module Source
class Extractor
# Extractor for DeviantArt. Fetches deviation metadata through the official
# OAuth API (DeviantArtApiClient), scraping the HTML page only to obtain the
# deviation UUID the API requires. Falls back to the raw URL when the work is
# private, deleted, or the URL carries no deviation id.
class DeviantArt < Source::Extractor
# Only active when API credentials are configured.
def self.enabled?
Danbooru.config.deviantart_client_id.present? && Danbooru.config.deviantart_client_secret.present?
end
def match?
Source::URL::DeviantArt === parsed_url
end
def image_urls
[image_url]
end
# Chooses the best image source: download link > flash > largest video >
# rewritten content URL, in that priority order.
def image_url
# work is private, deleted, or the url didn't contain a deviation id; use image url as given by user.
if api_deviation.blank?
url
elsif api_deviation[:is_downloadable]
api_download[:src]
elsif api_deviation[:flash].present?
api_deviation.dig(:flash, :src)
elsif api_deviation[:videos].present?
# Pick the highest-quality video variant by file size.
api_deviation[:videos].max_by { |x| x[:filesize] }[:src]
else
src = api_deviation.dig(:content, :src)
# NOTE(review): deviations with id <= 790_677_560 hosted on wixmp appear to
# support full-size /intermediary URLs — confirm this cutoff is still valid.
if deviation_id && deviation_id.to_i <= 790_677_560 && src =~ %r{\Ahttps://images-wixmp-} && src !~ /\.gif\?/
src = src.sub(%r{(/f/[a-f0-9-]+/[a-f0-9-]+)}, '/intermediary\1')
src = src.sub(%r{/v1/(fit|fill)/.*\z}i, "")
end
src = src.sub(%r{\Ahttps?://orig\d+\.deviantart\.net}i, "http://origin-orig.deviantart.net")
# Request maximum JPEG quality from the wixmp CDN.
src = src.gsub(/q_\d+,strp/, "q_100")
src
end
end
def page_url
if stash_page.present?
stash_page
elsif api_deviation.present?
api_deviation[:url]
elsif deviation_id.present?
page_url_from_image_url
else
nil
end
end
def page_url_from_image_url
stash_page || parsed_url.page_url || parsed_referer&.page_url
end
# Sta.sh posts have the same image URLs as DeviantArt but different page URLs. We use the Sta.sh page if we have one.
#
# Image: https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/83d3eb4d-13e5-4aea-a08f-8d4331d033c4/dcmjs1s-389a7505-142d-4b34-a790-ab4ea1ec9eaa.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOjdlMGQxODg5ODIyNjQzNzNhNWYwZDQxNWVhMGQyNmUwIiwiaXNzIjoidXJuOmFwcDo3ZTBkMTg4OTgyMjY0MzczYTVmMGQ0MTVlYTBkMjZlMCIsIm9iaiI6W1t7InBhdGgiOiJcL2ZcLzgzZDNlYjRkLTEzZTUtNGFlYS1hMDhmLThkNDMzMWQwMzNjNFwvZGNtanMxcy0zODlhNzUwNS0xNDJkLTRiMzQtYTc5MC1hYjRlYTFlYzllYWEucG5nIn1dXSwiYXVkIjpbInVybjpzZXJ2aWNlOmZpbGUuZG93bmxvYWQiXX0.pIddc32BoLpAJt6D8YcRFonoVy9nC8RgROlYwMp3huo
# Page: https://sta.sh/01pwva4zzf98
def stash_page
if parsed_url.stash_id.present?
parsed_url.page_url
elsif parsed_referer&.stash_id.present?
parsed_referer.page_url
end
end
def profile_url
return nil if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}"
end
# Prefer the name from the url because the api metadata won't be present when
# the input url doesn't contain a deviation id, or the deviation is private or deleted.
def artist_name
if artist_name_from_url.present?
artist_name_from_url
elsif api_metadata.present?
api_metadata.dig(:author, :username)
else
nil
end
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def tags
if api_metadata.blank?
return []
end
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
# Convert embedded thumbnails of journal posts to 'deviantart #123'
# links. Strip embedded thumbnails of image posts. Example:
# https://sa-dui.deviantart.com/art/Commission-Meinos-Kaen-695905927.
if element.name == "a" && element["data-sigil"] == "thumb"
element.name = "span"
# <a href="https://sa-dui.deviantart.com/journal/About-Commissions-223178193" data-sigil="thumb" class="thumb lit" ...>
if element["class"].split.include?("lit")
deviation_id = element["href"][/-(\d+)\z/, 1].to_i
element.content = "deviantart ##{deviation_id}"
else
element.content = ""
end
end
if element.name == "a" && element["href"].present?
element["href"] = element["href"].gsub(%r{\Ahttps?://www\.deviantart\.com/users/outgoing\?}i, "")
# href may be missing the `http://` bit (ex: `inprnt.com`, `//inprnt.com`). Add it if missing.
uri = Addressable::URI.heuristic_parse(element["href"]) rescue nil
if uri.present? && uri.path.present?
uri.scheme ||= "http"
element["href"] = uri.to_s
end
end
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def deviation_id
parsed_url.work_id || parsed_referer&.work_id
end
def artist_name_from_url
parsed_url.username || parsed_referer&.username
end
# Fetches and parses the deviation's HTML page; nil when the work was deleted.
def page
return nil if page_url_from_image_url.blank?
resp = http.cache(1.minute).get(page_url_from_image_url, follow: {max_hops: 1})
if resp.status.success?
resp.parse
# the work was deleted
elsif resp.code == 404
nil
else
raise "failed to fetch page (got code #{resp.code})"
end
end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For hidden or deleted works the UUID will be nil.
def uuid
return nil if page.nil?
meta = page.at_css('meta[property="da:appurl"]')
return nil if meta.nil?
appurl = meta["content"]
uuid = appurl[%r{\ADeviantArt://deviation/(.*)\z}, 1]
uuid
end
memoize :uuid
# Authenticated API client; the access token is cached for reuse.
def api_client
api_client = DeviantArtApiClient.new(
Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret
)
api_client.access_token = Cache.get("da-access-token", 11.weeks) do
api_client.access_token.to_hash
end
api_client
end
memoize :api_client
def api_deviation
return {} if uuid.nil?
api_client.deviation(uuid)
end
memoize :api_deviation
def api_metadata
return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first
end
memoize :api_metadata
def api_download
return {} unless uuid.present? && api_deviation[:is_downloadable]
api_client.download(uuid)
end
memoize :api_download
def api_response
{
deviation: api_deviation,
metadata: api_metadata,
download: api_download
}
end
end
end
end

View File

@@ -0,0 +1,124 @@
# frozen_string_literal: true
# @see Source::URL::Fanbox
module Source
class Extractor
# Extractor for Pixiv Fanbox (fanbox.cc) posts, cover images, and direct
# image URLs. Uses the public api.fanbox.cc JSON endpoints.
class Fanbox < Source::Extractor
def match?
Source::URL::Fanbox === parsed_url
end
def image_urls
if parsed_url.image_url?
[parsed_url.full_image_url]
elsif api_response.present?
# There's two ways pics are returned via api:
# Pics in proper array: https://yanmi0308.fanbox.cc/posts/1141325
# Embedded pics (imageMap): https://www.fanbox.cc/@tsukiori/posts/1080657
images = api_response.dig("body", "images").to_a + api_response.dig("body", "imageMap").to_a.map { |id| id[1] }
# The following is needed because imageMap is sorted alphabetically rather than by image order
sort_order = api_response.dig("body", "blocks").to_a.map { |b| b["imageId"] if b["type"] == "image" }.compact.uniq
images = images.sort_by { |img| sort_order.index(img["id"]) } if sort_order.present?
images.pluck("originalUrl")
else
[]
end
end
def page_url
if artist_name.present? && illust_id.present?
"https://#{artist_name}.fanbox.cc/posts/#{illust_id}"
elsif parsed_url.image_url? && artist_name.present?
# Cover images
"https://#{artist_name}.fanbox.cc"
end
end
def profile_url
return if artist_name.blank?
"https://#{artist_name}.fanbox.cc"
end
def artist_name
artist_name_from_url || api_response["creatorId"] || artist_api_response["creatorId"]
end
def display_name
api_response.dig("user", "name") || artist_api_response.dig("user", "name")
end
def other_names
[artist_name, display_name].compact.uniq
end
def tags
api_response["tags"].to_a.map { |tag| [tag, "https://fanbox.cc/tags/#{tag}"] }
end
def artist_commentary_title
api_response["title"]
end
def artist_commentary_desc
body = api_response["body"]
return if body.blank?
if body["text"].present?
body["text"]
elsif body["blocks"].present?
# Reference: https://official.fanbox.cc/posts/182757
# Commentary can get pretty complex, but unfortunately it's served in json format so it's a pain to parse it.
# I've left out parsing external embeds because each supported site has its own id mapped to the domain
commentary = body["blocks"].map do |node|
if node["type"] == "image"
body["imageMap"][node["imageId"]]["originalUrl"]
else
node["text"] || "\n"
end
end
commentary.join("\n")
end
end
def illust_id
parsed_url.work_id || parsed_referer&.work_id
end
def artist_id_from_url
parsed_url.user_id || parsed_referer&.user_id
end
def artist_name_from_url
parsed_url.username || parsed_referer&.username
end
# Post metadata from the API; {} when the post id is unknown, the JSON is
# malformed, or the post is restricted with no visible body.
def api_response
return {} if illust_id.blank?
resp = client.get("https://api.fanbox.cc/post.info?postId=#{illust_id}")
json_response = JSON.parse(resp)["body"]
# At some point in 2020 fanbox stopped hiding R18 posts from the api
# This check exists in case they ever start blocking them again
return {} if json_response["restrictedFor"] == 2 && json_response["body"].blank?
json_response
rescue JSON::ParserError
{}
end
def artist_api_response
# Needed to fetch artist from cover pages
return {} if artist_id_from_url.blank?
resp = client.get("https://api.fanbox.cc/creator.get?userId=#{artist_id_from_url}")
JSON.parse(resp)["body"]
rescue JSON::ParserError
{}
end
# The API rejects requests without a fanbox.cc Origin header.
def client
@client ||= http.headers(Origin: "https://fanbox.cc").cache(1.minute)
end
end
end
end

View File

@@ -0,0 +1,163 @@
# frozen_string_literal: true
class Source::Extractor
  # Extractor for Fantia (fantia.jp) posts and products. Posts are fetched via
  # the JSON API; products are scraped from their HTML page.
  class Fantia < Source::Extractor
    # Only enabled when a fantia.jp session cookie is configured, since most
    # content requires a logged-in session.
    def self.enabled?
      Danbooru.config.fantia_session_id.present?
    end

    def match?
      Source::URL::Fantia === parsed_url
    end

    def image_urls
      return [parsed_url.full_image_url] if parsed_url.image_url?
      return [image_from_downloadable(parsed_url)] if parsed_url.downloadable?

      images = images_for_post.presence || images_for_product.presence || []

      full_images = images.compact.map do |image|
        parsed = Source::URL.parse(image)

        if parsed&.image_url?
          parsed.full_image_url
        elsif parsed&.downloadable?
          image_from_downloadable(parsed)
        else
          image
        end
      end

      full_images.compact.uniq
    end

    # Resolve a download URL (via HEAD) to its final redirected location;
    # returns the input URL unchanged if the request fails.
    def image_from_downloadable(url)
      resp = http.head(url)
      return url if resp.status != 200
      resp.uri.to_s
    end

    def images_for_post
      return [] unless api_response.present?

      images = [api_response.dig("post", "thumb_micro")]

      api_response.dig("post", "post_contents").to_a.map do |content|
        next if content["visible_status"] != "visible"

        case content["category"]
        when "photo_gallery"
          content["post_content_photos"].to_a.map { |i| images << i.dig("url", "original") }
        when "file"
          images << image_from_downloadable("https://www.fantia.jp/#{content["download_uri"]}")
        when "blog"
          # Blog-style sections embed their content as a JSON document inside
          # the comment field.
          begin
            sub_json = JSON.parse(content["comment"])
          rescue JSON::ParserError
            # BUGFIX: was `rescue Json::ParserError` (undefined constant), which
            # would have raised NameError instead of handling the parse failure.
            sub_json = {}
          end

          sub_json["ops"].to_a.map { |js| images << js.dig("insert", "fantiaImage", "url") }
        end
      end

      images
    end

    def images_for_product
      html_response&.css(".product-gallery-item .img-fluid").to_a.map do |element|
        element["src"] unless element["src"] =~ %r{/fallback/}
      end.compact
    end

    def page_url
      parsed_url.page_url || parsed_referer&.page_url
    end

    def tags
      case work_type
      when "post"
        api_response&.dig("post", "tags").to_a.map do |tag|
          [tag["name"], "https://fantia.jp/posts?tag=#{tag["name"]}"]
        end
      when "product"
        html_response&.css(".product-category a").to_a.map do |element|
          tag_name = element.text.delete_prefix("#")
          [tag_name, "https://fantia.jp/products?product_category=##{tag_name}"]
        end
      else
        []
      end
    end

    def other_names
      case work_type
      when "post"
        [api_response&.dig("post", "fanclub", "creator_name")].compact
      when "product"
        [html_response&.at(".fanclub-name a")&.text].compact
      end
    end

    def profile_url
      case work_type
      when "post"
        fanclub_id = api_response&.dig("post", "fanclub", "id")
        return unless fanclub_id.present?
        "https://fantia.jp/fanclubs/#{fanclub_id}"
      when "product"
        href = html_response&.at(".fanclub-name a")&.[]("href")
        return unless href.present?
        URI.join("https://fantia.jp/", href).to_s
      end
    end

    def artist_commentary_title
      case work_type
      when "post"
        api_response&.dig("post", "title")
      when "product"
        html_response&.at(".product-title")&.text
      end
    end

    def artist_commentary_desc
      case work_type
      when "post"
        api_response&.dig("post", "comment")
      when "product"
        html_response&.at(".product-description")&.text
      end
    end

    def dtext_artist_commentary_desc
      DText.from_html(artist_commentary_desc)
    end

    def work_type
      parsed_url.work_type || parsed_referer&.work_type
    end

    def work_id
      parsed_url.work_id || parsed_referer&.work_id
    end

    # JSON API response for posts; {} for products or on any failure.
    def api_response
      return {} unless work_type == "post"

      api_url = "https://fantia.jp/api/v1/posts/#{work_id}"
      response = http.cache(1.minute).get(api_url)
      return {} unless response.status == 200

      JSON.parse(response)
    rescue JSON::ParserError
      {}
    end

    # Parsed HTML page for products; nil for posts or on failure.
    def html_response
      return nil unless work_type == "product"

      response = http.cache(1.minute).get("https://fantia.jp/products/#{work_id}")
      return nil unless response.status == 200

      response.parse
    end

    # All requests carry the configured session cookie.
    def http
      Danbooru::Http.new.cookies(_session_id: Danbooru.config.fantia_session_id)
    end
  end
end

View File

@@ -0,0 +1,91 @@
# frozen_string_literal: true
# @see Source::URL::Foundation
module Source
  class Extractor
    # Extractor for Foundation (foundation.app) artwork pages. Metadata comes
    # from the Next.js __NEXT_DATA__ JSON blob embedded in the page.
    class Foundation < Source::Extractor
      def match?
        Source::URL::Foundation === parsed_url
      end

      def image_urls
        return [parsed_url.full_image_url] if parsed_url.full_image_url.present?

        src = page&.at(".fullscreen img, .fullscreen video")&.attr(:src)
        return [] if src.nil?

        [Source::URL.parse(src).full_image_url].compact
      end

      def page_url
        parsed_url.page_url || parsed_referer&.page_url
      end

      def page
        return nil if page_url.blank?

        response = http.cache(1.minute).get(page_url)
        response.parse if response.status == 200
      end

      def tags
        artwork_tags = api_response.dig("props", "pageProps", "artwork", "tags").to_a
        artwork_tags.map { |tag| [tag, "https://foundation.app/tags/#{tag}"] }
      end

      def artist_name
        parsed_url.username || parsed_referer&.username || api_response.dig("props", "pageProps", "artwork", "creator", "username")
      end

      def profile_url
        "https://foundation.app/@#{artist_name}" if artist_name.present?
      end

      def profile_urls
        [profile_url, creator_public_key_url].compact
      end

      def creator_public_key_url
        "https://foundation.app/#{creator_public_key}" unless creator_public_key.nil?
      end

      # The creator's wallet public key, as exposed in the page data.
      def creator_public_key
        api_response.dig("props", "pageProps", "artwork", "creator", "publicKey")
      end

      def artist_commentary_title
        return nil if page.blank?
        page.at("meta[property='og:title']")["content"].gsub(/ \| Foundation$/, "")
      end

      def artist_commentary_desc
        description_heading = page&.xpath("//h2[text()='Description']")&.first
        return nil if description_heading.blank?

        description_heading&.parent&.search("div").first&.to_html
      end

      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc)
      end

      # Parsed __NEXT_DATA__ JSON; {} when the page or blob is unavailable.
      def api_response
        return {} if page.nil?

        json_blob = page.at("#__NEXT_DATA__")&.text
        return {} if json_blob.blank?

        JSON.parse(json_blob).with_indifferent_access
      end
      memoize :api_response
    end
  end
end

View File

@@ -0,0 +1,74 @@
# frozen_string_literal: true
# @see Source::URL::HentaiFoundry
module Source
  class Extractor
    # Extractor for Hentai Foundry (hentai-foundry.com) picture pages.
    class HentaiFoundry < Source::Extractor
      def match?
        Source::URL::HentaiFoundry === parsed_url
      end

      def image_urls
        elements = page&.search("#picBox img")
        return [] if elements.nil?

        elements.to_a.map { |element| URI.join(page_url, element["src"]).to_s }
      end

      def page_url
        return nil if illust_id.blank?
        return "https://www.hentai-foundry.com/pic-#{illust_id}" if artist_name.blank?

        "https://www.hentai-foundry.com/pictures/user/#{artist_name}/#{illust_id}"
      end

      # Fetch and parse the picture page; `enterAgree=1` bypasses the age gate.
      def page
        return nil if page_url.blank?

        response = http.cache(1.minute).get("#{page_url}?enterAgree=1")
        response.parse if response.status == 200
      end

      def tags
        (page&.search(".boxbody [rel='tag']") || []).map do |tag_element|
          [tag_element.text, URI.join(page_url, tag_element.attr("href")).to_s]
        end
      end

      def artist_name
        parsed_url.username || parsed_referer&.username
      end

      def profile_url
        "https://www.hentai-foundry.com/user/#{artist_name}" if artist_name.present?
      end

      def artist_commentary_title
        page&.search("#picBox .imageTitle")&.text
      end

      def artist_commentary_desc
        page&.search("#descriptionBox .picDescript")&.to_html
      end

      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n")
      end

      def illust_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      memoize :page
    end
  end
end

View File

@@ -0,0 +1,61 @@
# frozen_string_literal: true
# @see Source::URL::Lofter
module Source
  class Extractor
    # Extractor for Lofter (*.lofter.com) posts and images.
    class Lofter < Source::Extractor
      def match?
        Source::URL::Lofter === parsed_url
      end

      def image_urls
        return [parsed_url.full_image_url] if parsed_url.image_url?

        img_nodes = page&.search(".imgclasstag img")
        img_nodes.to_a.pluck("src").map { |src| Source::URL.parse(src).full_image_url }
      end

      def profile_url
        "https://#{artist_name}.lofter.com" if artist_name.present?
      end

      def page_url
        return nil if illust_id.blank? || profile_url.blank?

        "#{profile_url}/post/#{illust_id}"
      end

      def page
        return nil if page_url.blank?

        response = http.cache(1.minute).get(page_url)
        response.parse if response.status == 200
      end

      def tags
        return [] if artist_name.blank?

        page&.search("[href*='#{artist_name}.lofter.com/tag/']").to_a.map do |anchor|
          href = anchor.attr("href")
          [Source::URL.parse(href).unescaped_tag, href]
        end
      end

      def artist_commentary_desc
        page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html
      end

      def illust_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      def artist_name
        parsed_url.username || parsed_referer&.username
      end

      memoize :page
    end
  end
end

View File

@@ -0,0 +1,97 @@
# frozen_string_literal: true
# @see Source::URL::Mastodon
class Source::Extractor
# Extractor for the Mastodon instances Danbooru supports (Pawoo and Baraag).
# Status metadata is fetched through MastodonApiClient.
class Mastodon < Source::Extractor
def match?
Source::URL::Mastodon === parsed_url
end
# Maps the recognized site name to its API/web domain.
def domain
case site_name
when "Pawoo" then "pawoo.net"
when "Baraag" then "baraag.net"
end
end
def image_urls
if parsed_url.image_url?
[parsed_url.full_image_url]
else
api_response.image_urls
end
end
def page_url
artist_name = artist_name_from_url
status_id = status_id_from_url
return if status_id.blank?
if artist_name.present?
"https://#{domain}/@#{artist_name}/#{status_id}"
else
# Fallback web UI URL when the username is unknown.
"https://#{domain}/web/statuses/#{status_id}"
end
end
def profile_url
if artist_name_from_url.present?
"https://#{domain}/@#{artist_name_from_url}"
elsif api_response.present? && api_response.profile_url.present?
api_response.profile_url
end
end
def account_url
return if account_id.blank?
"https://#{domain}/web/accounts/#{account_id}"
end
def profile_urls
[profile_url, account_url].compact
end
def artist_name
api_response.account_name
end
def artist_name_from_url
parsed_url.username || parsed_referer&.username
end
def other_names
[api_response.display_name]
end
def account_id
parsed_url.user_id || parsed_referer&.user_id || api_response.account_id
end
def status_id_from_url
parsed_url.work_id || parsed_referer&.work_id
end
def artist_commentary_desc
api_response.commentary
end
def tags
api_response.tags
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
if element.name == "a"
# don't include links to the toot itself.
media_urls = api_response.json["media_attachments"].map { |attr| attr["text_url"] }
element["href"] = nil if element["href"].in?(media_urls)
end
end.strip
end
def api_response
MastodonApiClient.new(domain, status_id_from_url)
end
memoize :api_response
end
end

View File

@@ -0,0 +1,96 @@
# frozen_string_literal: true
# @see Source::URL::Moebooru
module Source
class Extractor
# Extractor for Moebooru-based boorus (yande.re, konachan.com). Looks the
# post up via the site's post.json API and, when the post records an
# upstream source, delegates commentary/artist fields to the extractor for
# that source (sub_extractor).
class Moebooru < Source::Extractor
delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_extractor, allow_nil: true
delegate :site_name, :domain, to: :parsed_url
def match?
Source::URL::Moebooru === parsed_url
end
def image_urls
return [] if post_md5.blank? || file_ext.blank?
[Source::URL::Moebooru.full_image_url(site_name, post_md5, file_ext, post_id)]
end
def page_url
return nil if post_id.blank?
"https://#{domain}/post/show/#{post_id}"
end
def tags
api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]
end
end
# XXX the base extractor excludes artist tags from the translated tags; we don't want that for moebooru.
def translated_tags
tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
end
# Moebooru returns an empty array when doing an md5:<hash> search for a
# deleted post. Because of this, api_response may be empty in some cases.
def api_response
if post_id_from_url.present?
params = { tags: "id:#{post_id_from_url}" }
elsif post_md5_from_url.present?
params = { tags: "md5:#{post_md5_from_url}" }
else
return {}
end
response = http.cache(1.minute).get("https://#{domain}/post.json", params: params)
post = response.parse.first&.with_indifferent_access
post || {}
end
memoize :api_response
concerning :HelperMethods do
# Extractor for the post's recorded upstream source, if any.
def sub_extractor
@sub_extractor ||= Source::Extractor.find(api_response[:source], default: nil)
end
# Determines the file extension, trying progressively weaker sources:
# URL, API fields, then probing the server directly.
def file_ext
if parsed_url.original_file_ext.present?
parsed_url.original_file_ext
# file_ext is not present in konachan's api (only on yande.re)
elsif api_response[:file_ext].present?
api_response[:file_ext]
# file_url is not present in yande.re's api on deleted posts
elsif api_response[:file_url].present?
api_response[:file_url][/\.(jpg|jpeg|png|gif)\z/i, 1]
# the api_response wasn't available because it's a deleted post.
elsif post_md5.present?
%w[jpg png gif].find { |ext| http_exists?("https://#{domain}/image/#{post_md5}.#{ext}") }
else
nil
end
end
def post_id_from_url
parsed_url.work_id || parsed_referer&.work_id
end
def post_md5_from_url
parsed_url.md5 || parsed_referer&.md5
end
def post_id
post_id_from_url || api_response[:id]
end
def post_md5
post_md5_from_url || api_response[:md5]
end
end
end
end
end

View File

@@ -0,0 +1,87 @@
# frozen_string_literal: true
# @see Source::URL::Newgrounds
module Source
class Extractor
# Extractor for Newgrounds (newgrounds.com) art pages. All metadata is
# scraped from the HTML page; there is no API involved.
class Newgrounds < Source::Extractor
def match?
Source::URL::Newgrounds === parsed_url
end
def image_urls
if parsed_url.image_url?
[url]
else
urls = []
urls += page&.css(".image img").to_a.map { |img| img["src"] }
# Images embedded in the author comments are lazy-loaded; prefer the
# data-smartload-src attribute when present.
urls += page&.css("#author_comments img[data-user-image='1']").to_a.map { |img| img["data-smartload-src"] || img["src"] }
urls.compact
end
end
def page_url
return nil if illust_title.blank? || user_name.blank?
"https://www.newgrounds.com/art/view/#{user_name}/#{illust_title}"
end
def page
return nil if page_url.blank?
response = http.cache(1.minute).get(page_url)
return nil if response.status == 404
response.parse
end
memoize :page
def tags
page&.css("#sidestats .tags a").to_a.map do |tag|
[tag.text, "https://www.newgrounds.com/search/conduct/art?match=tags&tags=" + tag.text]
end
end
# Newgrounds tags use hyphens where Danbooru tags use underscores.
def normalize_tag(tag)
tag = tag.tr("-", "_")
super(tag)
end
def artist_name
name = page&.css(".item-user .item-details h4 a")&.text&.strip || user_name
name&.downcase
end
def other_names
[artist_name, user_name].compact.uniq
end
def profile_url
# user names are not mutable, artist names are.
# However we need the latest name for normalization
"https://#{artist_name}.newgrounds.com"
end
def artist_commentary_title
page&.css(".pod-head > [itemprop='name']")&.text
end
def artist_commentary_desc
page&.css("#author_comments")&.to_html
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc)
end
def user_name
parsed_url.username || parsed_referer&.username
end
def illust_title
parsed_url.work_title || parsed_referer&.work_title
end
end
end
end

View File

@@ -0,0 +1,111 @@
# frozen_string_literal: true
# @see Source::URL::NicoSeiga
module Source
class Extractor
# Extractor for Nico Seiga (seiga.nicovideo.jp) illustrations and manga.
# Metadata comes from NicoSeigaApiClient; full image URLs are discovered by
# following the image/source redirect.
class NicoSeiga < Source::Extractor
# Requires a logged-in user session to access image sources.
def self.enabled?
Danbooru.config.nico_seiga_user_session.present?
end
def match?
Source::URL::NicoSeiga === parsed_url
end
def image_urls
if image_id.present?
[image_url_for("https://seiga.nicovideo.jp/image/source/#{image_id}")]
elsif illust_id.present?
[image_url_for("https://seiga.nicovideo.jp/image/source/#{illust_id}")]
elsif manga_id.present? && api_client.image_ids.present?
api_client.image_ids.map { |id| image_url_for("https://seiga.nicovideo.jp/image/source/#{id}") }
else
[image_url_for(url)]
end
end
# Follows the image/source redirect and rewrites it to the lohas.nicoseiga.jp
# /priv/ URL; returns the input unchanged when the redirect doesn't match.
def image_url_for(url)
return url if api_client.blank?
resp = api_client.head(url)
if resp.uri.to_s =~ %r{https?://.+/(\w+/\d+/\d+)\z}i
"https://lohas.nicoseiga.jp/priv/#{$1}"
else
url
end
end
def page_url
parsed_referer&.page_url || parsed_url.page_url
end
def profile_url
"https://seiga.nicovideo.jp/user/illust/#{api_client.user_id}" if api_client&.user_id.present?
end
def artist_name
return if api_client.blank?
api_client.user_name
end
def artist_commentary_title
return if api_client.blank?
api_client.title
end
def artist_commentary_desc
return if api_client.blank?
api_client.description
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
# White-on-white text is the site's spoiler convention.
if element.name == "font" && element["color"] == "white"
element.content = "[spoiler]#{element.content}[/spoiler]"
end
end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp
end
def tag_name
return if api_client&.user_id.blank?
"nicoseiga#{api_client.user_id}"
end
def tags
return [] if api_client.blank?
base_url = "https://seiga.nicovideo.jp/"
base_url += "manga/" if manga_id.present?
base_url += "tag/"
api_client.tags.map do |name|
[name, base_url + CGI.escape(name)]
end
end
def image_id
parsed_url.image_id || parsed_referer&.image_id
end
def illust_id
parsed_url.illust_id || parsed_referer&.illust_id
end
def manga_id
parsed_url.manga_id || parsed_referer&.manga_id
end
def api_client
if illust_id.present?
NicoSeigaApiClient.new(work_id: illust_id, type: "illust", http: http)
elsif manga_id.present?
NicoSeigaApiClient.new(work_id: manga_id, type: "manga", http: http)
elsif image_id.present?
# We default to illust to attempt getting the api anyway
NicoSeigaApiClient.new(work_id: image_id, type: "illust", http: http)
end
end
memoize :api_client
end
end
end

View File

@@ -0,0 +1,174 @@
# frozen_string_literal: true
# @see Source::URL::Nijie
module Source
class Extractor
# Extractor for Nijie (nijie.info). Requires a logged-in session: a session
# cookie is obtained by POSTing the configured credentials and cached, and
# all page fetches use that cookie. Doujin-style posts use different page
# markup than regular illustrations, hence the doujin? branching.
class Nijie < Source::Extractor
def self.enabled?
Danbooru.config.nijie_login.present? && Danbooru.config.nijie_password.present?
end
def match?
Source::URL::Nijie === parsed_url
end
def image_urls
if parsed_url.image_url?
[parsed_url.full_image_url]
else
image_urls_from_page
end
end
def image_urls_from_page
if doujin?
images = page&.search("#dojin_left .left img").to_a.pluck("src")
images += page&.search("#dojin_diff img.mozamoza").to_a.pluck("data-original")
else
images = page&.search("div#gallery a > .mozamoza").to_a.pluck("src")
end
# Page image srcs are protocol-relative (//...); prepend https.
images.map { |img| Source::URL.parse("https:#{img}").full_image_url }
end
def page_url
return nil if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"
end
def profile_url
return nil if artist_id.blank?
"https://nijie.info/members.php?id=#{artist_id}"
end
def artist_name
if doujin?
page&.at("#dojin_left .right a[href*='members.php?id=']")&.text
else
page&.at("a.name")&.text
end
end
def artist_commentary_title
if doujin?
page&.search("#dojin_text p.title")&.text
else
page&.search("h2.illust_title")&.text
end
end
def artist_commentary_desc
if doujin?
page&.search("#dojin_text p:not(.title)")&.to_html
else
page&.search('#illust_text > p')&.to_html
end
end
def tags
links = page&.search("div#view-tag a") || []
search_links = links.select do |node|
node["href"] =~ /search(?:_dojin)?\.php/
end
search_links.map do |node|
[node.inner_text, "https://nijie.info" + node.attr("href")]
end
end
def tag_name
"nijie" + artist_id.to_s
end
# Converts Nijie HTML commentary to DText, unwrapping /jump.php redirect links.
def self.to_dtext(text)
text = text.to_s.gsub(/\r\n|\r/, "<br>")
dtext = DText.from_html(text) do |element|
if element.name == "a" && element["href"]&.start_with?("/jump.php")
element["href"] = element.text
end
end
dtext.strip
end
def illust_id
parsed_url.work_id || parsed_referer&.work_id
end
def artist_id_from_url
parsed_url.user_id || parsed_referer&.user_id
end
def artist_id_from_page
page&.search("a.name")&.first&.attr("href")&.match(/members\.php\?id=(\d+)/) { $1.to_i }
end
def artist_id
artist_id_from_url || artist_id_from_page
end
def doujin?
page&.at("#dojin_left").present?
end
# Fetches the post page. If the fetch fails or the login wall is shown, the
# cached session cookie is invalidated and nil is effectively returned
# (the next call will attempt a fresh login).
def page
return nil if page_url.blank? || client.blank?
response = client.cache(1.minute).get(page_url)
if response.status != 200 || response.parse.search("#login_illust").present?
clear_cached_session_cookie!
else
response.parse
end
end
memoize :page
def client
return nil if cached_session_cookie.nil?
http.cookies(R18: 1, **cached_session_cookie)
end
# Nijie is slow and flaky; use a generous timeout and many retries.
def http
super.timeout(60).use(retriable: { max_retries: 20 })
end
# { "NIJIEIJIEID" => "5ca3f816c0c1f3e647940b08b8ab7a45", "nijie_tok" => <long-base64-string> }
def cached_session_cookie
Cache.get("nijie-session-cookie", 60.minutes, skip_nil: true) do
session_cookie
end
end
def clear_cached_session_cookie!
flush_cache # clear memoized session cookie
Cache.delete("nijie-session-cookie")
end
# Logs in with the configured credentials and returns the session cookies
# as a hash, or nil when login fails.
def session_cookie
login_page = http.get("https://nijie.info/login.php").parse
form = {
email: Danbooru.config.nijie_login,
password: Danbooru.config.nijie_password,
url: login_page.at("input[name='url']")&.fetch("value"),
save: "on",
ticket: ""
}
response = http.post("https://nijie.info/login_int.php", form: form)
if response.status == 200
response.cookies.cookies.map { |cookie| [cookie.name, cookie.value] }.to_h
else
DanbooruLogger.info "Nijie login failed (#{url}, #{response.status})"
nil
end
end
memoize :client, :cached_session_cookie
end
end
end

View File

@@ -0,0 +1,19 @@
# frozen_string_literal: true
module Source
  class Extractor
    # Fallback extractor used when no site-specific extractor matches the URL.
    class Null < Source::Extractor
      # No page URL can be derived from an unrecognized source.
      def page_url
        nil
      end

      # The source URL itself is the only known image URL.
      def image_urls
        [url]
      end

      # Look the artist up by matching the raw URL against artist URL records.
      def artists
        ArtistFinder.find_artists(url)
      end
    end
  end
end

View File

@@ -0,0 +1,165 @@
# frozen_string_literal: true
# @see Source::URL::Pixiv
module Source
  class Extractor
    # Extractor for Pixiv artworks. All metadata comes from Pixiv's ajax API,
    # which requires a logged-in session cookie (PHPSESSID) to be configured.
    class Pixiv < Source::Extractor
      # Only usable when a Pixiv session cookie is configured.
      def self.enabled?
        Danbooru.config.pixiv_phpsessid.present?
      end

      # Convert Pixiv commentary HTML to DText. Rewrites Pixiv's inline
      # illust/NNN and user/NNN shorthand links into DText links, and unwraps
      # their /jump.php redirector around external links.
      def self.to_dtext(text)
        return nil if text.nil?

        text = text.gsub(%r{<a href="https?://www\.pixiv\.net/en/artworks/([0-9]+)">illust/[0-9]+</a>}i) do |_match|
          pixiv_id = $1
          %(pixiv ##{pixiv_id} "»":[#{Routes.posts_path(tags: "pixiv:#{pixiv_id}")}])
        end

        text = text.gsub(%r{<a href="https?://www\.pixiv\.net/en/users/([0-9]+)">user/[0-9]+</a>}i) do |_match|
          member_id = $1
          profile_url = "https://www.pixiv.net/users/#{member_id}"
          artist_search_url = Routes.artists_path(search: { url_matches: profile_url })
          %("user/#{member_id}":[#{profile_url}] "»":[#{artist_search_url}])
        end

        DText.from_html(text) do |element|
          # /jump.php?<escaped-url> wraps external links; replace with the
          # decoded destination (the query string is the URL itself).
          if element.name == "a" && element["href"].match?(%r!\A/jump\.php\?!)
            element["href"] = Addressable::URI.heuristic_parse(element["href"]).normalized_query
          end
        end
      end

      def match?
        Source::URL::Pixiv === parsed_url
      end

      # For ugoira works, the original zip; for a direct image URL with a
      # known page number, just that page; otherwise every page of the work.
      def image_urls
        if is_ugoira?
          [api_ugoira[:originalSrc]]
        elsif parsed_url.image_url? && parsed_url.page && original_urls.present?
          [original_urls[parsed_url.page]]
        elsif parsed_url.image_url?
          [parsed_url.to_s]
        else
          original_urls
        end
      end

      # Original-size URLs of every page of the work, from the ajax API.
      def original_urls
        api_pages.pluck("urls").pluck("original").to_a
      end

      def page_url
        return nil if illust_id.blank?
        "https://www.pixiv.net/artworks/#{illust_id}"
      end

      def profile_url
        if api_illust[:userId].present?
          "https://www.pixiv.net/users/#{api_illust[:userId]}"
        elsif parsed_url.profile_url.present?
          parsed_url.profile_url
        end
      end

      # The artist's stacc URL, based on their account moniker.
      def stacc_url
        return nil if moniker.blank?
        "https://www.pixiv.net/stacc/#{moniker}"
      end

      def profile_urls
        [profile_url, stacc_url].compact
      end

      def artist_name
        api_illust[:userName]
      end

      # Display name plus the account moniker, excluding Pixiv's default
      # auto-generated "user_XXXX" monikers.
      def other_names
        other_names = [artist_name]
        other_names << moniker unless moniker&.starts_with?("user_")
        other_names.compact.uniq
      end

      def artist_commentary_title
        api_illust[:title]
      end

      def artist_commentary_desc
        api_illust[:description]
      end

      def tag_name
        moniker
      end

      def tags
        api_illust.dig(:tags, :tags).to_a.map do |item|
          tag = item[:tag]
          [tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
        end
      end

      # Strip "<N>users入り" (bookmark-count) suffixes before translation.
      def normalize_tag(tag)
        tag.gsub(/\d+users入り\z/i, "")
      end

      # Attach the ugoira frame metadata to the downloaded file so it can be
      # processed later.
      def download_file!(url)
        file = super(url)
        file.frame_data = ugoira_frame_data if is_ugoira?
        file
      end

      # Pixiv tags are sometimes slash-separated pairs; if the whole tag has
      # no translation, try translating each slash-separated part.
      def translate_tag(tag)
        translated_tags = super(tag)

        if translated_tags.empty? && tag.include?("/")
          translated_tags = tag.split("/").flat_map { |translated_tag| super(translated_tag) }
        end

        translated_tags
      end

      def related_posts_search_query
        illust_id.present? ? "pixiv:#{illust_id}" : "source:#{url}"
      end

      def is_ugoira?
        original_urls.any? { |url| Source::URL.parse(url).is_ugoira? }
      end

      def illust_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      def api_client
        PixivAjaxClient.new(Danbooru.config.pixiv_phpsessid, http: http)
      end

      def api_illust
        api_client.illust(illust_id)
      end

      def api_pages
        api_client.pages(illust_id)
      end

      def api_ugoira
        api_client.ugoira_meta(illust_id)
      end

      # The artist's account name, from the URL when present, otherwise from
      # the API.
      def moniker
        parsed_url.username || api_illust[:userAccount]
      end

      def ugoira_frame_data
        api_ugoira[:frames]
      end

      memoize :illust_id, :api_client, :api_illust, :api_pages, :api_ugoira
    end
  end
end

View File

@@ -0,0 +1,82 @@
# frozen_string_literal: true
# @see Source::URL::PixivSketch
module Source
  class Extractor
    # Extractor for Pixiv Sketch posts, based on the site's JSON API.
    class PixivSketch < Source::Extractor
      def match?
        Source::URL::PixivSketch === parsed_url
      end

      def image_urls
        if parsed_url.image_url?
          [parsed_url.full_image_url]
        else
          image_urls_from_api
        end
      end

      # Full-size ("url2x") URLs of every image attached to the post.
      def image_urls_from_api
        api_response.dig("data", "media").to_a.pluck("photo").pluck("original").pluck("url2x")
      end

      def profile_url
        "https://sketch.pixiv.net/@#{artist_name}" if artist_name.present?
      end

      # The Sketch account name (unique_name).
      def artist_name
        api_response.dig("data", "user", "unique_name")
      end

      def other_names
        [artist_name, display_name].compact
      end

      # Includes the linked main Pixiv profile when known.
      def profile_urls
        [profile_url, pixiv_profile_url].compact
      end

      def artist_commentary_desc
        api_response.dig("data", "text")
      end

      def tags
        api_response.dig("data", "tags").to_a.map do |tag|
          [tag, "https://sketch.pixiv.net/tags/#{tag}"]
        end
      end

      def display_name
        api_response.dig("data", "user", "name")
      end

      # The artist's main Pixiv (not Sketch) profile, if linked.
      def pixiv_profile_url
        "https://www.pixiv.net/users/#{pixiv_user_id}" if pixiv_user_id.present?
      end

      def pixiv_user_id
        api_response.dig("data", "user", "pixiv_user_id")
      end

      # curl https://sketch.pixiv.net/api/items/5835314698645024323.json | jq
      def api_response
        return {} if api_url.blank?

        response = http.cache(1.minute).get(api_url)
        return {} if response.status == 404
        response.parse
      end

      def page_url
        parsed_url.page_url || parsed_referer&.page_url
      end

      def api_url
        parsed_url.api_url || parsed_referer&.api_url
      end

      memoize :api_response
    end
  end
end

View File

@@ -0,0 +1,116 @@
# frozen_string_literal: true
# @see Source::URL::Plurk
module Source
  class Extractor
    # Extractor for Plurk posts. Works are scraped from the HTML page; adult
    # posts additionally require an API call to get images in the replies.
    class Plurk < Source::Extractor
      def match?
        Source::URL::Plurk === parsed_url
      end

      def image_urls
        # * Posts can have up to 10 images.
        # * Artists commonly post extra images by replying to their own post.
        # * Adult posts are hidden for logged out users. The main images can be found by
        #   scraping a <script> tag, but an API call is needed to get the images in the replies.
        #
        # Examples:
        # * https://www.plurk.com/p/om6zv4 (non-adult, single image)
        # * https://www.plurk.com/p/okxzae (non-adult, multiple images, with replies)
        # * https://www.plurk.com/p/omc64y (adult, multiple images, with replies)
        if parsed_url.image_url?
          [url]
        elsif page_json["porn"]
          # in case of adult posts, we get the main images and the replies separately
          images_from_script_tag + images_from_replies
        else
          images_from_page
        end
      end

      def page_url
        return nil if illust_id.blank?
        "https://plurk.com/p/#{illust_id}"
      end

      # The base-36 post ID from the URL.
      def illust_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      # Fetch and parse the post page; nil on failure.
      def page
        return nil if page_url.blank?

        response = http.cache(1.minute).get(page_url)
        return nil unless response.status == 200
        response.parse
      end

      # For non-adult works, returns both the main images and the images posted by the artist in the replies.
      # For adult works, returns only the main images.
      def images_from_page
        page&.search(".bigplurk .content a img, .response.highlight_owner .content a img").to_a.pluck("alt")
      end

      # Returns only the main images, not the images posted in the replies. Used for adult works.
      def images_from_script_tag
        URI.extract(page_json["content_raw"])
      end

      # Returns images posted by the artist in the replies. Used for adult works.
      def images_from_replies
        artist_responses = api_replies["responses"].to_a.select { _1["user_id"].to_i == artist_id.to_i }
        urls = artist_responses.pluck("content_raw").flat_map { URI.extract(_1) }
        urls.select { Source::URL.parse(_1)&.image_url? }.uniq
      end

      # The `plurk = {...}` object embedded in a <script> tag on the page.
      # `new Date(...)` wrappers are reduced to their argument so the blob
      # parses as plain JSON. Returns {} when not found.
      def page_json
        script_text = page&.search("body script").to_a.map(&:text).grep(/plurk =/).first.to_s
        json = script_text.strip.delete_prefix("plurk = ").delete_suffix(";").gsub(/new Date\((.*?)\)/) { $1 }
        return {} if json.blank?
        JSON.parse(json)
      end

      # Replies to the post, from the API. The API takes the post ID as a
      # decimal integer, so the base-36 ID from the URL is converted first.
      def api_replies
        return {} if illust_id.blank?

        response = http.cache(1.minute).post("https://www.plurk.com/Responses/get", form: { plurk_id: illust_id.to_i(36), from_response_id: 0 })
        return {} unless response.status == 200
        response.parse
      end

      # The artist's account name, taken from the href of their profile link.
      def tag_name
        page&.at(".bigplurk .user a")&.[](:href)&.gsub(%r{^/}, "")
      end

      # The artist's display name, taken from the text of their profile link.
      def artist_name
        page&.at(".bigplurk .user a")&.text
      end

      def artist_id
        page&.at("a[data-uid]")&.attr("data-uid").to_i
      end

      def profile_url
        # Fix: guard on tag_name, the value actually interpolated into the
        # URL. Previously this checked artist_name, so a profile link with
        # text but no usable href produced "https://www.plurk.com/".
        return nil if tag_name.blank?
        "https://www.plurk.com/#{tag_name}"
      end

      def artist_commentary_desc
        page&.search(".bigplurk .content .text_holder, .response.highlight_owner .content .text_holder")&.to_html
      end

      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc) do |element|
          # Strip the text of <a> tags, keeping only the link itself.
          if element.name == "a"
            element.content = ""
          end
        end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
      end

      memoize :page, :page_json, :api_replies
    end
  end
end

View File

@@ -0,0 +1,112 @@
# frozen_string_literal: true
# @see Source::URL::Skeb
module Source
  class Extractor
    # Extractor for Skeb commissions, based on Skeb's JSON API.
    class Skeb < Extractor
      def match?
        Source::URL::Skeb === parsed_url
      end

      def image_urls
        if parsed_url.image_url?
          [url]
        elsif unwatermarked_url.present?
          # If the unwatermarked URL is present, then find and replace the watermarked URL
          # with the unwatermarked version (unless the watermarked version is a video or
          # gif, in which case the unwatermarked URL is not used because it's a still image).
          #
          # https://skeb.jp/@goma_feet/works/1: https://skeb.imgix.net/uploads/origins/78ca23dc-a053-4ebe-894f-d5a06e228af8?bg=%23fff&auto=format&w=800&s=3de55b04236059113659f99fd6900d7d
          # https://skeb.jp/@2gi0gi_/works/13: https://skeb.imgix.net/requests/191942_0?bg=%23fff&fm=jpg&q=45&w=696&s=5783ee951cc55d183713395926389453
          # https://skeb.jp/@tontaro_/works/316: https://skeb.imgix.net/uploads/origins/5097b1e1-18ce-418e-82f0-e7e2cdab1cea?bg=%23fff&auto=format&txtfont=bold&txtshad=70&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150&txt=SAMPLE&fm=mp4&w=800&s=fcff06871e114b3dbf505c04f27b5ed1
          sample_urls.map do |sample_url|
            if sample_url.path == unwatermarked_url.path && sample_url.watermarked? && !sample_url.animated?
              unwatermarked_url
            else
              sample_url
            end
          end.map(&:to_s)
        else
          sample_urls.map(&:to_s)
        end
      end

      # The preview URLs from the API, parsed into Source::URL objects.
      def sample_urls
        api_response["previews"].to_a.pluck("url").compact.map { |url| Source::URL.parse(url) }
      end

      # Some posts have an unwatermarked version of the image. Usually it's lower
      # resolution and lower JPEG quality than the watermarked image. Multi-image posts
      # will have only one unwatermarked URL.
      def unwatermarked_url
        return nil if api_response["article_image_url"].nil?
        Source::URL.parse(api_response["article_image_url"])
      end

      def page_url
        return unless artist_name.present? && illust_id.present?
        "https://skeb.jp/@#{artist_name}/works/#{illust_id}"
      end

      def api_url
        return nil unless artist_name.present? && illust_id.present?
        "https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"
      end

      # Fetch the work from Skeb's API; returns {} on failure.
      # NOTE(review): the literal "Bearer null" token appears deliberate
      # (anonymous access) — confirm against the API before changing.
      def api_response
        return {} unless api_url.present?

        headers = {
          Referer: profile_url,
          Authorization: "Bearer null",
        }
        response = http.cache(1.minute).headers(headers).get(api_url)
        return {} unless response.status == 200
        # The status check is required for private commissions, which return 404
        response.parse
      end

      def profile_url
        return nil if artist_name.blank?
        "https://skeb.jp/@#{artist_name}"
      end

      # The account name from the URL (the "@name" part).
      def artist_name
        parsed_url.username || parsed_referer&.username
      end

      def display_name
        api_response&.dig("creator", "name")
      end

      def illust_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      def other_names
        [display_name].compact.uniq
      end

      def artist_commentary_desc
        api_response&.dig("source_body") || api_response&.dig("body")
        # skeb "titles" are not needed: it's just the first few characters of the description
      end

      # The "thanks" message attached to the commission (rendered below as
      # "Client Response").
      def client_response
        api_response&.dig("source_thanks") || api_response&.dig("thanks")
      end

      def dtext_artist_commentary_desc
        if client_response.present? && artist_commentary_desc.present?
          "h6. Original Request:\n\n#{artist_commentary_desc}\n\nh6. Client Response:\n\n#{client_response}"
        else
          artist_commentary_desc
        end
      end

      memoize :api_response
    end
  end
end

View File

@@ -0,0 +1,113 @@
# frozen_string_literal: true
# @see Source::URL::Tinami
module Source
  class Extractor
    # Extractor for Tinami works. Pages are scraped directly; full-size image
    # URLs require a POST back to the page with its anti-CSRF token.
    class Tinami < Source::Extractor
      def match?
        Source::URL::Tinami === parsed_url
      end

      def image_urls
        if parsed_url.image_url?
          [url]
        # http://www.tinami.com/view/1087268 (single image)
        elsif page&.css("img.captify")&.size.to_i == 1
          [full_image_url].compact
        # http://www.tinami.com/view/1087270 (multiple images)
        elsif image_sub_ids.present?
          image_sub_ids.map { |sub_id| full_image_url(sub_id) }.compact
        # http://www.tinami.com/view/1087271 (multiple images)
        elsif nv_body_image_urls.present?
          nv_body_image_urls
        # http://www.tinami.com/view/1087267 (no images, text only)
        else
          []
        end
      end

      # Images embedded directly in the page body (protocol-relative srcs).
      def nv_body_image_urls
        page&.css(".viewbody .nv_body img").to_a.map do |img|
          "https:#{img[:src]}" # img[:src] == "//img.tinami.com/illust2/img/619/6234b647da609.jpg"
        end
      end

      # The sub_ids of each image in a multi-image post.
      def image_sub_ids
        page&.css(".viewbody #controller_model .thumbnail_list").to_a.map { |td| td.attr("sub_id") }
      end

      def page_url
        parsed_url.page_url || parsed_referer&.page_url
      end

      def tags
        page&.css("#view .tag a[href^='/search/list']").to_a.map do |tag|
          [tag.text, "https://www.tinami.com/search/list?keyword=#{CGI.escape(tag.text)}"]
        end
      end

      def profile_url
        "https://www.tinami.com/creator/profile/#{user_id}" if user_id.present?
      end

      def tag_name
        nil
      end

      def artist_name
        page&.at("#view .prof > p > a > strong")&.text
      end

      def artist_commentary_title
        page&.at("#view .viewdata h1")&.text.to_s.strip
      end

      def artist_commentary_desc
        page&.at("#view .comment .description")&.text.to_s.strip.delete("\t")
      end

      # The creator ID, scraped from the profile link on the page.
      def user_id
        url = page&.at("#view .prof > p > a")&.attr("href")&.prepend("https://www.tinami.com")
        Source::URL.parse(url)&.user_id
      end

      def work_id
        parsed_url.work_id || parsed_referer&.work_id
      end

      # The anti-CSRF token from the "view original" form on the page.
      def ethna_csrf
        page&.at("#open_original_content input[name=ethna_csrf]")&.attr("value")
      end

      # Request the full-size URL for the given sub image (or the main image
      # when sub_id is nil); nil on failure.
      def full_image_url(sub_id = nil)
        return nil unless work_id.present? && ethna_csrf.present?

        # Note that we have to spoof the Referer here.
        response = http.post(page_url, form: { action_view_original: true, cont_id: work_id, sub_id: sub_id, ethna_csrf: ethna_csrf })
        return nil unless response.status == 200
        response.parse.at("body > div > a > img[src^='//img.tinami.com']")&.attr("src")&.prepend("https:")
      end

      # Fetch and parse the work page; nil on failure.
      def page
        return nil if page_url.blank?

        response = http.cache(1.minute).get(page_url)
        return nil unless response.status == 200
        response.parse
      end

      # All requests carry the configured session cookie and a spoofed Referer.
      def http
        super.cookies(Tinami2SESSID: Danbooru.config.tinami_session_id).use(:spoof_referrer)
      end

      memoize :page, :user_id, :work_id, :ethna_csrf, :image_urls, :image_sub_ids, :nv_body_image_urls
    end
  end
end

View File

@@ -0,0 +1,145 @@
# frozen_string_literal: true
# @see Source::URL::Tumblr
class Source::Extractor
  # Extractor for Tumblr posts, based on the Tumblr v2 API (plus scraping to
  # find the largest image variants).
  class Tumblr < Source::Extractor
    # Only usable when a Tumblr API key is configured.
    def self.enabled?
      Danbooru.config.tumblr_consumer_key.present?
    end

    def match?
      Source::URL::Tumblr === parsed_url
    end

    # The largest available version of every image or video in the post,
    # including images embedded inline in the commentary.
    def image_urls
      return [find_largest(parsed_url)].compact if parsed_url.asset_url?

      assets = []

      case post[:type]
      when "photo"
        assets += post[:photos].map do |photo|
          # Pick the largest size by pixel area.
          sizes = [photo[:original_size]] + photo[:alt_sizes]
          biggest = sizes.max_by { |x| x[:width] * x[:height] }
          biggest[:url]
        end
      when "video"
        assets += [post[:video_url]]
      end

      assets += inline_images
      assets.map { |url| find_largest(url) }
    end

    def page_url
      parsed_url.page_url || parsed_referer&.page_url || post_url_from_image_html&.page_url
    end

    def profile_url
      parsed_url.profile_url || parsed_referer&.profile_url || post_url_from_image_html&.profile_url
    end

    def artist_commentary_title
      case post[:type]
      when "text", "link"
        post[:title]
      when "answer"
        "#{post[:asking_name]} asked: #{post[:question]}"
      else
        nil
      end
    end

    def artist_commentary_desc
      case post[:type]
      when "text"
        post[:body]
      when "link"
        post[:description]
      when "photo", "video"
        post[:caption]
      when "answer"
        post[:answer]
      else
        nil
      end
    end

    def tags
      post[:tags].to_a.map do |tag|
        [tag, "https://tumblr.com/tagged/#{CGI.escape(tag)}"]
      end.uniq
    end

    # Convert dashes to underscores before the default normalization.
    def normalize_tag(tag)
      tag = tag.tr("-", "_")
      super(tag)
    end

    def dtext_artist_commentary_desc
      DText.from_html(artist_commentary_desc).strip
    end

    # Find the largest available version of the given image URL.
    def find_largest(image_url)
      parsed_image = Source::URL.parse(image_url)

      if parsed_image.full_image_url.present?
        image_url_html(parsed_image.full_image_url)&.at("img[src*='/#{parsed_image.directory}/']")&.[](:src)
      elsif parsed_image.variants.present?
        # Look for the biggest available version on media.tumblr.com. A bigger
        # version may or may not exist.
        parsed_image.variants.find { |variant| http_exists?(variant) }
      else
        parsed_image.original_url
      end
    end

    # Recover the post URL from a bare image URL by scraping the image page
    # for a /post/ link.
    def post_url_from_image_html
      extracted = image_url_html(parsed_url)&.at("[href*='/post/']")&.[](:href)
      Source::URL.parse(extracted)
    end

    # Fetch an image URL as HTML (rather than the image itself); nil on failure.
    def image_url_html(image_url)
      resp = http.cache(1.minute).headers(accept: "text/html").get(image_url)

      return nil if resp.code != 200
      resp.parse
    end

    # Image URLs embedded in the commentary HTML.
    def inline_images
      html = Nokogiri::HTML5.fragment(artist_commentary_desc)
      html.css("img").map { |node| node["src"] }
    end

    def artist_name
      parsed_url.blog_name || parsed_referer&.blog_name || post_url_from_image_html&.blog_name
    end

    def work_id
      parsed_url.work_id || parsed_referer&.work_id || post_url_from_image_html&.work_id
    end

    # Fetch the post from the Tumblr API. Returns {} when the extractor is
    # disabled, the post can't be identified, or the request fails.
    def api_response
      return {} unless self.class.enabled?
      return {} unless artist_name.present? && work_id.present?

      response = http.cache(1.minute).get(
        "https://api.tumblr.com/v2/blog/#{artist_name}/posts",
        params: { id: work_id, api_key: Danbooru.config.tumblr_consumer_key }
      )

      return {} if response.code != 200
      response.parse.with_indifferent_access
    end
    memoize :api_response

    # The post object from the API response, or {} when unavailable.
    def post
      api_response.dig(:response, :posts)&.first || {}
    end
  end
end

View File

@@ -0,0 +1,150 @@
# frozen_string_literal: true
# @see Source::URL::Twitter
class Source::Extractor
  # Extractor for tweets, based on the Twitter API.
  class Twitter < Source::Extractor
    # List of hashtag suffixes attached to tag other names
    # Ex: 西住みほ生誕祭2019 should be checked as 西住みほ
    # The regexes will not match if there is nothing preceding
    # the pattern to avoid creating empty strings.
    COMMON_TAG_REGEXES = [
      /(?<!\A)生誕祭(?:\d*)\z/,
      /(?<!\A)誕生祭(?:\d*)\z/,
      /(?<!\A)版もうひとつの深夜の真剣お絵描き60分一本勝負(?:_\d+)?\z/,
      /(?<!\A)版深夜の真剣お絵描き60分一本勝負(?:_\d+)?\z/,
      /(?<!\A)版深夜の真剣お絵かき60分一本勝負(?:_\d+)?\z/,
      /(?<!\A)深夜の真剣お絵描き60分一本勝負(?:_\d+)?\z/,
      /(?<!\A)版深夜のお絵描き60分一本勝負(?:_\d+)?\z/,
      /(?<!\A)版真剣お絵描き60分一本勝(?:_\d+)?\z/,
      /(?<!\A)版お絵描き60分一本勝負(?:_\d+)?\z/
    ]

    # Only usable when Twitter API credentials are configured.
    def self.enabled?
      Danbooru.config.twitter_api_key.present? && Danbooru.config.twitter_api_secret.present?
    end

    def match?
      Source::URL::Twitter === parsed_url
    end

    # Full-size photo URLs (":orig") and the highest-bitrate MP4 for videos
    # and animated gifs.
    def image_urls
      # https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
      if parsed_url.image_url?
        [parsed_url.full_image_url]
      elsif api_response.present?
        api_response.dig(:extended_entities, :media).to_a.map do |media|
          if media[:type] == "photo"
            media[:media_url_https] + ":orig"
          elsif media[:type].in?(["video", "animated_gif"])
            variants = media.dig(:video_info, :variants)
            videos = variants.select { |variant| variant[:content_type] == "video/mp4" }
            video = videos.max_by { |v| v[:bitrate].to_i }
            video[:url]
          end
        end
      else
        []
      end
    end

    def page_url
      return nil if status_id.blank? || tag_name.blank?
      "https://twitter.com/#{tag_name}/status/#{status_id}"
    end

    def profile_url
      return nil if tag_name.blank?
      "https://twitter.com/#{tag_name}"
    end

    # A profile URL keyed by the numeric user ID rather than the screen name.
    def intent_url
      user_id = api_response.dig(:user, :id_str)
      return nil if user_id.blank?
      "https://twitter.com/intent/user?user_id=#{user_id}"
    end

    def profile_urls
      [profile_url, intent_url].compact
    end

    # The @screen_name, from the URL when available, otherwise from the API.
    def tag_name
      if tag_name_from_url.present?
        tag_name_from_url
      elsif api_response.present?
        api_response.dig(:user, :screen_name)
      else
        ""
      end
    end

    def artist_name
      if api_response.present?
        api_response.dig(:user, :name)
      else
        tag_name
      end
    end

    def artist_commentary_title
      ""
    end

    def artist_commentary_desc
      api_response[:full_text].to_s
    end

    def tags
      api_response.dig(:entities, :hashtags).to_a.map do |hashtag|
        [hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"]
      end
    end

    # Strip the first matching COMMON_TAG_REGEXES suffix from the tag.
    def normalize_tag(tag)
      COMMON_TAG_REGEXES.each do |rg|
        norm_tag = tag.gsub(rg, "")
        if norm_tag != tag
          return norm_tag
        end
      end

      tag
    end

    # Expand t.co links (dropping media links entirely) and convert #hashtags
    # and @mentions to DText links.
    def dtext_artist_commentary_desc
      return "" if artist_commentary_desc.blank?

      url_replacements = api_response.dig(:entities, :urls).to_a.map do |obj|
        [obj[:url], obj[:expanded_url]]
      end

      url_replacements += api_response.dig(:extended_entities, :media).to_a.map do |obj|
        [obj[:url], ""]
      end

      url_replacements = url_replacements.to_h

      desc = artist_commentary_desc.unicode_normalize(:nfkc)
      desc = CGI.unescapeHTML(desc)
      desc = desc.gsub(%r{https?://t\.co/[a-zA-Z0-9]+}i, url_replacements)
      desc = desc.gsub(/#([^[:space:]]+)/, '"#\\1":[https://twitter.com/hashtag/\\1]')
      desc = desc.gsub(/@([a-zA-Z0-9_]+)/, '"@\\1":[https://twitter.com/\\1]')
      desc.strip
    end

    def api_client
      TwitterApiClient.new(Danbooru.config.twitter_api_key, Danbooru.config.twitter_api_secret)
    end

    # The tweet object from the API, or {} when disabled or unidentifiable.
    def api_response
      return {} unless self.class.enabled? && status_id.present?
      api_client.status(status_id)
    end

    def status_id
      parsed_url.status_id || parsed_referer&.status_id
    end

    def tag_name_from_url
      parsed_url.username || parsed_referer&.username
    end

    memoize :api_response
  end
end

View File

@@ -0,0 +1,102 @@
# frozen_string_literal: true
# @see Source::URL::Weibo
module Source
  class Extractor
    # Extractor for Weibo posts. Metadata is scraped from the $render_data
    # JSON blob embedded in the mobile (m.weibo.cn) version of the page.
    class Weibo < Source::Extractor
      def match?
        Source::URL::Weibo === parsed_url
      end

      # Full-size image URLs, or the largest available video variant for
      # video posts. Falls back to the source URL itself when no API response
      # is available. Always returns an array.
      def image_urls
        if parsed_url.image_url?
          [parsed_url.full_image_url]
        elsif api_response.present?
          if api_response["pics"].present?
            api_response["pics"].pluck("url").map { |url| Source::URL.parse(url).full_image_url }
          elsif api_response.dig("page_info", "type") == "video"
            variants = api_response["page_info"]["media_info"].to_h.values + api_response["page_info"]["urls"].to_h.values
            largest_video = variants.max_by do |variant|
              # Variant URLs embed their resolution as e.g. "template=1280x720".
              if /template=(?<width>\d+)x(?<height>\d+)/ =~ variant.to_s
                width.to_i * height.to_i
              else
                0
              end
            end
            # compact: guard against an empty variant list (previously this
            # could return [nil]).
            [largest_video].compact
          else
            # Fix: a present API response with neither pics nor a video used
            # to fall through and return nil instead of an array.
            []
          end
        else
          [url]
        end
      end

      def page_url
        # Fix: use dig so a response without a "user" key returns nil instead
        # of raising NoMethodError.
        artist_id = api_response.dig("user", "id")
        illust_base62_id = api_response["bid"]
        return nil unless artist_id.present? && illust_base62_id.present?

        "https://www.weibo.com/#{artist_id}/#{illust_base62_id}"
      end

      def tags
        return [] if api_response.blank?

        # Hashtags appear in the post HTML as <span class="surl-text">#tag#</span>.
        matches = api_response["text"]&.scan(/surl-text">#(.*?)#</).to_a.map { |m| m[0] }
        matches.map do |match|
          [match, "https://s.weibo.com/weibo/#{match}"]
        end
      end

      def profile_urls
        [parsed_url.profile_url, parsed_referer&.profile_url].compact.uniq
      end

      def profile_url
        "https://www.weibo.com/u/#{artist_id}" if artist_id.present?
      end

      def artist_name
        api_response&.dig("user", "screen_name")
      end

      def artist_id
        parsed_url.artist_short_id || parsed_referer&.artist_short_id || api_response&.dig("user", "id")
      end

      def artist_commentary_desc
        return if api_response.blank?
        api_response["text"]
      end

      # Absolutize scheme- or host-relative hrefs and srcs in the commentary.
      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc) do |element|
          if element["href"].present?
            href = Addressable::URI.heuristic_parse(element["href"])
            href.site ||= "https://www.weibo.com"
            href.scheme ||= "https"
            element["href"] = href.to_s
          end

          if element["src"].present?
            src = Addressable::URI.heuristic_parse(element["src"])
            src.scheme ||= "https"
            element["src"] = src.to_s
          end
        end
      end

      # Fetch the mobile page and extract the embedded
      # `var $render_data = [...][0]` JSON blob. Returns the "status" object,
      # or {} when no mobile URL or blob is found.
      def api_response
        return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank?

        resp = http.cache(1.minute).get(mobile_url)
        json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1]
        return {} if json_string.blank?

        JSON.parse(json_string)["status"]
      end

      memoize :api_response
    end
  end
end