sources: rename Sources::Strategies to Source::Extractor.

Rename Sources::Strategies to Source::Extractor. A Source::Extractor represents a thing that extracts information from a given URL.
2022-03-24 03:05:10 -05:00
parent 34aa22f90b
commit d9d3c1dfe4
63 changed files with 622 additions and 606 deletions
--- a/app/controllers/sources_controller.rb
+++ b/app/controllers/sources_controller.rb
@@ -4,7 +4,7 @@ class SourcesController < ApplicationController
  respond_to :js, :json, :xml

  def show
-    @source = Sources::Strategies.find(params[:url], params[:ref])
+    @source = Source::Extractor.find(params[:url], params[:ref])

    respond_with(@source.to_h) do |format|
      format.xml { render xml: @source.to_h.to_xml(root: "source") }
--- a/app/logical/iqdb_client.rb
+++ b/app/logical/iqdb_client.rb
@@ -30,15 +30,15 @@ class IqdbClient
      if file.present?
        file = file.tempfile
      elsif url.present?
-        strategy = Sources::Strategies.find(url)
-        raise Error, "Can't do reverse image search: #{url} has multiple images. Enter the URL of a single image." if strategy.image_urls.size > 1
+        extractor = Source::Extractor.find(url)
+        raise Error, "Can't do reverse image search: #{url} has multiple images. Enter the URL of a single image." if extractor.image_urls.size > 1

-        download_url = strategy.image_urls.first
-        file = Sources::Strategies.find(download_url).download_file!(download_url)
+        download_url = extractor.image_urls.first
+        file = Source::Extractor.find(download_url).download_file!(download_url)
      elsif image_url.present?
-        file = Sources::Strategies.find(image_url).download_file!(image_url)
+        file = Source::Extractor.find(image_url).download_file!(image_url)
      elsif file_url.present?
-        file = Sources::Strategies.find(file_url).download_file!(file_url)
+        file = Source::Extractor.find(file_url).download_file!(file_url)
      elsif post_id.present?
        file = Post.find(post_id).file(:preview)
      elsif media_asset_id.present?
--- a/app/logical/post_replacement_processor.rb
+++ b/app/logical/post_replacement_processor.rb
@@ -66,14 +66,14 @@ class PostReplacementProcessor
    return MediaFile.open(file) if file.present?
    raise "No file or source URL provided" if source_url.blank?

-    strategy = Sources::Strategies.find(source_url, referer_url)
-    raise NotImplementedError, "No login credentials configured for #{strategy.site_name}." unless strategy.class.enabled?
+    extractor = Source::Extractor.find(source_url, referer_url)
+    raise NotImplementedError, "No login credentials configured for #{extractor.site_name}." unless extractor.class.enabled?

-    image_urls = strategy.image_urls
+    image_urls = extractor.image_urls
    raise "#{source_url} contains multiple images" if image_urls.size > 1

    image_url = image_urls.first
-    file = strategy.download_file!(image_url)
+    file = extractor.download_file!(image_url)

    [file, image_url]
  end
--- a/app/logical/source/extractor.rb
+++ b/app/logical/source/extractor.rb
@@ -0,0 +1,317 @@
+# frozen_string_literal: true
+
+# A source extractor is used to extract information from a given source URL. It
+# extracts all the images and videos from the URL, as well as metadata such as
+# the tags, commentary, artist name, profile URL, and additional names and URLs
+# for new artist entries.
+#
+# To add a new site, create a subclass of Source::Extractor and implement the following methods:
+#
+# * match? - True if the extractor should be used for this URL.
+# * image_urls - The list of images or videos at this URL. Used during uploads.
+# * page_url - The page containing the images. Used for post sources.
+# * profile_url - The URL of the artist's profile page. Used for artist finding.
+# * profile_urls - Extra profile URLs to add to the artist entry.
+# * tag_name - The artist's login name. Used as the default name for new artist tags.
+# * artist_name - The artist's display name. Used as an other name in new artist entries.
+# * other_names - Extra names used in new artist entries.
+# * tags - The artist's tags for the work. Used by translated tags.
+# * artist_commentary_title - The artist's title of the work. Used for artist commentaries.
+# * artist_commentary_desc - The artist's description of the work. Used for artist commentaries.
+#
+module Source
+  class Extractor
+    extend Memoist
+
+    # The http timeout to download a file.
+    DOWNLOAD_TIMEOUT = 60
+
+    attr_reader :url, :referer_url, :parsed_url, :parsed_referer
+    delegate :site_name, to: :parsed_url
+
+    SUBCLASSES = [
+      Source::Extractor::Pixiv,
+      Source::Extractor::Twitter,
+      Source::Extractor::Tumblr,
+      Source::Extractor::NicoSeiga,
+      Source::Extractor::DeviantArt,
+      Source::Extractor::Moebooru,
+      Source::Extractor::Nijie,
+      Source::Extractor::ArtStation,
+      Source::Extractor::HentaiFoundry,
+      Source::Extractor::Fanbox,
+      Source::Extractor::Mastodon,
+      Source::Extractor::PixivSketch,
+      Source::Extractor::Weibo,
+      Source::Extractor::Newgrounds,
+      Source::Extractor::Skeb,
+      Source::Extractor::Lofter,
+      Source::Extractor::Foundation,
+      Source::Extractor::Plurk,
+      Source::Extractor::Tinami,
+      Source::Extractor::Fantia,
+    ]
+
+    # Should return true if the extractor is configured correctly. Return false
+    # if the extractor requires api keys that have not been configured.
+    def self.enabled?
+      true
+    end
+
+    # Return the extractor for the given `url`. The `url` may be either a
+    # direct image URL, or the URL of a page containing one or more images.
+    #
+    # The `referer_url` is optionally provided when uploading direct image URLs
+    # with the bookmarklet. This will be the page containing the image. This
+    # lets us extract information from sites like Twitter, where the image URL by
+    # itself doesn't have enough information to find the page containing the image.
+    #
+    # @param url [String] The URL to extract information from.
+    # @param referer_url [String, nil] The page URL if `url` is an image URL.
+    # @return [Source::Extractor]
+    def self.find(url, referer_url = nil, default: Extractor::Null)
+      extractor = SUBCLASSES.lazy.map { |extractor| extractor.new(url, referer_url) }.find(&:match?)
+      extractor || default&.new(url, referer_url)
+    end
+
+    # Initialize an extractor. Normally one should call `Source::Extractor.find`
+    # instead of instantiating an extractor directly.
+    #
+    # @param url [String] The URL to extract information from.
+    # @param referer_url [String, nil] The page URL if `url` is an image URL.
+    def initialize(url, referer_url = nil)
+      @url = url.to_s
+      @referer_url = referer_url&.to_s
+
+      @parsed_url = Source::URL.parse(url)
+      @parsed_referer = Source::URL.parse(referer_url) if referer_url.present?
+      @parsed_referer = nil if parsed_url&.site_name != parsed_referer&.site_name
+    end
+
+    # Should return true if this extractor should be used for this URL.
+    # Normally, this should check if the URL is from the right site.
+    #
+    # @return [Boolean]
+    def match?
+      false
+    end
+
+    # The list of image (or video) URLs extracted from the target URL.
+    #
+    # If the target URL is a page, this should be every image on the page. If
+    # the target URL is a single image, this should be the image itself.
+    #
+    # @return [Array<String>]
+    def image_urls
+      []
+    end
+
+    # The URL of the page containing the image, or nil if it can't be found.
+    #
+    # The source of the post will be set to the page URL if it's not possible
+    # to convert the image URL to a page URL for this site.
+    #
+    # For example, for sites like Twitter and Tumblr, it's not possible to
+    # convert image URLs to page URLs, so the page URL will be used as the
+    # source for these sites. For sites like Pixiv and DeviantArt, it is
+    # possible to convert image URLs to page URLs, so the image URL will be
+    # used as the source for these sites. This is determined by whether
+    # `Source::URL#page_url` returns a URL or nil.
+    #
+    # @return [String, nil]
+    def page_url
+      nil
+    end
+
+    # A name to suggest as the artist's tag name when creating a new artist.
+    # This should usually be the artist's login name. It should be plain ASCII,
+    # hopefully unique, and it should follow the rules for tag names (see
+    # TagNameValidator).
+    #
+    # @return [String, nil]
+    def tag_name
+      artist_name
+    end
+
+    # The artist's primary name. If an artist has both a display name and a
+    # login name, this should be the display name. This will be used as an
+    # other name for new artist entries.
+    #
+    # @return [String, nil]
+    def artist_name
+      nil
+    end
+
+    # A list of all names associated with the artist. These names will be suggested
+    # as other names when creating a new artist.
+    #
+    # @return [Array<String>]
+    def other_names
+      [artist_name, tag_name].compact.uniq
+    end
+
+    # A link to the artist's profile page on the site. This will be used for
+    # artist finding purposes, so it needs to match the URL in the artist entry.
+    #
+    # @return [String, nil]
+    def profile_url
+      nil
+    end
+
+    # A list of all profile urls associated with the artist. These urls will
+    # be suggested when creating a new artist.
+    #
+    # @return [Array<String>]
+    def profile_urls
+      [profile_url].compact
+    end
+
+    # The artist's title of the work. Used for the artist commentary.
+    #
+    # @return [String, nil]
+    def artist_commentary_title
+      nil
+    end
+
+    # The artist's description of the work. Used for the artist commentary.
+    #
+    # @return [String, nil]
+    def artist_commentary_desc
+      nil
+    end
+
+    # Download the file at the given url. Raises Danbooru::Http::DownloadError if the download fails, or
+    # Danbooru::Http::FileTooLargeError if the file is too large.
+    #
+    # @return [MediaFile] the downloaded file
+    def download_file!(download_url)
+      response, file = http_downloader.download_media(download_url)
+      file
+    end
+
+    # A http client for API requests.
+    def http
+      Danbooru::Http.new.proxy.public_only
+    end
+
+    # A http client for downloading files.
+    def http_downloader
+      http.timeout(DOWNLOAD_TIMEOUT).max_size(Danbooru.config.max_file_size).use(:spoof_referrer).use(:unpolish_cloudflare)
+    end
+
+    def artists
+      ArtistFinder.find_artists(profile_url)
+    end
+
+    # A new artist entry with suggested defaults for when the artist doesn't
+    # exist. Used in Artist.new_with_defaults to prefill the new artist form.
+    def new_artist
+      Artist.new(
+        name: tag_name,
+        other_names: other_names,
+        url_string: profile_urls.join("\n")
+      )
+    end
+
+    def tags
+      (@tags || []).uniq
+    end
+
+    def normalized_tags
+      tags.map { |tag, _url| normalize_tag(tag) }.sort.uniq
+    end
+
+    def normalize_tag(tag)
+      WikiPage.normalize_other_name(tag).downcase
+    end
+
+    def translated_tags
+      translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
+      translated_tags.reject(&:artist?)
+    end
+
+    # Given a tag from the source site, should return an array of corresponding Danbooru tags.
+    def translate_tag(untranslated_tag)
+      return [] if untranslated_tag.blank?
+
+      translated_tag_names = WikiPage.active.other_names_include(untranslated_tag).uniq.pluck(:title)
+      translated_tag_names = TagAlias.to_aliased(translated_tag_names)
+      translated_tags = Tag.where(name: translated_tag_names)
+
+      if translated_tags.empty?
+        normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
+        translated_tags = Tag.nonempty.where(name: normalized_name)
+      end
+
+      translated_tags
+    end
+
+    def dtext_artist_commentary_title
+      self.class.to_dtext(artist_commentary_title)
+    end
+
+    def dtext_artist_commentary_desc
+      self.class.to_dtext(artist_commentary_desc)
+    end
+
+    # A search query that should return any posts that were previously
+    # uploaded from the same source. These may be duplicates, or they may be
+    # other posts from the same gallery.
+    def related_posts_search_query
+      "source:#{url}"
+    end
+
+    def related_posts(limit = 5)
+      Post.system_tag_match(related_posts_search_query).paginate(1, limit: limit)
+    end
+
+    # A hash containing the results of any API calls made by the extractor. For debugging purposes only.
+    def api_response
+      nil
+    end
+
+    def to_h
+      {
+        :artist => {
+          :name => artist_name,
+          :tag_name => tag_name,
+          :other_names => other_names,
+          :profile_url => profile_url,
+          :profile_urls => profile_urls
+        },
+        :artists => artists.as_json(include: :sorted_urls),
+        :image_urls => image_urls,
+        :page_url => page_url,
+        :tags => tags,
+        :normalized_tags => normalized_tags,
+        :translated_tags => translated_tags,
+        :artist_commentary => {
+          :title => artist_commentary_title,
+          :description => artist_commentary_desc,
+          :dtext_title => dtext_artist_commentary_title,
+          :dtext_description => dtext_artist_commentary_desc
+        },
+        :api_response => api_response.to_h
+      }
+    end
+
+    def to_json(*_args)
+      to_h.to_json
+    end
+
+    def http_exists?(url)
+      http_downloader.head(url).status.success?
+    end
+
+    # Convert commentary to dtext by stripping html tags. Sites can override
+    # this to customize how their markup is translated to dtext.
+    def self.to_dtext(text)
+      text = text.to_s
+      text = Rails::Html::FullSanitizer.new.sanitize(text, encode_special_chars: false)
+      text = CGI.unescapeHTML(text)
+      text
+    end
+
+    memoize :http, :http_downloader, :related_posts
+  end
+end
--- a/app/logical/sources/strategies/art_station.rb
+++ b/app/logical/sources/strategies/art_station.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

 # @see Source::URL::ArtStation
-module Sources::Strategies
-  class ArtStation < Base
+class Source::Extractor
+  class ArtStation < Source::Extractor
    def match?
      Source::URL::ArtStation === parsed_url
    end
--- a/app/logical/sources/strategies/deviant_art.rb
+++ b/app/logical/sources/strategies/deviant_art.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

-module Sources
-  module Strategies
-    class DeviantArt < Base
+module Source
+  class Extractor
+    class DeviantArt < Source::Extractor
      def self.enabled?
        Danbooru.config.deviantart_client_id.present? && Danbooru.config.deviantart_client_secret.present?
      end
--- a/app/logical/sources/strategies/fanbox.rb
+++ b/app/logical/sources/strategies/fanbox.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Fanbox
-module Sources
-  module Strategies
-    class Fanbox < Base
+module Source
+  class Extractor
+    class Fanbox < Source::Extractor
      def match?
        Source::URL::Fanbox === parsed_url
      end
--- a/app/logical/sources/strategies/fantia.rb
+++ b/app/logical/sources/strategies/fantia.rb
@@ -1,7 +1,7 @@
 # frozen_string_literal: true

-module Sources::Strategies
-  class Fantia < Base
+class Source::Extractor
+  class Fantia < Source::Extractor
    def self.enabled?
      Danbooru.config.fantia_session_id.present?
    end
--- a/app/logical/sources/strategies/foundation.rb
+++ b/app/logical/sources/strategies/foundation.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Foundation
-module Sources
-  module Strategies
-    class Foundation < Base
+module Source
+  class Extractor
+    class Foundation < Source::Extractor
      def match?
        Source::URL::Foundation === parsed_url
      end
--- a/app/logical/sources/strategies/hentai_foundry.rb
+++ b/app/logical/sources/strategies/hentai_foundry.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::HentaiFoundry
-module Sources
-  module Strategies
-    class HentaiFoundry < Base
+module Source
+  class Extractor
+    class HentaiFoundry < Source::Extractor
      def match?
        Source::URL::HentaiFoundry === parsed_url
      end
--- a/app/logical/sources/strategies/lofter.rb
+++ b/app/logical/sources/strategies/lofter.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Lofter
-module Sources
-  module Strategies
-    class Lofter < Base
+module Source
+  class Extractor
+    class Lofter < Source::Extractor
      def match?
        Source::URL::Lofter === parsed_url
      end
--- a/app/logical/sources/strategies/mastodon.rb
+++ b/app/logical/sources/strategies/mastodon.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

 # @see Source::URL::Mastodon
-module Sources::Strategies
-  class Mastodon < Base
+class Source::Extractor
+  class Mastodon < Source::Extractor
    def match?
      Source::URL::Mastodon === parsed_url
    end
--- a/app/logical/sources/strategies/moebooru.rb
+++ b/app/logical/sources/strategies/moebooru.rb
@@ -1,10 +1,10 @@
 # frozen_string_literal: true

 # @see Source::URL::Moebooru
-module Sources
-  module Strategies
-    class Moebooru < Base
-      delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true
+module Source
+  class Extractor
+    class Moebooru < Source::Extractor
+      delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_extractor, allow_nil: true
      delegate :site_name, :domain, to: :parsed_url

      def match?
@@ -27,7 +27,7 @@ module Sources
        end
      end

-      # XXX the base strategy excludes artist tags from the translated tags; we don't want that for moebooru.
+      # XXX the base extractor excludes artist tags from the translated tags; we don't want that for moebooru.
      def translated_tags
        tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
      end
@@ -50,8 +50,8 @@ module Sources
      memoize :api_response

      concerning :HelperMethods do
-        def sub_strategy
-          @sub_strategy ||= Sources::Strategies.find(api_response[:source], default: nil)
+        def sub_extractor
+          @sub_extractor ||= Source::Extractor.find(api_response[:source], default: nil)
        end

        def file_ext
--- a/app/logical/sources/strategies/newgrounds.rb
+++ b/app/logical/sources/strategies/newgrounds.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Newgrounds
-module Sources
-  module Strategies
-    class Newgrounds < Base
+module Source
+  class Extractor
+    class Newgrounds < Source::Extractor
      def match?
        Source::URL::Newgrounds === parsed_url
      end
--- a/app/logical/sources/strategies/nico_seiga.rb
+++ b/app/logical/sources/strategies/nico_seiga.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::NicoSeiga
-module Sources
-  module Strategies
-    class NicoSeiga < Base
+module Source
+  class Extractor
+    class NicoSeiga < Source::Extractor
      def self.enabled?
        Danbooru.config.nico_seiga_user_session.present?
      end
--- a/app/logical/sources/strategies/nijie.rb
+++ b/app/logical/sources/strategies/nijie.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Nijie
-module Sources
-  module Strategies
-    class Nijie < Base
+module Source
+  class Extractor
+    class Nijie < Source::Extractor
      def self.enabled?
        Danbooru.config.nijie_login.present? && Danbooru.config.nijie_password.present?
      end
--- a/app/logical/sources/strategies/null.rb
+++ b/app/logical/sources/strategies/null.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

-module Sources
-  module Strategies
-    class Null < Base
+module Source
+  class Extractor
+    class Null < Source::Extractor
      def image_urls
        [url]
      end
--- a/app/logical/sources/strategies/pixiv.rb
+++ b/app/logical/sources/strategies/pixiv.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Pixiv
-module Sources
-  module Strategies
-    class Pixiv < Base
+module Source
+  class Extractor
+    class Pixiv < Source::Extractor
      def self.enabled?
        Danbooru.config.pixiv_phpsessid.present?
      end
--- a/app/logical/sources/strategies/pixiv_sketch.rb
+++ b/app/logical/sources/strategies/pixiv_sketch.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::PixivSketch
-module Sources
-  module Strategies
-    class PixivSketch < Base
+module Source
+  class Extractor
+    class PixivSketch < Source::Extractor
      def match?
        Source::URL::PixivSketch === parsed_url
      end
--- a/app/logical/sources/strategies/plurk.rb
+++ b/app/logical/sources/strategies/plurk.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Plurk
-module Sources
-  module Strategies
-    class Plurk < Base
+module Source
+  class Extractor
+    class Plurk < Source::Extractor
      def match?
        Source::URL::Plurk === parsed_url
      end
--- a/app/logical/sources/strategies/skeb.rb
+++ b/app/logical/sources/strategies/skeb.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Skeb
-module Sources
-  module Strategies
-    class Skeb < Base
+module Source
+  class Extractor
+    class Skeb < Extractor
      def match?
        Source::URL::Skeb === parsed_url
      end
--- a/app/logical/sources/strategies/tinami.rb
+++ b/app/logical/sources/strategies/tinami.rb
@@ -1,9 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Tinami
-module Sources
-  module Strategies
-    class Tinami < Base
+module Source
+  class Extractor
+    class Tinami < Source::Extractor

      def match?
        Source::URL::Tinami === parsed_url
--- a/app/logical/sources/strategies/tumblr.rb
+++ b/app/logical/sources/strategies/tumblr.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

 # @see Source::URL::Tumblr
-module Sources::Strategies
-  class Tumblr < Base
+class Source::Extractor
+  class Tumblr < Source::Extractor
    def self.enabled?
      Danbooru.config.tumblr_consumer_key.present?
    end
--- a/app/logical/sources/strategies/twitter.rb
+++ b/app/logical/sources/strategies/twitter.rb
@@ -1,8 +1,8 @@
 # frozen_string_literal: true

 # @see Source::URL::Twitter
-module Sources::Strategies
-  class Twitter < Base
+class Source::Extractor
+  class Twitter < Source::Extractor
    # List of hashtag suffixes attached to tag other names
    # Ex: 西住みほ生誕祭2019 should be checked as 西住みほ
    # The regexes will not match if there is nothing preceding
--- a/app/logical/sources/strategies/weibo.rb
+++ b/app/logical/sources/strategies/weibo.rb
@@ -1,10 +1,9 @@
 # frozen_string_literal: true

 # @see Source::URL::Weibo
-module Sources
-  module Strategies
-    class Weibo < Base
-
+module Source
+  class Extractor
+    class Weibo < Source::Extractor
      def match?
        Source::URL::Weibo === parsed_url
      end
--- a/app/logical/source/url.rb
+++ b/app/logical/source/url.rb
@@ -3,8 +3,8 @@
 # A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a
 # subclass responsible for parsing and extracting information from URLs for that site.
 #
-# Sources::Strategies are the main user of Source::URLs. Each Source::URL subclass usually
-# has a corresponding strategy for extracting data from that site.
+# Source::Extractors are the main users of Source::URLs. Each Source::URL subclass usually
+# has a corresponding extractor for extracting data from that site.
 #
 # To add a new site, create a subclass of Source::URL and implement `#match?` to define
 # which URLs belong to the site, and `#parse` to parse and extract information from the URL.
--- a/app/logical/sources/error.rb
+++ b/app/logical/sources/error.rb
@@ -1,6 +0,0 @@
-# frozen_string_literal: true
-
-module Sources
-  class Error < StandardError
-  end
-end
--- a/app/logical/sources/strategies.rb
+++ b/app/logical/sources/strategies.rb
@@ -1,35 +0,0 @@
-# frozen_string_literal: true
-
-module Sources
-  module Strategies
-    def self.all
-      [
-        Strategies::Pixiv,
-        Strategies::Twitter,
-        Strategies::Tumblr,
-        Strategies::NicoSeiga,
-        Strategies::DeviantArt,
-        Strategies::Moebooru,
-        Strategies::Nijie,
-        Strategies::ArtStation,
-        Strategies::HentaiFoundry,
-        Strategies::Fanbox,
-        Strategies::Mastodon,
-        Strategies::PixivSketch,
-        Strategies::Weibo,
-        Strategies::Newgrounds,
-        Strategies::Skeb,
-        Strategies::Lofter,
-        Strategies::Foundation,
-        Strategies::Plurk,
-        Strategies::Tinami,
-        Strategies::Fantia,
-      ]
-    end
-
-    def self.find(url, referer = nil, default: Strategies::Null)
-      strategy = all.lazy.map { |s| s.new(url, referer) }.detect(&:match?)
-      strategy || default&.new(url, referer)
-    end
-  end
-end
--- a/app/logical/sources/strategies/base.rb
+++ b/app/logical/sources/strategies/base.rb
@@ -1,259 +0,0 @@
-# frozen_string_literal: true
-
-# This is a collection of strategies for extracting information about a
-# resource. At a minimum it tries to extract the artist name and a canonical
-# URL to download the image from. But it can also be used to normalize a URL
-# for use with the artist finder.
-#
-# Design Principles
-#
-# In general you should minimize state. You can safely assume that <tt>url</tt>
-# and <tt>referer_url</tt> will not change over the lifetime of an instance,
-# so you can safely memoize methods and their results. A common pattern is
-# conditionally making an external API call and parsing its response. You should
-# make this call on demand and memoize the response.
-
-module Sources
-  module Strategies
-    class Base
-      extend Memoist
-
-      # The http timeout to download a file.
-      DOWNLOAD_TIMEOUT = 60
-
-      attr_reader :url, :referer_url, :parsed_url, :parsed_referer
-      delegate :site_name, to: :parsed_url
-
-      # Should return true if all prerequisites for using the strategy are met.
-      # Return false if the strategy requires api keys that have not been configured.
-      def self.enabled?
-        true
-      end
-
-      # Extract information from a target URL. The target URL may be either a
-      # direct image URL, or the URL of a HTML page containing one or more
-      # images.
-      #
-      # The referer URL is optionally provided when uploading direct image URLs
-      # with the bookmarklet. This lets us find the page containing the image
-      # for sites like Twitter, where the image URL by itself doesn't have
-      # enough information to find the page containing the image.
-      #
-      # @param url [String] The target URL
-      # @param referer_url [String] If the the target URL is an image URL, this
-      #   should be the HTML page containing the image.
-      def initialize(url, referer_url = nil)
-        @url = url.to_s
-        @referer_url = referer_url&.to_s
-
-        @parsed_url = Source::URL.parse(url)
-        @parsed_referer = Source::URL.parse(referer_url) if referer_url.present?
-        @parsed_referer = nil if parsed_url&.site_name != parsed_referer&.site_name
-      end
-
-      # Should return true if this strategy should be used. By default, checks
-      # if the main url belongs to any of the domains associated with this site.
-      def match?
-        false
-      end
-
-      # Whatever <tt>url</tt> is, this method should return the direct links
-      # to the canonical binary files. It should not be an HTML page. It should
-      # be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
-      # downloader will fetch and save to disk.
-      def image_urls
-        []
-      end
-
-      # The URL of the page containing the image, or nil if it can't be found.
-      #
-      # The source of the post will be set to the page URL if it's not possible
-      # to convert the image URL to a page URL for this site.
-      #
-      # For example, for sites like Twitter and Tumblr, it's not possible to
-      # convert image URLs to page URLs, so the page URL will be used as the
-      # source for these sites. For sites like Pixiv and DeviantArt, it is
-      # possible to convert image URLs to page URLs, so the image URL will be
-      # used as the source for these sites. This is determined by whether
-      # `Source::URL#page_url` returns a URL or nil.
-      #
-      # @return [String, nil]
-      def page_url
-        nil
-      end
-
-      # A name to suggest as the artist's tag name when creating a new artist.
-      # This should usually be the artist's account name.
-      def tag_name
-        artist_name
-      end
-
-      # The artists's primary name. If an artist has both a display name and an
-      # account name, this should be the display name.
-      def artist_name
-        nil
-      end
-
-      # A list of all names associated with the artist. These names will be suggested
-      # as other names when creating a new artist.
-      def other_names
-        [artist_name, tag_name].compact.uniq
-      end
-
-      # A link to the artist's profile page on the site. This will be used for
-      # artist finding purposes, so it needs to match the URL in the artist entry.
-      def profile_url
-        nil
-      end
-
-      # A list of all profile urls associated with the artist. These urls will
-      # be suggested when creating a new artist.
-      def profile_urls
-        [profile_url].compact
-      end
-
-      def artist_commentary_title
-        nil
-      end
-
-      def artist_commentary_desc
-        nil
-      end
-
-      # Download the file at the given url. Raises Danbooru::Http::DownloadError if the download fails, or
-      # Danbooru::Http::FileTooLargeError if the file is too large.
-      #
-      # @return [MediaFile] the downloaded file
-      def download_file!(download_url)
-        response, file = http_downloader.download_media(download_url)
-        file
-      end
-
-      # A http client for API requests.
-      def http
-        Danbooru::Http.new.proxy.public_only
-      end
-      memoize :http
-
-      # A http client for downloading files.
-      def http_downloader
-        http.timeout(DOWNLOAD_TIMEOUT).max_size(Danbooru.config.max_file_size).use(:spoof_referrer).use(:unpolish_cloudflare)
-      end
-      memoize :http_downloader
-
-      def artists
-        ArtistFinder.find_artists(profile_url)
-      end
-
-      # A new artist entry with suggested defaults for when the artist doesn't
-      # exist. Used in Artist.new_with_defaults to prefill the new artist form.
-      def new_artist
-        Artist.new(
-          name: tag_name,
-          other_names: other_names,
-          url_string: profile_urls.join("\n")
-        )
-      end
-
-      def tags
-        (@tags || []).uniq
-      end
-
-      def normalized_tags
-        tags.map { |tag, _url| normalize_tag(tag) }.sort.uniq
-      end
-
-      def normalize_tag(tag)
-        WikiPage.normalize_other_name(tag).downcase
-      end
-
-      def translated_tags
-        translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
-        translated_tags.reject(&:artist?)
-      end
-
-      # Given a tag from the source site, should return an array of corresponding Danbooru tags.
-      def translate_tag(untranslated_tag)
-        return [] if untranslated_tag.blank?
-
-        translated_tag_names = WikiPage.active.other_names_include(untranslated_tag).uniq.pluck(:title)
-        translated_tag_names = TagAlias.to_aliased(translated_tag_names)
-        translated_tags = Tag.where(name: translated_tag_names)
-
-        if translated_tags.empty?
-          normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
-          translated_tags = Tag.nonempty.where(name: normalized_name)
-        end
-
-        translated_tags
-      end
-
-      def dtext_artist_commentary_title
-        self.class.to_dtext(artist_commentary_title)
-      end
-
-      def dtext_artist_commentary_desc
-        self.class.to_dtext(artist_commentary_desc)
-      end
-
-      # A search query that should return any posts that were previously
-      # uploaded from the same source. These may be duplicates, or they may be
-      # other posts from the same gallery.
-      def related_posts_search_query
-        "source:#{url}"
-      end
-
-      def related_posts(limit = 5)
-        Post.system_tag_match(related_posts_search_query).paginate(1, limit: limit)
-      end
-      memoize :related_posts
-
-      # A hash containing the results of any API calls made by the strategy. For debugging purposes only.
-      def api_response
-        nil
-      end
-
-      def to_h
-        {
-          :artist => {
-            :name => artist_name,
-            :tag_name => tag_name,
-            :other_names => other_names,
-            :profile_url => profile_url,
-            :profile_urls => profile_urls
-          },
-          :artists => artists.as_json(include: :sorted_urls),
-          :image_urls => image_urls,
-          :page_url => page_url,
-          :tags => tags,
-          :normalized_tags => normalized_tags,
-          :translated_tags => translated_tags,
-          :artist_commentary => {
-            :title => artist_commentary_title,
-            :description => artist_commentary_desc,
-            :dtext_title => dtext_artist_commentary_title,
-            :dtext_description => dtext_artist_commentary_desc
-          },
-          :api_response => api_response.to_h
-        }
-      end
-
-      def to_json(*_args)
-        to_h.to_json
-      end
-
-      def http_exists?(url)
-        http_downloader.head(url).status.success?
-      end
-
-      # Convert commentary to dtext by stripping html tags. Sites can override
-      # this to customize how their markup is translated to dtext.
-      def self.to_dtext(text)
-        text = text.to_s
-        text = Rails::Html::FullSanitizer.new.sanitize(text, encode_special_chars: false)
-        text = CGI.unescapeHTML(text)
-        text
-      end
-    end
-  end
-end
--- a/app/models/artist.rb
+++ b/app/models/artist.rb
@@ -156,7 +156,7 @@ class Artist < ApplicationRecord
      end

      if source.present?
-        artist = Sources::Strategies.find(source).new_artist
+        artist = Source::Extractor.find(source).new_artist
        artist.attributes = params
      else
        artist = Artist.new(params)
@@ -252,7 +252,7 @@ class Artist < ApplicationRecord
      elsif query.include?("*")
        where(id: ArtistURL.where_like(:url, query).select(:artist_id))
      elsif query =~ %r{\Ahttps?://}i
-        url = Sources::Strategies.find(query).profile_url || query
+        url = Source::Extractor.find(query).profile_url || query
        ArtistFinder.find_artists(url)
      else
        where(id: ArtistURL.where_like(:url, "*#{query}*").select(:artist_id))
--- a/app/models/artist_url.rb
+++ b/app/models/artist_url.rb
@@ -53,7 +53,7 @@ class ArtistURL < ApplicationRecord
    elsif url.include?("*")
      where_ilike(attr, url)
    else
-      profile_url = Sources::Strategies.find(url).profile_url || url
+      profile_url = Source::Extractor.find(url).profile_url || url
      where(attr => normalize_normalized_url(profile_url))
    end
  end
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -87,7 +87,7 @@ class Post < ApplicationRecord
    )

    if add_artist_tag
-      tag_string = "#{tag_string} #{upload_media_asset.source_strategy&.artists.to_a.map(&:tag).map(&:name).join(" ")}".strip
+      tag_string = "#{tag_string} #{upload_media_asset.source_extractor&.artists.to_a.map(&:tag).map(&:name).join(" ")}".strip
      tag_string += " " if tag_string.present?
    end

@@ -1157,7 +1157,7 @@ class Post < ApplicationRecord
      self.pixiv_id = nil
      return unless web_source?

-      site = Sources::Strategies::Pixiv.new(source)
+      site = Source::Extractor::Pixiv.new(source)
      if site.match?
        self.pixiv_id = site.illust_id
      end
@@ -1265,7 +1265,7 @@ class Post < ApplicationRecord
      return if !web_source?
      return if has_tag?("artist_request") || has_tag?("official_art")
      return if tags.any?(&:artist?)
-      return if Sources::Strategies.find(source).is_a?(Sources::Strategies::Null)
+      return if Source::Extractor.find(source).is_a?(Source::Extractor::Null)

      new_artist_path = Routes.new_artist_path(artist: { source: source })
      warnings.add(:base, "Artist tag is required. \"Create new artist tag\":[#{new_artist_path}]. Ask on the forum if you need naming help")
--- a/app/models/upload.rb
+++ b/app/models/upload.rb
@@ -117,8 +117,8 @@ class Upload < ApplicationRecord
        UploadMediaAsset.new(file: file.tempfile, source_url: "file://#{file.original_filename}")
      end
    elsif source.present?
-      page_url = source_strategy.page_url
-      image_urls = source_strategy.image_urls
+      page_url = source_extractor.page_url
+      image_urls = source_extractor.image_urls

      if image_urls.empty?
        raise Error, "#{source} doesn't contain any images"
@@ -136,14 +136,14 @@ class Upload < ApplicationRecord
    update!(status: "error", error: e.message)
  end

-  def source_strategy
+  def source_extractor
    return nil if source.blank?
-    Sources::Strategies.find(source, referer_url)
+    Source::Extractor.find(source, referer_url)
  end

  def self.available_includes
    [:uploader, :upload_media_assets, :media_assets, :posts]
  end

-  memoize :source_strategy
+  memoize :source_extractor
 end
--- a/app/models/upload_media_asset.rb
+++ b/app/models/upload_media_asset.rb
@@ -79,9 +79,9 @@ class UploadMediaAsset < ApplicationRecord
    end
  end

-  def source_strategy
+  def source_extractor
    return nil if source_url.blank?
-    Sources::Strategies.find(source_url, page_url)
+    Source::Extractor.find(source_url, page_url)
  end

  def async_process_upload!
@@ -98,7 +98,7 @@ class UploadMediaAsset < ApplicationRecord
    if file.present?
      media_file = MediaFile.open(file)
    else
-      media_file = source_strategy.download_file!(source_url)
+      media_file = source_extractor.download_file!(source_url)
    end

    MediaAsset.upload!(media_file) do |media_asset|
@@ -120,5 +120,5 @@ class UploadMediaAsset < ApplicationRecord
    end
  end

-  memoize :source_strategy
+  memoize :source_extractor
 end
--- a/app/views/upload_media_assets/index.html.erb
+++ b/app/views/upload_media_assets/index.html.erb
@@ -2,8 +2,8 @@
  <div id="a-index">
    <h1>Upload</h1>

-    <% if policy(@upload).show? && @upload.source_strategy.present? %>
-      <%= render_source_data(@upload.source_strategy) %>
+    <% if policy(@upload).show? && @upload.source_extractor.present? %>
+      <%= render_source_data(@upload.source_extractor) %>
    <% end %>

    <div class="border-b mb-4 flex flex-wrap gap-4">
--- a/app/views/uploads/_single_asset_upload.html.erb
+++ b/app/views/uploads/_single_asset_upload.html.erb
@@ -34,10 +34,10 @@
    </p>
  </div>

-  <%= render "uploads/related_posts", source: upload_media_asset.source_strategy %>
+  <%= render "uploads/related_posts", source: upload_media_asset.source_extractor %>

-  <% if upload_media_asset.source_strategy.present? %>
-    <%= render_source_data(upload_media_asset.source_strategy) %>
+  <% if upload_media_asset.source_extractor.present? %>
+    <%= render_source_data(upload_media_asset.source_extractor) %>
  <% end %>

  <% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %>