Refactor sources

This commit is contained in:
Albert Yi
2018-08-06 17:39:25 -07:00
parent 54363ffecf
commit 762dc3da24
71 changed files with 2340 additions and 2430 deletions

View File

@@ -1,78 +0,0 @@
# encoding: UTF-8
module Sources
  # Facade over the individual site strategies. Picks the first strategy
  # whose url matcher accepts the given url (or referer url) and delegates
  # all metadata lookups to it.
  class Site
    # Raised from #initialize when no registered strategy matches the url.
    class NoStrategyError < RuntimeError; end

    attr_reader :strategy

    delegate :url, :get, :get_size, :site_name, :artist_name,
      :profile_url, :image_url, :tags, :artists, :unique_id,
      :file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls,
      :artist_commentary_title, :artist_commentary_desc,
      :dtext_artist_commentary_title, :dtext_artist_commentary_desc,
      :rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy

    # Every known strategy class, in the order they are tried.
    def self.strategies
      [Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo]
    end

    # url - the resource to resolve.
    # referer_url - optional page url; also consulted when matching.
    #
    # Raises NoStrategyError when no strategy claims either url.
    def initialize(url, referer_url: nil)
      @url = url
      matched = Site.strategies.find do |candidate|
        candidate.url_match?(url) || candidate.url_match?(referer_url)
      end
      raise NoStrategyError.new if matched.nil?
      @strategy = matched.new(url, referer_url)
    end

    def referer_url
      strategy.try(:referer_url)
    end

    def normalized_for_artist_finder?
      available? && strategy.normalized_for_artist_finder?
    end

    # Best-effort normalization: any failure falls back to the raw url.
    def normalize_for_artist_finder!
      if available? && strategy.normalizable_for_artist_finder?
        strategy.normalize_for_artist_finder!
      else
        url
      end
    rescue
      url
    end

    # Serializable summary of everything the strategy extracted.
    def to_h
      {
        :artist_name => artist_name,
        :artists => artists.as_json(include: :sorted_urls),
        :profile_url => profile_url,
        :image_url => image_url,
        :image_urls => image_urls,
        :normalized_for_artist_finder_url => normalize_for_artist_finder!,
        :tags => tags,
        :translated_tags => translated_tags,
        :unique_id => unique_id,
        :artist_commentary => {
          :title => artist_commentary_title,
          :description => artist_commentary_desc,
          :dtext_title => dtext_artist_commentary_title,
          :dtext_description => dtext_artist_commentary_desc,
        }
      }
    end

    def to_json
      to_h.to_json
    end

    # True when a strategy was successfully selected.
    def available?
      strategy.present?
    end
  end
end

View File

@@ -0,0 +1,29 @@
module Sources
  # Registry and factory for the source strategies. Strategies.find returns
  # an instantiated strategy for a url; Strategies::Null acts as the
  # catch-all so find never comes up empty.
  module Strategies
    # All strategy classes in match order.
    def self.all
      [
        Strategies::Pixiv,
        Strategies::NicoSeiga,
        Strategies::Twitter,
        Strategies::DeviantArt,
        Strategies::Tumblr,
        Strategies::ArtStation,
        Strategies::Nijie,
        Strategies::Pawoo,
        Strategies::Moebooru,
        Strategies::Null # MUST BE LAST!
      ]
    end

    # Instantiate the first strategy claiming url (or referer).
    def self.find(url, referer = nil)
      strategy_class = all.find { |candidate| candidate.match?(url, referer) }
      strategy_class.new(url, referer)
    end

    # The canonical url (typically the page url) for the resource at url.
    def self.canonical(url, referer)
      find(url, referer).canonical_url
    end
  end
end

View File

@@ -1,68 +1,165 @@
module Sources::Strategies
class ArtStation < Base
PROJECT = %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)/?\z!i
ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i
PROFILE1 = %r!\Ahttps?://(\w+)\.artstation\.com!i
PROFILE2 = %r!\Ahttps?://www.artstation.com/artist/(\w+)!i
PROFILE3 = %r!\Ahttps?://www.artstation.com/(\w+)!i
PROFILE = %r!#{PROFILE2}|#{PROFILE3}|#{PROFILE1}!
attr_reader :json, :image_urls
def self.url_match?(url)
self.project_id(url).present?
def self.match?(*urls)
urls.compact.any? { |x| x.match?(PROJECT) || x.match?(ASSET) || x.match?(PROFILE)}
end
# https://www.artstation.com/artwork/04XA4
# https://www.artstation.com/artwork/cody-from-sf
# https://sa-dui.artstation.com/projects/DVERn
def self.project_id(url)
if url =~ %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)\z!i
if url =~ PROJECT
$~[:project_id]
else
nil
end
end
def referer_url
if self.class.url_match?(@referer_url)
@referer_url
else
@url
end
end
def site_name
"ArtStation"
end
def project_id
self.class.project_id(referer_url)
def image_urls
image_urls_sub
.map { |asset| original_asset_url(asset) }
end
memoize :image_urls
def page_url
"https://www.artstation.com/artwork/#{project_id}"
[url, referer_url].each do |x|
if x =~ PROJECT
return "https://www.artstation.com/artwork/#{$~[:project_id]}"
end
end
return super
end
def profile_url
if url =~ PROFILE1 && $1 != "www"
return "https://www.artstation.com/#{$1}"
end
if url =~ PROFILE2
return "https://www.artstation.com/#{$1}"
end
if url =~ PROFILE3 && url !~ PROJECT
return url
end
api_json["user"]["permalink"]
end
def artist_name
api_json["user"]["username"]
end
def artist_commentary_title
api_json["title"]
end
def artist_commentary_desc
ActionView::Base.full_sanitizer.sanitize(api_json["description"])
end
memoize :artist_commentary_desc
def tags
return nil if !api_json.has_key?("tags")
api_json["tags"].
map { |tag| [tag.downcase.tr(" ", "_"), tag_url(tag)]}
end
memoize :tags
def normalized_for_artist_finder?
url =~ PROFILE3 && url !~ PROFILE2 && url !~ PROJECT
end
def normalizable_for_artist_finder?
url =~ PROFILE || url =~ PROJECT
end
def normalize_for_artist_finder
profile_url
end
public
def image_urls_sub
if url.match?(ASSET)
return [url]
end
api_json["assets"]
.select { |asset| asset["asset_type"] == "image" }
.map { |asset| asset["image_url"] }
end
# these are de facto private methods but are public for testing
# purposes
def project_id
self.class.project_id(url) || self.class.project_id(referer_url)
end
memoize :project_id
def api_url
"https://www.artstation.com/projects/#{project_id}.json"
end
def image_url
image_urls.first
def api_json
if project_id.nil?
raise ::Sources::Error.new("Project id could not be determined from (#{url}, #{referer_url})")
end
resp = HTTParty.get(api_url, Danbooru.config.httparty_options)
if resp.success?
json = JSON.parse(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end
return json
end
memoize :api_json
# Returns the original representation of the asset, if it exists. Otherwise
# return the url.
def original_asset_url(x)
if x =~ ASSET
# example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
original_url = x.sub(%r!/(?:medium|small|large)/!, "/original/")
if http_exists?(original_url, headers)
return original_url
end
if x =~ /medium|small/
large_url = x.sub(%r!/(?:medium|small)/!, "/large/")
if http_exists?(large_url, headers)
return large_url
end
end
end
return x
end
def get
resp = HTTParty.get(api_url, Danbooru.config.httparty_options)
image_url_rewriter = Downloads::RewriteStrategies::ArtStation.new
if resp.success?
@json = JSON.parse(resp.body)
@artist_name = json["user"]["username"]
@profile_url = json["user"]["permalink"]
images = json["assets"].select { |asset| asset["asset_type"] == "image" }
@image_urls = images.map do |x|
y, _, _ = image_url_rewriter.rewrite(x["image_url"], nil)
y
end
@tags = json["tags"].map {|x| [x.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(x)]} if json["tags"]
@artist_commentary_title = json["title"]
@artist_commentary_desc = ActionView::Base.full_sanitizer.sanitize(json["description"])
else
raise "HTTP error code: #{resp.code} #{resp.message}"
end
def tag_url(name)
"https://www.artstation.com/search?q=" + CGI.escape(name)
end
end
end

View File

@@ -1,41 +1,108 @@
# This is a collection of strategies for extracting information about a
# resource. At a minimum it tries to extract the artist name and a canonical
# URL to download the image from. But it can also be used to normalize a URL
# for use with the artist finder. It differs from Downloads::RewriteStrategies
# in that the latter is more for normalizing and rewriting a URL until it is
# suitable for downloading, whereas Sources::Strategies is more for meta-data
# that can only be obtained by downloading and parsing the resource.
# for use with the artist finder.
#
# Design Principles
#
# In general you should minimize state. You can safely assume that <tt>url</tt>
# and <tt>referer_url</tt> will not change over the lifetime of an instance,
# so you can safely memoize methods and their results. A common pattern is
# conditionally making an external API call and parsing its response. You should
# make this call on demand and memoize the response.
module Sources
module Strategies
class Base
attr_reader :url, :referer_url
attr_reader :artist_name, :profile_url, :image_url, :tags
attr_reader :artist_commentary_title, :artist_commentary_desc
def self.url_match?(url)
extend Memoist
def self.match?(*urls)
false
end
# * <tt>url</tt> - Should point to a resource suitable for
# downloading. This may sometimes point to the binary file.
# It may also point to the artist's profile page, in cases
# where this class is being used to normalize artist urls.
# Implementations should be smart enough to detect this and
# behave accordingly.
# * <tt>referer_url</tt> - Sometimes the HTML page cannot be
# determined from <tt>url</tt>. You should generally pass in a
# <tt>referrer_url</tt> so the strategy can discover the HTML
# page and other information.
def initialize(url, referer_url = nil)
@url = url
@referer_url = referer_url
end
# No remote calls are made until this method is called.
def get
def site_name
raise NotImplementedError
end
def get_size
@get_size ||= Downloads::File.new(@image_url).size
# Whatever <tt>url</tt> is, this method should return the direct links
# to the canonical binary files. It should not be an HTML page. It should
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
# downloader will fetch and save to disk.
def image_urls
raise NotImplementedError
end
def image_url
image_urls.first
end
# Whatever <tt>url</tt> is, this method should return a link to the HTML
# page containing the resource. It should not be a binary file. It will
# eventually be assigned as the source for the post, but it does not
# represent what the downloader will fetch.
def page_url
Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found"
return nil
end
# This will be the url stored in posts. Typically this is the page
# url, but on some sites it may be preferable to store the image url.
def canonical_url
page_url
end
# A link to the artist's profile page on the site.
def profile_url
nil
end
def artist_name
raise NotImplementedError
end
def artist_commentary_title
nil
end
def artist_commentary_desc
nil
end
# Subclasses should merge in any required headers needed to access resources
# on the site.
def headers
return Danbooru.config.http_headers
end
# Returns the size of the image resource without actually downloading the file.
def size
Downloads::File.new(image_url).size
end
memoize :size
# Subclasses should return true only if the URL is in its final normalized form.
#
# Sources::Site.new("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
# Sources::Strategies.find("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
# => true
# Sources::Site.new("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
# Sources::Strategies.find("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
# => false
def normalized_for_artist_finder?
false
@@ -44,32 +111,33 @@ module Sources
# Subclasses should return true only if the URL is a valid URL that could
# be converted into normalized form.
#
# Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
# Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
# => true
# Sources::Site.new("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
# Sources::Strategies.find("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
# => false
def normalizable_for_artist_finder?
false
end
def normalize_for_artist_finder!
url
end
def site_name
raise NotImplementedError
def normalize_for_artist_finder
profile_url || url
end
# A unique identifier for the artist. This is used for artist creation.
def unique_id
artist_name
end
def artists
Artist.find_artists(url, referer_url)
Artist.find_artists(profile_url)
end
def image_urls
[image_url]
def file_url
image_url
end
def data
{}
end
def tags
@@ -97,11 +165,6 @@ module Sources
translated_tags
end
# Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
def fake_referer
nil
end
def dtext_artist_commentary_title
self.class.to_dtext(artist_commentary_title)
end
@@ -110,9 +173,40 @@ module Sources
self.class.to_dtext(artist_commentary_desc)
end
# A strategy may return extra data unrelated to the file
def data
return {}
end
def to_h
return {
:artist_name => artist_name,
:artists => artists.as_json(include: :sorted_urls),
:profile_url => profile_url,
:image_url => image_url,
:image_urls => image_urls,
:normalized_for_artist_finder_url => normalize_for_artist_finder,
:tags => tags,
:translated_tags => translated_tags,
:unique_id => unique_id,
:artist_commentary => {
:title => artist_commentary_title,
:description => artist_commentary_desc,
:dtext_title => dtext_artist_commentary_title,
:dtext_description => dtext_artist_commentary_desc,
}
}
end
def to_json
to_h.to_json
end
protected
def agent
raise NotImplementedError
def http_exists?(url, headers)
res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
res.success?
end
# Convert commentary to dtext by stripping html tags. Sites can override

View File

@@ -1,44 +1,127 @@
module Sources
module Strategies
class DeviantArt < Base
extend Memoist
ATTRIBUTED_ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
PATH_ART = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
SUBDOMAIN_ART = %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
PROFILE = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/?\z}
def self.url_match?(url)
url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
end
def self.normalize(url)
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
url
elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
"http://www.deviantart.com/#{$1}#{$2}"
else
url
end
end
def referer_url
if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
@referer_url
else
@url
end
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) }
end
def site_name
"Deviant Art"
end
def unique_id
artist_name
def image_urls
# normalize thumbnails
if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
match = $1
return [url.sub(match + "200H/", match)]
end
if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
match = $1
return [url.sub(match + "PRE/", match)]
end
# return direct links
if url =~ ATTRIBUTED_ASSET || url =~ ASSET
return [url]
end
# work is deleted, use image url as given by user.
if uuid.nil?
return [url]
end
# work is downloadable
if api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
return [src]
end
# work isn't downloadable, or download size is same as regular size.
if api_deviation.present?
return [api_deviation.dig(:content, :src)]
end
raise "Couldn't find image url"
end
def get
# no-op
def page_url
[url, referer_url].each do |x|
if x =~ ATTRIBUTED_ASSET
return "http://fav.me/d#{$1}"
end
if x =~ ASSET
return "http://fav.me/d#{$1}"
end
if x =~ PATH_ART
return x
end
if x !~ RESERVED_SUBDOMAINS && x =~ SUBDOMAIN_ART
return "http://www.deviantart.com/#{$1}#{$2}"
end
end
return super
end
def profile_url
if url =~ PROFILE
return url
end
if artist_name.blank?
return nil
end
return "https://www.deviantart.com/#{artist_name}"
end
def artist_name
api_metadata.dig(:author, :username).try(&:downcase)
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
url =~ PATH_ART || url =~ SUBDOMAIN_ART
end
def normalize_for_artist_finder
profile_url
end
def tags
if api_metadata.blank?
return []
end
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def dtext_artist_commentary_desc
@@ -71,75 +154,24 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def artist_name
api_metadata.dig(:author, :username).try(&:downcase)
end
def profile_url
return "" if artist_name.blank?
"https://www.deviantart.com/#{artist_name}"
end
def image_url
# work is deleted, use image url as given by user.
if uuid.nil?
url
# work is downloadable
elsif api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
src
# work isn't downloadable, or download size is same as regular size.
elsif api_deviation.present?
api_deviation.dig(:content, :src)
else
raise "couldn't find image url"
end
end
def tags
return [] if api_metadata.blank?
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalizable_for_artist_finder?
url !~ %r!^https?://www.deviantart.com/!
end
def normalized_for_artist_finder?
url =~ %r!^https?://www.deviantart.com/!
end
def normalize_for_artist_finder!
profile_url
end
protected
def normalized_url
@normalized_url ||= self.class.normalize(url)
end
public
def page
options = Danbooru.config.httparty_options.deep_merge(format: :plain, headers: { "Accept-Encoding" => "gzip" })
resp = HTTParty.get(normalized_url, **options)
body = Zlib.gunzip(resp.body)
options = Danbooru.config.httparty_options.deep_merge(
format: :plain,
headers: { "Accept-Encoding" => "gzip" }
)
resp = HTTParty.get(page_url, **options)
if resp.success?
body = Zlib.gunzip(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end
Nokogiri::HTML(body)
end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For private works the UUID will be nil.
@@ -151,29 +183,39 @@ module Sources
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
uuid
end
memoize :uuid
def api_client
api_client = DeviantArtApiClient.new(Danbooru.config.deviantart_client_id, Danbooru.config.deviantart_client_secret, Danbooru.config.httparty_options)
api_client.access_token = Cache.get("da-access-token", 55.minutes) { api_client.access_token.to_hash }
api_client = DeviantArtApiClient.new(
Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret,
Danbooru.config.httparty_options
)
api_client.access_token = Cache.get("da-access-token", 55.minutes) do
api_client.access_token.to_hash
end
api_client
end
memoize :api_client
def api_deviation
return {} if uuid.nil?
api_client.deviation(uuid)
end
memoize :api_deviation
def api_metadata
return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first
end
memoize :api_metadata
def api_download
return {} if uuid.nil?
api_client.download(uuid)
end
memoize :api_download
memoize :page, :uuid, :api_client, :api_deviation, :api_metadata, :api_download
end
end
end

View File

@@ -0,0 +1,35 @@
module Sources
  module Strategies
    # Strategy for moebooru-based boorus (yande.re, konachan.com).
    class Moebooru < Base
      DOMAINS = /(?:[^.]+\.)?yande\.re|konachan\.com/

      def self.match?(*urls)
        urls.compact.any? { |candidate| candidate.match?(DOMAINS) }
      end

      # The host itself identifies the site (e.g. "yande.re").
      def site_name
        URI.parse(url).host
      end

      # Moebooru sites serve a lossless PNG alongside the /jpeg/ sample;
      # rewrite jpeg urls to point at the original image.
      def image_url
        match = %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}.match(url)
        return url if match.nil?
        "#{match[1]}/image/#{match[2]}.png"
      end

      def page_url
        url
      end

      def profile_url
        url
      end

      # Moebooru posts carry no artist attribution.
      def artist_name
        ""
      end
    end
  end
end

View File

@@ -1,182 +1,188 @@
module Sources
module Strategies
class NicoSeiga < Base
extend Memoist
def self.url_match?(url)
url =~ /^https?:\/\/(?:\w+\.)?nico(?:seiga|video)\.jp/
end
URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp!
DIRECT = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+!
PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
def referer_url
if @referer_url =~ /seiga\.nicovideo\.jp\/seiga\/im\d+/ && @url =~ /http:\/\/lohas\.nicoseiga\.jp\/(?:priv|o)\//
@referer_url
else
@url
end
def self.match?(*urls)
urls.compact.any? { |x| x.match?(URL) }
end
def site_name
"Nico Seiga"
end
def unique_id
profile_url =~ /\/illust\/(\d+)/
"nicoseiga" + $1
end
def get
page = load_page
@artist_name, @profile_url = get_profile_from_api
@image_url = get_image_url_from_page(page)
@artist_commentary_title, @artist_commentary_desc = get_artist_commentary_from_api
# Log out before getting the tags.
# The reason for this is that if you're logged in and viewing a non-adult-rated work, the tags will be added with javascript after the page has loaded meaning we can't extract them easily.
# This does not apply if you're logged out (or if you're viewing an adult-rated work).
agent.cookie_jar.clear!
agent.get(normalized_url) do |page|
@tags = get_tags_from_page(page)
end
end
def normalized_for_artist_finder?
url =~ %r!https?://seiga\.nicovideo\.jp/user/illust/\d+/!i
end
def normalizable_for_artist_finder?
url =~ %r!https?://seiga\.nicovideo\.jp/seiga/im\d+!i
end
def normalize_for_artist_finder!
page = load_page
@illust_id = get_illust_id_from_url
@artist_name, @profile_url = get_profile_from_api
@profile_url + "/"
end
protected
def api_client
NicoSeigaApiClient.new(get_illust_id_from_url)
end
def get_illust_id_from_url
if normalized_url =~ %r!http://seiga.nicovideo.jp/seiga/im(\d+)!
$1.to_i
else
nil
end
end
def load_page
page = agent.get(normalized_url)
if page.search("a#link_btn_login").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nico-seiga-session")
@agent = nil
page = agent.get(normalized_url)
def image_urls
if url =~ DIRECT
return [url]
end
page
end
def get_profile_from_api
return [api_client.moniker, "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"]
end
def get_image_url_from_page(page)
link = page.search("a#illust_link")
if link.any?
image_url = "http://seiga.nicovideo.jp" + link[0]["href"]
page = agent.get(image_url) # need to follow this redirect while logged in or it won't work
if page.is_a?(Mechanize::Image)
return page.uri.to_s
return [page.uri.to_s]
end
images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//}
if images.any?
image_url = "http://lohas.nicoseiga.jp" + images[0]["data-src"]
end
else
image_url = nil
end
return image_url
end
def get_tags_from_page(page)
links = page.search("a.tag")
links.map do |node|
[node.text, "http://seiga.nicovideo.jp" + node.attr("href")]
end
end
def get_artist_commentary_from_api
[api_client.title, api_client.desc]
end
def normalized_url
@normalized_url ||= begin
if url =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{/seiga/im\d+}
url
else
nil
return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]]
end
end
raise "image url not found for (#{url}, #{referer_url})"
end
def page_url
[url, referer_url].each do |x|
if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{/seiga/im\d+}
return x
end
end
return super
end
def profile_url
if url =~ PROFILE
return url
end
"http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
end
def artist_name
api_client.moniker
end
def artist_commentary_title
api_client.title
end
def artist_commentary_desc
api_client.desc
end
def headers
super.merge(
"Referer" => "https://seiga.nicovideo.jp"
)
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
url =~ PAGE || url =~ PROFILE
end
def normalize_for_artist_finder
"#{profile_url}/"
end
def unique_id
"nicoseiga#{api_client.user_id}"
end
def tags
string = page.at("meta[name=keywords]").try(:[], "content") || ""
string.split(/,/).map do |name|
[name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"]
end
end
memoize :tags
public
def api_client
NicoSeigaApiClient.new(illust_id)
end
memoize :api_client
def illust_id
if page_url =~ PAGE
return $1.to_i
end
return nil
end
def page
doc = agent.get(page_url)
if doc.search("a#link_btn_login").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nico-seiga-session")
doc = agent.get(page_url)
end
doc
end
memoize :page
def agent
@agent ||= begin
mech = Mechanize.new
mech.redirect_ok = false
mech.keep_alive = false
mech = Mechanize.new
mech.redirect_ok = false
mech.keep_alive = false
session = Cache.get("nico-seiga-session")
if session
cookie = Mechanize::Cookie.new("user_session", session)
cookie.domain = ".nicovideo.jp"
cookie.path = "/"
mech.cookie_jar.add(cookie)
else
mech.get("https://account.nicovideo.jp/login") do |page|
page.form_with(:id => "login_form") do |form|
form["mail_tel"] = Danbooru.config.nico_seiga_login
form["password"] = Danbooru.config.nico_seiga_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "user_session"}.first
if session
Cache.put("nico-seiga-session", session.value, 1.month)
else
raise "Session not found"
end
end
# This cookie needs to be set to allow viewing of adult works
cookie = Mechanize::Cookie.new("skip_fetish_warning", "1")
cookie.domain = "seiga.nicovideo.jp"
session = Cache.get("nico-seiga-session")
if session
cookie = Mechanize::Cookie.new("user_session", session)
cookie.domain = ".nicovideo.jp"
cookie.path = "/"
mech.cookie_jar.add(cookie)
mech.redirect_ok = true
mech
else
mech.get("https://account.nicovideo.jp/login") do |page|
page.form_with(:id => "login_form") do |form|
form["mail_tel"] = Danbooru.config.nico_seiga_login
form["password"] = Danbooru.config.nico_seiga_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "user_session"}.first
if session
Cache.put("nico-seiga-session", session.value, 1.month)
else
raise "Session not found"
end
end
end
memoize :api_client
# This cookie needs to be set to allow viewing of adult works
cookie = Mechanize::Cookie.new("skip_fetish_warning", "1")
cookie.domain = "seiga.nicovideo.jp"
cookie.path = "/"
mech.cookie_jar.add(cookie)
mech.redirect_ok = true
mech
end
memoize :agent
end
end
end

View File

@@ -1,155 +1,158 @@
module Sources
module Strategies
class Nijie < Base
attr_reader :image_urls
PICTURE = %r{pic\d+\.nijie.info/nijie_picture/}
PAGE = %r{\Ahttps?://nijie\.info/view\.php.+id=\d+}
DIFF = %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
def self.url_match?(url)
url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/
end
def initialize(url, referer_url=nil)
super(normalize_url(url), normalize_url(referer_url))
end
def referer_url
if @referer_url =~ /nijie\.info\/view\.php.+id=\d+/ && @url =~ /pic\d+\.nijie.info\/nijie_picture\//
@referer_url
else
@url
end
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?nijie\.info/) }
end
def site_name
"Nijie"
end
def image_urls
if url =~ PICTURE
return [url]
end
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
if url =~ DIFF
return [normalize_thumbnails(url)]
end
page.search("div#gallery a > img").map do |img|
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
normalize_thumbnails("https:" + img.attr("src"))
end.uniq
end
def page_url
[url, referer_url].each do |x|
if x =~ PAGE
return x
end
if x =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
return "https://nijie.info/view.php?id=#{$1}"
end
end
return super
end
def profile_url
links = page.search("a.name")
if links.any?
return "https://nijie.info/" + links[0]["href"]
end
return nil
end
def artist_name
links = page.search("a.name")
if links.any?
return links[0].text
end
return nil
end
def artist_commentary_title
page.search("h2.illust_title").text
end
def artist_commentary_desc
page.search('meta[property="og:description"]').attr("content").value
end
def tags
links = page.search("div#view-tag a").find_all do |node|
node["href"] =~ /search\.php/
end
if links.any?
return links.map do |node|
[node.inner_text, "https://nijie.info" + node.attr("href")]
end
end
return []
end
def unique_id
profile_url =~ /nijie\.info\/members.php\?id=(\d+)/
"nijie" + $1.to_s
end
def image_url
image_urls.first
end
def get
page = agent.get(referer_url)
if page.search("div#header-login-container").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nijie-session")
@agent = nil
page = agent.get(referer_url)
end
@artist_name, @profile_url = get_profile_from_page(page)
@image_urls = get_image_urls_from_page(page)
@tags = get_tags_from_page(page)
@artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page)
end
protected
public
def self.to_dtext(text)
text = text.gsub(/\r\n|\r/, "<br>")
DText.from_html(text).strip
end
def get_commentary_from_page(page)
title = page.search("h2.illust_title").text
desc = page.search('meta[property="og:description"]').attr("content").value
[title, desc]
def normalize_thumbnails(x)
x.gsub(%r!__rs_l120x120/!i, "")
end
def get_profile_from_page(page)
links = page.search("a.name")
def page
doc = agent.get(page_url)
if links.any?
profile_url = "http://nijie.info/" + links[0]["href"]
artist_name = links[0].text
else
profile_url = nil
artist_name = nil
if doc.search("div#header-login-container").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nijie-session")
doc = agent.get(page_url)
end
return [artist_name, profile_url].compact
end
def get_image_urls_from_page(page)
page.search("div#gallery a > img").map do |img|
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
url = "https:" + img.attr("src")
normalize_image_url(url)
end
end
def get_tags_from_page(page)
# puts page.root.to_xhtml
links = page.search("div#view-tag a").find_all do |node|
node["href"] =~ /search\.php/
end
if links.any?
links.map do |node|
[node.inner_text, "http://nijie.info" + node.attr("href")]
end
else
[]
end
end
# Canonicalize popup viewer URLs to the plain view page; all other URLs
# pass through unchanged.
def normalize_url(url)
  popup = url.match(%r!https?://nijie\.info/view_popup\.php.+id=(\d+)!)
  popup ? "http://nijie.info/view.php?id=#{popup[1]}" : url
end
def normalize_image_url(image_url)
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
image_url = image_url.gsub(%r!__rs_l120x120/!i, "")
end
image_url = image_url.gsub(%r!\Ahttp:!i, "https:")
image_url
return doc
end
memoize :page
def agent
@agent ||= begin
mech = Mechanize.new
mech = Mechanize.new
session = Cache.get("nijie-session")
if session
cookie = Mechanize::Cookie.new("NIJIEIJIEID", session)
cookie.domain = ".nijie.info"
cookie.path = "/"
mech.cookie_jar.add(cookie)
else
mech.get("http://nijie.info/login.php") do |page|
page.form_with(:action => "/login_int.php") do |form|
form['email'] = Danbooru.config.nijie_login
form['password'] = Danbooru.config.nijie_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first
Cache.put("nijie-session", session.value, 1.month) if session
end
# This cookie needs to be set to allow viewing of adult works while anonymous
cookie = Mechanize::Cookie.new("R18", "1")
session = Cache.get("nijie-session")
if session
cookie = Mechanize::Cookie.new("NIJIEIJIEID", session)
cookie.domain = ".nijie.info"
cookie.path = "/"
mech.cookie_jar.add(cookie)
else
mech.get("https://nijie.info/login.php") do |page|
page.form_with(:action => "/login_int.php") do |form|
form['email'] = Danbooru.config.nijie_login
form['password'] = Danbooru.config.nijie_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first
Cache.put("nijie-session", session.value, 1.day) if session
end
mech
# This cookie needs to be set to allow viewing of adult works while anonymous
cookie = Mechanize::Cookie.new("R18", "1")
cookie.domain = ".nijie.info"
cookie.path = "/"
mech.cookie_jar.add(cookie)
mech
rescue Mechanize::ResponseCodeError => x
if x.response_code.to_i == 429
sleep(5)
retry
else
raise
end
end
memoize :agent
end
end
end

View File

@@ -0,0 +1,43 @@
module Sources
  module Strategies
    # Fallback strategy used when no site-specific strategy recognizes a
    # URL. It treats the URL itself as both the page and the image.
    class Null < Base
      # Matches everything, so this strategy must be tried last.
      def self.match?(*urls)
        true
      end

      # The URL is its own (only) image.
      def image_urls
        [url]
      end

      def page_url
        url
      end

      # Unknown sites are considered already normalized for the artist
      # finder and offer no further normalization.
      def normalized_for_artist_finder?
        true
      end

      def normalizable_for_artist_finder?
        false
      end

      def normalize_for_artist_finder
        url
      end

      # Hostname of the URL, or "N/A" when it can't be parsed.
      def site_name
        host = URI.parse(url).hostname
        host || "N/A"
      rescue StandardError
        "N/A"
      end

      def unique_id
        url
      end

      # No rewriting: pass the download request through untouched.
      def rewrite(url, headers, data)
        [url, headers, data]
      end
    end
  end
end

View File

@@ -1,62 +1,80 @@
# html page urls:
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
#
# image urls:
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
#
# artist urls:
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
module Sources::Strategies
class Pawoo < Base
attr_reader :image_urls
IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)!
def self.url_match?(url)
PawooApiClient::Status.is_match?(url) || PawooApiClient::Account.is_match?(url)
end
def referer_url
normalized_url
# A URL belongs to this strategy when it is a pawoo image asset, a status
# page, or an account page.
def self.match?(*urls)
  urls.compact.any? do |candidate|
    candidate =~ IMAGE ||
      PawooApiClient::Status.is_match?(candidate) ||
      PawooApiClient::Account.is_match?(candidate)
  end
end
def site_name
"Pawoo"
end
def api_response
@response ||= PawooApiClient.new.get(normalized_url)
def image_url
image_urls.first
end
def get
response = api_response
@artist_name = response.account_name
@profile_url = response.profile_url
@image_url = response.image_urls.first
@image_urls = response.image_urls
@tags = response.tags
@artist_commentary_title = nil
@artist_commentary_desc = response.commentary
end
def normalized_url
if self.class.url_match?(@url)
@url
elsif self.class.url_match?(@referer_url)
@referer_url
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
# Full-size image URLs.
#
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
def image_urls
  small = url.match(%r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i)
  return ["https://img.pawoo.net/media_attachments/files/#{small[1]}/original/#{small[2]}"] if small

  return [url] if url.match?(%r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i)

  api_response.image_urls
end
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
# Whichever of (url, referer_url) is a status page permalink:
#
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
def page_url
  status = [url, referer_url].find { |x| PawooApiClient::Status.is_match?(x) }
  status || super
end
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
# The artist's account page, built from the URL when possible, otherwise
# taken from the API:
#
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
def profile_url
  account = url.match(PawooApiClient::PROFILE2)
  return "https://pawoo.net/@#{account[1]}" if account

  api_response.profile_url
end
def artist_name
api_response.account_name
end
def artist_commentary_title
nil
end
def artist_commentary_desc
api_response.commentary
end
def tags
api_response.tags
end
def normalizable_for_artist_finder?
true
end
def normalize_for_artist_finder!
get
@profile_url || @url
def normalize_for_artist_finder
profile_url
end
def dtext_artist_commentary_desc
@@ -68,5 +86,18 @@ module Sources::Strategies
end
end.strip
end
public
# Fetch the API record for whichever of (url, referer_url) the API client
# recognizes; nil when neither resolves.
#
# Fixes: reuse a single PawooApiClient instead of constructing one per
# candidate URL, and rename the local — it holds the API response, not the
# client.
def api_response
  client = PawooApiClient.new
  [url, referer_url].each do |candidate|
    response = client.get(candidate)
    return response if response
  end
  nil
end
memoize :api_response
end
end

View File

@@ -1,122 +1,23 @@
# encoding: UTF-8
require 'csv'
module Sources
module Strategies
class Pixiv < Base
attr_reader :zip_url, :ugoira_frame_data, :ugoira_content_type
MONIKER = %r!(?:[a-zA-Z0-9_-]+)!
PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z!
EXT = %r!(?:jpg|jpeg|png|gif)!i
MONIKER = '(?:[a-zA-Z0-9_-]+)'
TIMESTAMP = '(?:[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2})'
EXT = "(?:jpg|jpeg|png|gif)"
WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)!
I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)!
IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)!
PXIMG = %r!(?:\A(?:https?://)?i\.pximg\.net)!
TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)!
NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))!
FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))!
FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))!
WEB = '(?:\A(?:https?://)?www\.pixiv\.net)'
I12 = '(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)'
IMG = '(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)'
PXIMG = '(?:\A(?:https?://)?i\.pximg\.net)'
TOUCH = '(?:\A(?:https?://)?touch\.pixiv\.net)'
def self.url_match?(url)
url =~ /#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}/i
end
def referer_url
if @referer_url =~ /pixiv\.net\/member_illust.+mode=medium/ && @url =~ /#{IMG}|#{I12}/
@referer_url
else
@url
end
end
def site_name
"Pixiv"
end
def unique_id
@pixiv_moniker
end
def fake_referer
"http://www.pixiv.net"
end
def normalized_for_artist_finder?
url =~ %r!\Ahttp://www\.pixiv\.net/member\.php\?id=[0-9]+\z/!
end
def normalizable_for_artist_finder?
has_moniker? || sample_image? || full_image? || work_page?
end
def normalize_for_artist_finder!
@illust_id = illust_id_from_url!
@metadata = get_metadata_from_papi(@illust_id)
"http://www.pixiv.net/member.php?id=#{@metadata.user_id}/"
end
def translate_tag(tag)
normalized_tag = tag.gsub(/\d+users入り\z/i, "")
translated_tags = super(normalized_tag)
if translated_tags.empty? && normalized_tag.include?("/")
translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
end
translated_tags
end
def get
return unless illust_id_from_url
@illust_id = illust_id_from_url
@metadata = get_metadata_from_papi(@illust_id)
page = agent.get(URI.parse(normalized_url))
if page.search("body.not-logged-in").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("pixiv-phpsessid")
@agent = nil
page = agent.get(URI.parse(normalized_url))
end
@artist_name = @metadata.name
@profile_url = "http://www.pixiv.net/member.php?id=#{@metadata.user_id}"
@pixiv_moniker = @metadata.moniker
@zip_url, @ugoira_frame_data, @ugoira_content_type = get_zip_url_from_api
@tags = @metadata.tags.map do |tag|
[tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
end
@page_count = @metadata.page_count
@artist_commentary_title = @metadata.artist_commentary_title
@artist_commentary_desc = @metadata.artist_commentary_desc
is_manga = @page_count > 1
if !@zip_url
page = manga_page_from_url(@url).to_i
@image_url = image_urls[page]
end
end
def rewrite_thumbnails(thumbnail_url, is_manga=nil)
thumbnail_url = rewrite_new_medium_images(thumbnail_url)
thumbnail_url = rewrite_medium_ugoiras(thumbnail_url)
thumbnail_url = rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
return thumbnail_url
end
def agent
@agent ||= PixivWebAgent.build
end
def file_url
image_url || zip_url
end
def image_urls
@metadata.pages
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}|#{FANBOX_IMAGE}/i) }
end
def self.to_dtext(text)
@@ -137,18 +38,147 @@ module Sources
DText.from_html(text)
end
def illust_id_from_url
if sample_image? || full_image? || work_page?
illust_id_from_url!
else
nil
def site_name
"Pixiv"
end
# All image URLs for the work, with CDN hosts rewritten. Falls back to the
# raw url when the work id is invalid.
def image_urls
  image_urls_sub.map { |image_url| rewrite_cdn(image_url) }
rescue PixivApiClient::BadIDError
  [url]
end
def page_url
if novel_id.present?
return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
end
rescue Sources::Error
raise if Rails.env.test?
if fanbox_id.present?
return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}"
end
if illust_id.present?
return "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}"
end
return url
rescue PixivApiClient::BadIDError
nil
end
def canonical_url
return image_url
end
# The artist's member page. Prefer an explicit profile URL among
# (url, referer_url); otherwise build one from the API metadata. Returns
# nil for invalid work ids.
def profile_url
  explicit = [url, referer_url].find { |x| x =~ PROFILE }
  explicit || "https://www.pixiv.net/member.php?id=#{metadata.user_id}"
rescue PixivApiClient::BadIDError
  nil
end
def illust_id_from_url!
def artist_name
metadata.name
rescue PixivApiClient::BadIDError
nil
end
def artist_commentary_title
metadata.artist_commentary_title
rescue PixivApiClient::BadIDError
nil
end
def artist_commentary_desc
metadata.artist_commentary_desc
rescue PixivApiClient::BadIDError
nil
end
# HTTP headers for downloading. Fanbox images additionally require the
# logged-in session cookie.
def headers
  unless fanbox_id.present?
    return { "Referer" => "https://www.pixiv.net" }
  end

  {
    "Referer" => "https://www.pixiv.net/fanbox",
    "Cookie" => HTTP::Cookie.cookie_value(agent.cookies)
  }
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
illust_id.present? || novel_id.present? || fanbox_id.present?
end
def unique_id
moniker
end
# [tag, search_url] pairs from the API metadata; empty for invalid ids.
def tags
  metadata.tags.map do |tag_name|
    search = "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag_name}.to_param}"
    [tag_name, search]
  end
rescue PixivApiClient::BadIDError
  []
end
memoize :tags
# Translate a Pixiv tag into Danbooru tags. Bookmark-count suffixes
# ("…users入り") are stripped first; if nothing translates and the tag is
# slash-separated, each segment is tried individually.
#
# Fix: the block parameter previously shadowed the method's `tag` argument.
def translate_tag(tag)
  normalized_tag = tag.gsub(/\d+users入り\z/i, "")
  translated_tags = super(normalized_tag)

  if translated_tags.empty? && normalized_tag.include?("/")
    translated_tags = normalized_tag.split("/").flat_map { |segment| super(segment) }
  end

  translated_tags
end
public
# Raw page URLs from the API, before CDN rewriting. Normalizing raw URLs
# directly is unreliable, so we always ask the API for the canonical list.
def image_urls_sub
  return [metadata.pages[manga_page]] if manga_page.present?

  # For ugoira works the API returns a Hash of zip variants instead of a
  # list of pages.
  return [ugoira_zip_url] if metadata.pages.is_a?(Hash)

  metadata.pages
end
# Strip the Akamai edgesuite CDN suffix so the URL references pixiv.net
# directly.
def rewrite_cdn(x)
  return x unless x.match?(%r{\Ahttps?://(?:\w+\.)?pixiv\.net\.edgesuite\.net})

  x.sub(".edgesuite.net", "")
end
# in order to prevent recursive loops, this method should not make any
# api calls and only try to extract the illust_id from the url. therefore,
# even though it makes sense to reference page_url here, it will only look
# at (url, referer_url).
def illust_id
# http://img18.pixiv.net/img/evazion/14901720.png
#
# http://i2.pixiv.net/img18/img/evazion/14901720.png
@@ -165,228 +195,166 @@ module Sources
#
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
$1
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
elsif url =~ /illust_id=(\d+)/i
$1
# http://www.pixiv.net/i/18557054
elsif url =~ %r!pixiv\.net/i/(\d+)!i
$1
else
raise Sources::Error.new("Couldn't get illust ID from URL: #{url}")
end
end
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
# => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
#
# http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
# => http://i.pximg.net/img-original/img/2014/05/15/23/53/59/43521009_p1.jpg
def rewrite_new_medium_images(thumbnail_url)
if thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i ||
thumbnail_url =~ %r!/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i
page = manga_page_from_url(@url).to_i
thumbnail_url = @metadata.pages[page]
return $1
end
thumbnail_url
end
# http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira600x600.zip
# => http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira1920x1080.zip
def rewrite_medium_ugoiras(thumbnail_url)
if thumbnail_url =~ %r!/img-zip-ugoira/img/.*/\d+_ugoira600x600.zip!i
thumbnail_url = thumbnail_url.sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end
thumbnail_url
end
# If the thumbnail is for a manga gallery, it needs to be rewritten like this:
#
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
#
# Otherwise, it needs to be rewritten like this:
#
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# => http://i2.pixiv.net/img18/img/evazion/14901720.png
#
def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
if thumbnail_url =~ %r!/img/#{MONIKER}/\d+_[ms]\.#{EXT}!i
if is_manga.nil?
page_count = @metadata.page_count
is_manga = page_count > 1
[url, referer_url].each do |x|
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
if x =~ /illust_id=(\d+)/i
return $1
end
if is_manga
page = manga_page_from_url(@url)
return thumbnail_url.sub(/_[ms]\./, "_big_p#{page}.")
else
return thumbnail_url.sub(/_[ms]\./, ".")
# http://www.pixiv.net/i/18557054
if x =~ %r!pixiv\.net/i/(\d+)!i
return $1
end
end
return thumbnail_url
raise Sources::Error.new("Couldn't get illust ID from URL (#{url}, #{referer_url})")
end
memoize :illust_id
# The novel id from whichever of (url, referer_url) is a novel page, or
# nil when neither is.
def novel_id
  [url, referer_url].each do |candidate|
    match = NOVEL_PAGE.match(candidate)
    return match[1] if match
  end
  nil
end
memoize :novel_id
# The fanbox post id from whichever of (url, referer_url) is a fanbox post
# page or a fanbox image, or nil when neither matches.
def fanbox_id
  [url, referer_url].each do |candidate|
    return $1 if candidate =~ FANBOX_PAGE || candidate =~ FANBOX_IMAGE
  end
  nil
end
memoize :fanbox_id
def agent
PixivWebAgent.build
end
memoize :agent
# The work's HTML page. Retries once with a fresh login when the cached
# PHPSESSID has expired.
#
# Fix: the fetched page was never assigned — `page.search` then re-entered
# this (not-yet-memoized) method recursively instead of inspecting the
# fetched document.
def page
  doc = agent.get(URI.parse(page_url))

  if doc.search("body.not-logged-in").any?
    # Session cache is invalid, clear it and log in normally.
    Cache.delete("pixiv-phpsessid")
    @agent = nil
    doc = agent.get(URI.parse(page_url))
  end

  doc
end
memoize :page
# API metadata for the work, dispatched by work kind: novel, fanbox post,
# or regular illust.
def metadata
  client = PixivApiClient.new
  return client.novel(novel_id) if novel_id.present?
  return client.fanbox(fanbox_id) if fanbox_id.present?

  client.work(illust_id)
end
memoize :metadata
# The artist moniker, extracted from the URL when it embeds one, otherwise
# taken from the API metadata.
def moniker
  case url
  when %r!#{IMG}/img/(#{MONIKER})!i then $1
  when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i then $1
  when %r!#{WEB}/stacc/(#{MONIKER})/?$!i then $1
  else metadata.moniker
  end
end
memoize :moniker
def page_count
metadata.page_count
end
def manga_page_from_url(url)
def data
return {
ugoira_frame_data: ugoira_frame_data
}
end
def ugoira_zip_url
if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"]
return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end
end
memoize :ugoira_zip_url
def ugoira_frame_data
return metadata.json.dig("metadata", "frames")
end
memoize :ugoira_frame_data
# MIME type of the ugoira frames, inferred from the extensions in the
# API's image_urls. Raises when no known extension is present.
def ugoira_content_type
  frame_urls = metadata.json["image_urls"].to_s
  return "image/jpeg" if frame_urls =~ /\.jpg/
  return "image/png" if frame_urls =~ /\.png/
  return "image/gif" if frame_urls =~ /\.gif/

  raise Sources::Error.new("content type not found for (#{url}, #{referer_url})")
end
memoize :ugoira_content_type
def is_manga?
page_count > 1
end
# Returns the current page number of the manga. This will not
# make any api calls and only looks at (url, referer_url).
def manga_page
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
# http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i
$1
return $1.to_i
end
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
elsif url =~ /page=(\d+)/i
$1
else
0
end
end
def get_moniker_from_url
case url
when %r!#{IMG}/img/(#{MONIKER})!i
$1
when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
$1
when %r!#{WEB}/stacc/(#{MONIKER})/?$!i
$1
else
false
end
end
def has_moniker?
get_moniker_from_url != false
end
def get_image_url_from_page(page, is_manga)
if is_manga
elements = page.search("div.works_display a img").find_all do |node|
node["src"] !~ /source\.pixiv\.net/
[url, referer_url].each do |x|
if x =~ /page=(\d+)/i
return $1.to_i
end
else
elements = page.search("div.works_display div img.big")
elements = page.search("div.works_display div img") if elements.empty?
end
if elements.any?
element = elements.first
thumbnail_url = element.attr("src") || element.attr("data-src")
return rewrite_thumbnails(thumbnail_url, is_manga)
end
if page.body =~ /"original":"(https:.+?)"/
return $1.gsub(/\\\//, '/')
end
end
def get_zip_url_from_api
if @metadata.pages.is_a?(Hash) && @metadata.pages["ugoira600x600"]
zip_url = @metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
frame_data = @metadata.json["metadata"]["frames"]
content_type = nil
case @metadata.json["image_urls"].to_s
when /\.jpg/
content_type = "image/jpeg"
when /\.png/
content_type = "image/png"
when /\.gif/
content_type = "image/gif"
end
return [zip_url, frame_data, content_type]
end
end
def get_zip_url_from_page(page)
scripts = page.search("body script").find_all do |node|
node.text =~ /_ugoira600x600\.zip/
end
if scripts.any?
javascript = scripts.first.text
json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1]
data = JSON.parse(json)
zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
frame_data = data["frames"]
content_type = data["mime_type"]
return [zip_url, frame_data, content_type]
end
end
def normalized_url
"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
end
def get_metadata_from_papi(illust_id)
@metadata ||= PixivApiClient.new.works(illust_id)
end
def work_page?
return true if url =~ %r!(?:#{WEB}|#{TOUCH})/member_illust\.php! && url =~ %r!mode=(?:medium|big|manga|manga_big)! && url =~ %r!illust_id=\d+!
return true if url =~ %r!(?:#{WEB}|#{TOUCH})/i/\d+$!i
return false
end
def full_image?
# http://img18.pixiv.net/img/evazion/14901720.png?1234
return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
# http://i2.pixiv.net/img18/img/evazion/14901720.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
# http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
return true if url =~ %r!#{I12}/img-original/img/#{TIMESTAMP}/\d+_p\d+\.#{EXT}$!i
# http://i.pximg.net/img-original/img/2017/03/22/17/40/51/62041488_p0.jpg
return true if url =~ %r!#{PXIMG}/img-original/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
return true if url =~ %r!(#{I12}|#{PXIMG})/img-zip-ugoira/img/#{TIMESTAMP}/\d+_ugoira\d+x\d+\.zip$!i
return false
end
def sample_image?
# http://img18.pixiv.net/img/evazion/14901720_m.png
return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
# http://i2.pixiv.net/c/64x64/img-master/img/2014/10/09/12/59/50/46441917_square1200.jpg
return true if url =~ %r!#{I12}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}$!i
# http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
return true if url =~ %r!#{PXIMG}/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i.pximg.net/c/600x600/img-master/img/2017/03/22/17/40/51/62041488_p0_master1200.jpg
return true if url =~ %r!#{PXIMG}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
# http://i2.pixiv.net/img-inf/img/2010/11/30/08/54/06/14901765_64x64.jpg
return true if url =~ %r!#{I12}/img-inf/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
return false
return nil
end
memoize :manga_page
end
end
end

View File

@@ -1,28 +1,52 @@
module Sources::Strategies
class Tumblr < Base
extend Memoist
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
MD5 = %r{(?<md5>[0-9a-f]{32})}i
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i
EXT = %r{(?<ext>\w+)}
IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
def self.url_match?(url)
blog_name, post_id = parse_info_from_url(url)
blog_name.present? && post_id.present?
# A URL belongs to this strategy when it is a tumblr image asset or a
# post/image permalink with a blog name and post id.
def self.match?(*urls)
  urls.compact.any? do |candidate|
    blog, id = parse_info_from_url(candidate)
    candidate =~ IMAGE || (blog.present? && id.present?)
  end
end
def referer_url
blog_name, post_id = self.class.parse_info_from_url(normalized_url)
"https://#{blog_name}.tumblr.com/post/#{post_id}"
end
def tags
post[:tags].map do |tag|
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
[tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI::escape(tag.tr(" _-", "-"))}"]
end.uniq
# [blog_name, post_id] extracted from a post/image permalink; an empty
# array when the URL is not a post URL.
def self.parse_info_from_url(url)
  match = POST.match(url)
  match ? [match[:blog_name], match[:post_id]] : []
end
def site_name
"Tumblr"
end
# Deduped full-size image URLs: normalize CDN hosts, then probe for the
# largest available variant of each.
def image_urls
  image_urls_sub.uniq.map { |raw| find_largest(normalize_cdn(raw)) }.compact.uniq
end
# Canonical post permalink built from whichever of (url, referer_url) is a
# post URL; defers to the base strategy otherwise.
def page_url
  permalink = [url, referer_url].find { |x| x =~ POST }
  return super if permalink.nil?

  blog_name, post_id = self.class.parse_info_from_url(permalink)
  "https://#{blog_name}.tumblr.com/post/#{post_id}"
end
def profile_url
"https://#{artist_name}.tumblr.com/"
end
@@ -35,8 +59,10 @@ module Sources::Strategies
case post[:type]
when "text", "link"
post[:title]
when "answer"
"#{post[:asking_name]} asked: #{post[:question]}"
else
nil
end
@@ -46,94 +72,133 @@ module Sources::Strategies
case post[:type]
when "text"
post[:body]
when "link"
post[:description]
when "photo", "video"
post[:caption]
when "answer"
post[:answer]
else
nil
end
end
# [tag, tagged-search-url] pairs. Tumblr treats space, underscore, and
# hyphen as interchangeable in tags, so all three are normalized to
# underscore before building the search link.
def tags
  post[:tags].map do |tag|
    # tr is the idiomatic (and faster) char-for-char equivalent of
    # gsub(/[ _-]/, "_"); the trailing "-" in the from-string is literal.
    etag = tag.tr(" _-", "_")
    [etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
  end.uniq
end
memoize :tags
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip
end
def image_url
image_urls.first
end
public
def image_urls
urls = case post[:type]
when "photo"
post[:photos].map do |photo|
self.class.normalize_image_url(photo[:original_size][:url])
end
when "video"
[post[:video_url]]
else
[]
def image_urls_sub
list = []
if url =~ IMAGE
list << url
end
urls += self.class.parse_inline_images(artist_commentary_desc)
urls
end
if page_url !~ POST
return list
end
def get
end
module HelperMethods
extend ActiveSupport::Concern
module ClassMethods
def parse_info_from_url(url)
url =~ %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
[$1, $2]
end
def parse_inline_images(text)
html = Nokogiri::HTML.fragment(text)
image_urls = html.css("img").map { |node| node["src"] }
image_urls = image_urls.map(&method(:normalize_image_url))
image_urls
end
def normalize_image_url(url)
url, _, _ = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {})
url
if post[:type] == "photo"
list += post[:photos].map do |photo|
photo[:original_size][:url]
end
end
def normalized_url
if self.class.url_match?(@referer_url)
@referer_url
elsif self.class.url_match?(@url)
@url
if post[:type] == "video"
list << post[:video_url]
end
if inline_images.any?
list += inline_images.to_a
end
if list.any?
return list
end
raise "image url not found for (#{url}, #{referer_url})"
end
# Normalize cdn subdomains.
#
# https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# Rewrite edgecast CDN hosts back to media.tumblr.com.
#
# NOTE(review): only the media.tumblr.com edgecast prefix is handled; the
# data.tumblr.com variant passes through untouched — confirm that is
# intended.
def normalize_cdn(x)
  edgecast = %r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/media\.tumblr\.com!i
  x.sub(edgecast, "http://media.tumblr.com")
end
# Look for the biggest available version on media.tumblr.com. A bigger
# version may or may not exist.
#
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
# => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
#
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
# => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
#
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
#
# http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
# => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
#
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# Probe media.tumblr.com for the largest size variant that actually
# exists — a bigger version may or may not be there. Non-image URLs pass
# through unchanged; returns nil when no candidate responds.
def find_largest(x)
  match = IMAGE.match(x)
  return x unless match

  candidates = [1280, 640, 540, "500h", 500, 400, 250].map do |size|
    "https://media.tumblr.com/#{match[:dir]}#{match[:filename]}_#{size}.#{match[:ext]}"
  end
  candidates.find { |candidate| http_exists?(candidate, headers) }
end
module ApiMethods
def client
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
end
def api_response
blog_name, post_id = self.class.parse_info_from_url(normalized_url)
client.posts(blog_name, post_id)
end
def post
api_response[:posts].first
end
def inline_images
html = Nokogiri::HTML.fragment(artist_commentary_desc)
html.css("img").map { |node| node["src"] }
end
memoize :inline_images
include ApiMethods
include HelperMethods
def client
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
memoize :client, :api_response
TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
end
memoize :client
# The Tumblr API response for the post; raises when no post permalink can
# be derived from (url, referer_url).
def api_response
  blog, id = self.class.parse_info_from_url(page_url)
  raise "Page url not found for (#{url}, #{referer_url})" if blog.nil?

  client.posts(blog, id)
end
memoize :api_response
# The single post record from the API response.
# NOTE(review): assumes the API returns at least one post for a valid
# permalink; `first` is nil otherwise — confirm callers tolerate that.
def post
  api_response[:posts].first
end
end
end

View File

@@ -1,52 +1,94 @@
module Sources::Strategies
class Twitter < Base
attr_reader :image_urls
# Any twitter.com page, including the mobile subdomain.
PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i
# Direct media assets on twimg.com. The capture group is reused by callers
# to rebuild the ":orig" variant of the asset URL.
# Fix: a stray "}" inside the pattern required a literal "}" after
# "/media/", so the regexp could never match a real asset URL.
ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)!i
def self.url_match?(url)
self.status_id_from_url(url).present?
# Handles twitter.com pages and direct twimg.com media assets.
def self.match?(*urls)
  urls.compact.any? { |candidate| candidate =~ PAGE || candidate =~ ASSET }
end
def referer_url
normalized_url
end
# https://twitter.com/i/web/status/943446161586733056
# https://twitter.com/motty08111213/status/943446161586733056
def self.status_id_from_url(url)
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
return $1
end
def normalized_url
"https://twitter.com/#{artist_name}/status/#{status_id}"
end
def artist_name
api_response.attrs[:user][:screen_name]
return nil
end
def site_name
"Twitter"
end
def api_response
@api_response ||= TwitterService.new.client.status(status_id, tweet_mode: "extended")
end
def get
attrs = api_response.attrs
@profile_url = "https://twitter.com/" + attrs[:user][:screen_name]
@image_urls = TwitterService.new.image_urls(api_response)
@image_url = @image_urls.first
@artist_commentary_title = ""
@artist_commentary_desc = attrs[:full_text]
@tags = attrs[:entities][:hashtags].map do |text:, indices:|
[text, "https://twitter.com/hashtag/#{text}"]
def image_urls
if url =~ /(#{ASSET}[^:]+)/
return [$1 + ":orig" ]
end
rescue ::Twitter::Error::Forbidden
[url, referer_url].each do |x|
if x =~ PAGE
return service.image_urls(api_response)
end
end
rescue Twitter::Error::NotFound
url
end
memoize :image_urls
# Whichever of (url, referer_url) is a tweet permalink; defers to the base
# strategy when neither contains a status id.
def page_url
  status = [url, referer_url].find { |x| self.class.status_id_from_url(x).present? }
  status || super
end
def normalize_for_artist_finder!
url.downcase
# The artist's profile page, derived from the URL when it names a user
# (skipping the "/i/..." internal paths), otherwise from the tweet's API
# record. Returns nil when the tweet no longer exists.
def profile_url
  match = url.match(%r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i)
  return "https://twitter.com/#{match[1]}" if match && match[1] != "i"

  "https://twitter.com/" + api_response.attrs[:user][:screen_name]
rescue Twitter::Error::NotFound
  nil
end
def artist_name
api_response.attrs[:user][:screen_name]
rescue Twitter::Error::NotFound
nil
end
def artist_commentary_title
""
end
def artist_commentary_desc
api_response.attrs[:full_text]
rescue Twitter::Error::NotFound
nil
end
# Only page URLs (not bare media assets) can be normalized to an artist
# profile.
#
# Fix: removed a dead `true` literal that preceded the real return
# expression and had no effect.
def normalizable_for_artist_finder?
  url =~ PAGE
end
def normalize_for_artist_finder
profile_url.downcase
end
# Hashtags from the tweet, as [tag_text, hashtag_url] pairs.
def tags
  api_response.attrs[:entities][:hashtags].map do |text:, indices:|
    # `indices:` must be named to destructure the hashtag hash but is unused.
    # NOTE(review): implicit hash-to-keyword destructuring in blocks changed
    # in Ruby 3.0 — confirm the target Ruby version before upgrading.
    [text, "https://twitter.com/hashtag/#{text}"]
  end
end
memoize :tags
def dtext_artist_commentary_desc
url_replacements = api_response.urls.map do |obj|
[obj.url.to_s, obj.expanded_url.to_s]
@@ -63,19 +105,23 @@ module Sources::Strategies
desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]')
desc.strip
end
memoize :dtext_artist_commentary_desc
public
def service
TwitterService.new
end
memoize :service
def api_response
service.client.status(status_id, tweet_mode: "extended")
end
memoize :api_response
def status_id
self.class.status_id_from_url(@url) || self.class.status_id_from_url(@referer_url)
end
# https://twitter.com/i/web/status/943446161586733056
# https://twitter.com/motty08111213/status/943446161586733056
def self.status_id_from_url(url)
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
$1
else
nil
end
[url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first
end
memoize :status_id
end
end