Merge pull request #3805 from r888888888/refactor-sources

Refactor sources
2018-08-28 12:13:15 -07:00
parent 604bfb0923 762dc3da24
commit 68c30961ac
71 changed files with 2340 additions and 2430 deletions
--- a/app/controllers/sources_controller.rb
+++ b/app/controllers/sources_controller.rb
@@ -1,20 +1,12 @@
 class SourcesController < ApplicationController
  respond_to :json, :xml
  rescue_from Sources::Site::NoStrategyError, :with => :no_strategy
  def show
-    @source = Sources::Site.new(params[:url], :referer_url => params[:ref])
+    @source = Sources::Strategies.find(params[:url], params[:ref])
    @source.get
    respond_with(@source.to_h) do |format|
      format.xml { render xml: @source.to_h.to_xml(root: "source") }
      format.json { render json: @source.to_h.to_json }
    end
  end
 protected
  def no_strategy
    render json: {message: "Unsupported site"}.to_json, status: 400
  end
 end
--- a/app/controllers/uploads_controller.rb
+++ b/app/controllers/uploads_controller.rb
@@ -5,7 +5,7 @@ class UploadsController < ApplicationController
  def new
    @upload_notice_wiki = WikiPage.titled(Danbooru.config.upload_notice_wiki_page).first
-    @upload, @post, @source, @normalized_url, @remote_size = UploadService::ControllerHelper.prepare(
+    @upload, @post, @source, @remote_size = UploadService::ControllerHelper.prepare(
      url: params[:url], ref: params[:ref]
    )
    respond_with(@upload)
@@ -43,7 +43,7 @@ class UploadsController < ApplicationController
  end
  def preprocess
-    @upload, @post, @source, @normalized_url, @remote_size = UploadService::ControllerHelper.prepare(
+    @upload, @post, @source, @remote_size = UploadService::ControllerHelper.prepare(
      url: params[:url], file: params[:file], ref: params[:ref]
    )
    render body: nil
--- a/app/logical/downloads/file.rb
+++ b/app/logical/downloads/file.rb
@@ -3,16 +3,33 @@ module Downloads
    class Error < Exception ; end
    attr_reader :data, :options
-    attr_accessor :source, :original_source, :downloaded_source
+    attr_accessor :source, :referer
-    def initialize(source, options = {})
+    # Prevent Cloudflare from potentially mangling the image. See issue #3528.
    def self.uncached_url(url, headers = {})
      url = Addressable::URI.parse(url)
      if is_cloudflare?(url, headers)
        url.query_values = (url.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid)
      end
      url
    end
    def self.is_cloudflare?(url, headers = {})
      Cache.get("is_cloudflare:#{url.origin}", 4.hours) do
        res = HTTParty.head(url, { headers: headers }.deep_merge(Danbooru.config.httparty_options))
        raise Error.new("HTTP error code: #{res.code} #{res.message}") unless res.success?
        res.key?("CF-Ray")
      end
    end
    def initialize(source, referer=nil, options = {})
      # source can potentially get rewritten in the course
      # of downloading a file, so check it again
      @source = source
-      @original_source = source
+      @referer = referer
      # the URL actually downloaded after rewriting the original source.
      @downloaded_source = nil
      # we sometimes need to capture data from the source page
      @data = {}
@@ -22,48 +39,31 @@ module Downloads
      @data[:get_thumbnail] = options[:get_thumbnail]
    end
    def rewrite_url
      url, _, _ = before_download(@source, @data)
      return url
    end
    def size
-      url, headers, _ = before_download(@source, @data)
+      strategy = Sources::Strategies.find(source, referer)
-      options = { timeout: 3, headers: headers }.deep_merge(Danbooru.config.httparty_options)
+      options = { timeout: 3, headers: strategy.headers }.deep_merge(Danbooru.config.httparty_options)
-      res = HTTParty.head(url, options)
+
      res = HTTParty.head(strategy.file_url, options)
      if res.success?
        res.content_length
      else
        raise HTTParty::ResponseError.new(res)
      end
    end
    def download!
-      url, headers, @data = before_download(@source, @data)
+      strategy = Sources::Strategies.find(source, referer)
      output_file = Tempfile.new(binmode: true)
-      http_get_streaming(uncached_url(url, headers), output_file, headers)
+      @data = strategy.data
-      @downloaded_source = url
+      http_get_streaming(
-      @source = after_download(url)
+        self.class.uncached_url(strategy.file_url, strategy.headers), 
        output_file, 
        strategy.headers
      )
-      output_file
+      [output_file, strategy]
    end
    def before_download(url, datums)
      original_url = url
      headers = Danbooru.config.http_headers
      RewriteStrategies::Base.strategies.each do |strategy|
        url, headers, datums = strategy.new(url).rewrite(url, headers, datums)
        url = original_url if url.nil?
      end
      return [url, headers, datums]
    end
    def after_download(src)
      src = fix_twitter_sources(src)
      if options[:referer_url].present?
        src = set_source_to_referer(src, options[:referer_url])
      end
      src
    end
    def validate_local_hosts(url)
@@ -111,50 +111,5 @@ module Downloads
        end
      end # while
    end # def
    def fix_twitter_sources(src)
      if src =~ %r!^https?://(?:video|pbs)\.twimg\.com/! && original_source =~ %r!^https?://twitter\.com/!
        original_source
      elsif src =~ %r!^https?://img\.pawoo\.net/! && original_source =~ %r!^https?://pawoo\.net/!
        original_source
      else
        src
      end
    end
    def set_source_to_referer(src, referer)
      if Sources::Strategies::Nijie.url_match?(src) ||
         Sources::Strategies::Twitter.url_match?(src) || Sources::Strategies::Twitter.url_match?(referer) ||
         Sources::Strategies::Pawoo.url_match?(src) ||
         Sources::Strategies::Tumblr.url_match?(src) || Sources::Strategies::Tumblr.url_match?(referer) ||
         Sources::Strategies::ArtStation.url_match?(src) || Sources::Strategies::ArtStation.url_match?(referer)
        strategy = Sources::Site.new(src, :referer_url => referer)
        strategy.referer_url
      else
        src
      end
    end
    private
    # Prevent Cloudflare from potentially mangling the image. See issue #3528.
    def uncached_url(url, headers = {})
      url = Addressable::URI.parse(url)
      if is_cloudflare?(url, headers)
        url.query_values = (url.query_values || {}).merge(danbooru_no_cache: SecureRandom.uuid)
      end
      url
    end
    def is_cloudflare?(url, headers = {})
      Cache.get("is_cloudflare:#{url.origin}", 4.hours) do
        res = HTTParty.head(url, { headers: headers }.deep_merge(Danbooru.config.httparty_options))
        raise Error.new("HTTP error code: #{res.code} #{res.message}") unless res.success?
        res.key?("CF-Ray")
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/art_station.rb
+++ b/app/logical/downloads/rewrite_strategies/art_station.rb
@@ -1,33 +0,0 @@
 module Downloads
  module RewriteStrategies
    class ArtStation < Base
      def rewrite(url, headers, data = {})
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
        if url =~ %r!^https?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!
          original_url, headers = rewrite_large_url(url, headers)
          if http_exists?(original_url, headers)
            url = original_url
          end
        else
          url, headers = rewrite_html_url(url, headers)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_html_url(url, headers)
        return [url, headers] unless Sources::Strategies::ArtStation.url_match?(url)
        source = Sources::Site.new(url)
        source.get
        [source.image_url, headers]
      end
      def rewrite_large_url(url, headers)
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/original/aoi-ogata-hate-city.jpg?1476754974
        url = url.sub(%r!/(?:medium|small|large)/!, "/original/")
        return [url, headers]
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/base.rb
+++ b/app/logical/downloads/rewrite_strategies/base.rb
@@ -1,29 +0,0 @@
 # This is a collection of strategies for normalizing URLs. Most strategies 
 # typically work by parsing and rewriting the URL itself, but some strategies 
 # may delegate to Sources::Strategies to obtain a more canonical URL.
 module Downloads
  module RewriteStrategies
    class Base
      attr_reader :url
      def initialize(url = nil)
        @url = url
      end
      def self.strategies
        [Downloads::RewriteStrategies::Pixiv, Downloads::RewriteStrategies::NicoSeiga, Downloads::RewriteStrategies::ArtStation, Downloads::RewriteStrategies::Twitpic, Downloads::RewriteStrategies::DeviantArt, Downloads::RewriteStrategies::Tumblr, Downloads::RewriteStrategies::Moebooru, Downloads::RewriteStrategies::Twitter, Downloads::RewriteStrategies::Nijie, Downloads::RewriteStrategies::Pawoo]
      end
      def rewrite(url, headers, data = {})
        return [url, headers, data]
      end
    protected
      def http_exists?(url, headers)
        res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
        res.success?
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/deviant_art.rb
+++ b/app/logical/downloads/rewrite_strategies/deviant_art.rb
@@ -1,53 +0,0 @@
 module Downloads
  module RewriteStrategies
    class DeviantArt < Base
      attr_accessor :url, :source
      def initialize(url)
        @url  = url
      end
      def rewrite(url, headers, data = {})
        if url =~ %r{deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/} || url =~ %r{deviantart\.net/.+/[a-z0-9_]+(_by_[a-z0-9_]+)?-d([a-z0-9]+)\.}i
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_html_pages(url, headers)
        if url =~ %r{^https?://.+?\.deviantart\.com/art/} || url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
          return [source.image_url, headers]
        else
          return [url, headers]
        end
      end
      def rewrite_thumbnails(url, headers)
        if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
          match = $1
          url.sub!(match + "200H/", match)
        elsif url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
          match = $1
          url.sub!(match + "PRE/", match)
        elsif url =~ %r{^https?://(?:pre|img)\d{2}\.deviantart\.net/}
          return [source.image_url, headers]
        end
        return [url, headers]
      end
      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::DeviantArt.new(url)
          source.get
          source
        end
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/moebooru.rb
+++ b/app/logical/downloads/rewrite_strategies/moebooru.rb
@@ -1,26 +0,0 @@
 module Downloads
  module RewriteStrategies
    class Moebooru < Base
      DOMAINS = '(?:[^.]+\.)?yande\.re|konachan\.com'
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://(?:#{DOMAINS})}
          url, headers = rewrite_jpeg_versions(url, headers)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_jpeg_versions(url, headers)
        # example: https://yande.re/jpeg/2c6876ac2317fce617e3c5f1a642123b/yande.re%20292092%20hatsune_miku%20tid%20vocaloid.jpg 
        if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}
          url = $1 + "/image/" + $2 + ".png"
        end
        return [url, headers]
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/nico_seiga.rb
+++ b/app/logical/downloads/rewrite_strategies/nico_seiga.rb
@@ -1,66 +0,0 @@
 module Downloads
  module RewriteStrategies
    class NicoSeiga < Base
      attr_accessor :url, :source
      def initialize(url)
        @url  = url
      end
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://lohas\.nicoseiga\.jp} || url =~ %r{https?://seiga\.nicovideo\.jp}
          url, headers = rewrite_headers(url, headers)
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
          url, headers = rewrite_view_big_pages(url, headers)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_headers(url, headers)
        headers["Referer"] = "http://seiga.nicovideo.jp"
        return [url, headers]
      end
      def rewrite_html_pages(url, headers)
        # example: http://seiga.nicovideo.jp/seiga/im1389842
        if url =~ %r{https?://seiga\.nicovideo\.jp/seiga/im\d+}
          return [source.image_url, headers]
        else
          return [url, headers]
        end
      end
      def rewrite_thumbnails(url, headers)
        if url =~ %r{/thumb/\d+}
          return [source.image_url, headers]
        end
        return [url, headers]
      end
      def rewrite_view_big_pages(url, headers)
        # example: http://lohas.nicoseiga.jp/o/40aeedd2848a7780b6046747e75b3566b423a10c/1436307639/5026559
        if url =~ %r{http://lohas\.nicoseiga\.jp/o/}
          return [source.image_url, headers]
        else
          return [url, headers]
        end
      end
      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::NicoSeiga.new(url)
          source.get
          source
        end
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/nijie.rb
+++ b/app/logical/downloads/rewrite_strategies/nijie.rb
@@ -1,40 +0,0 @@
 module Downloads
  module RewriteStrategies
    class Nijie < Base
      attr_accessor :url, :source
      def initialize(url)
        @url  = url
      end
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://nijie\.info\/view\.php.+id=\d+}
          url, headers = rewrite_html_pages(url, headers)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_html_pages(url, headers)
        # example: http://nijie.info/view.php?id=151126
        if url =~ %r{https?://nijie\.info\/view\.php.+id=\d+}
          return [source.image_url, headers]
        else
          return [url, headers]
        end
      end
      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::Nijie.new(url)
          source.get
          source
        end
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/pawoo.rb
+++ b/app/logical/downloads/rewrite_strategies/pawoo.rb
@@ -1,17 +0,0 @@
 module Downloads
  module RewriteStrategies
    class Pawoo < Base
      def rewrite(url, headers, data = {})
        if Sources::Strategies::Pawoo.url_match?(url)
          source = Sources::Strategies::Pawoo.new(url)
          source.get
          url = source.image_url
        elsif url =~ %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)/small/([a-z0-9]+\.\w+)\z!i
          url = "https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"
        end
        return [url, headers, data]
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/pixiv.rb
+++ b/app/logical/downloads/rewrite_strategies/pixiv.rb
@@ -1,127 +0,0 @@
 module Downloads
  module RewriteStrategies
    class Pixiv < Base
      attr_accessor :url, :source
      def initialize(url)
        @url  = url
      end
      def rewrite(url, headers, data = {})
        if url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/
          url, headers = rewrite_headers(url, headers)
          url, headers = rewrite_cdn(url, headers)
        end
        if (url =~ /\Ahttps?:\/\/(?:\w+\.)?pixiv\.net/ || url =~ /\Ahttps?:\/\/i\.pximg\.net/) && source.illust_id_from_url
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
          url, headers = rewrite_old_small_manga_pages(url, headers)
          url, headers = rewrite_to_thumbnails(url, headers) if data.delete(:get_thumbnail)
        end
        # http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip
        if url =~ %r!\Ahttps?://(i\d+\.pixiv|i\.pximg)\.net/img-zip-ugoira/img/\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2}/\d+_ugoira\d+x\d+\.zip\z!i
          data[:is_ugoira] = true
          data[:ugoira_frame_data] = source.ugoira_frame_data
          data[:ugoira_content_type] = source.ugoira_content_type
        end
        return [url, headers, data]
      rescue PixivApiClient::BadIDError, Sources::Site::NoStrategyError
        return [url, headers, data]
      end
    protected
      def rewrite_to_thumbnails(url, headers)
        if url =~ %r!https?://(i\d+)\.pixiv\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip!
          url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg"
        elsif url =~ %r!https?://i\.pximg\.net/img-zip-ugoira/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+)_ugoira\d+x\d+\.zip!
          url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg"
        elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.!
          url = "http://#{$1}.pixiv.net/c/150x150/img-master/img/#{$2}/#{$3}_master1200.jpg"
        elsif url =~ %r!https?://i\.pximg\.net/img-original/img/(\d{4}/\d{2}/\d{2}/\d{2}/\d{2}/\d{2})/(\d+_p\d+)\.!
          url = "http://i.pximg.net/c/150x150/img-master/img/#{$1}/#{$2}_master1200.jpg"
        elsif url =~ %r!https?://(i\d+)\.pixiv\.net/img(\d+)/img/(.+?)/(\d+)\.!
          url = "http://#{$1}.pixiv.net/img#{$2}/img/#{$3}/mobile/#{$4}_240mw.jpg"
        elsif url =~ %r!https?://i\.pximg\.net/img(\d+)/img/(.+?)/(\d+)\.!
          url = "http://#{$1}.pixiv.net/img#{$2}/img/#{$3}/mobile/#{$4}_240mw.jpg"
        end
        return [url, headers]
      end
      def rewrite_headers(url, headers)
        headers["Referer"] = "http://www.pixiv.net"
        return [url, headers]
      end
      # Rewrite these:
      #   http://www.pixiv.net/i/18557054
      #   http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
      #   http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
      #   http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
      #   http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
      # Plus this:
      #   i2.pixiv.net/img-inf/img/2014/09/25/00/57/24/46170939_64x64.jpg
      def rewrite_html_pages(url, headers)
        if url =~ /illust_id=\d+/i || url =~ %r!pixiv\.net/img-inf/img/!i
          return [source.file_url, headers]
        else
          return [url, headers]
        end
      end
      # Rewrite these:
      #   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_m.jpg
      #   http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
      def rewrite_thumbnails(url, headers)
        url = source.rewrite_thumbnails(url)
        return [url, headers]
      end
      # Rewrite these:
      #   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
      #   http://img04.pixiv.net/img/syounen_no_uta/46170939_p0.jpg
      # but not these:
      #   http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_big_p0.jpg
      #   http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
      #   http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
      def rewrite_old_small_manga_pages(url, headers)
        if url !~ %r!/img-(?:original|master)/img/!i && url =~ %r!/(\d+_p\d+)\.!i
          match = $1
          repl = match.sub(/_p/, "_big_p")
          big_url = url.sub(match, repl)
          if http_exists?(big_url, headers)
            url = big_url
          end
        end
        return [url, headers]
      end
      def rewrite_cdn(url, headers)
        if url =~ %r{https?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
          url = url.sub(".edgesuite.net", "")
        end
        return [url, headers]
      end
      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Site.new(url)
          source.get
          source
        end
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/tumblr.rb
+++ b/app/logical/downloads/rewrite_strategies/tumblr.rb
@@ -1,70 +0,0 @@
 module Downloads
  module RewriteStrategies
    DOMAIN = '(data|(\d+\.)?media)\.tumblr\.com'
    MD5 = '(?<md5>[0-9a-f]{32})'
    FILENAME = '(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)'
    SIZES = '(250|400|500|500h|540|1280|raw)'
    EXT = '(?<ext>\w+)'
    class Tumblr < Base
      def rewrite(url, headers, data = {})
        url = rewrite_cdn(url)
        url = rewrite_samples(url, headers)
        url = rewrite_html_pages(url)
        return [url, headers, data]
      end
    protected
      # Look for the biggest available version on data.tumblr.com. A bigger
      # version may or may not exist.
      #
      # http://40.media.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_500h.png
      # => http://data.tumblr.com/d8c6d49785c0842ee31ff26c010b7445/tumblr_naypopLln51tkufhoo2_raw.png
      #
      # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
      # => http://data.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
      #
      # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
      # => http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
      #
      # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
      #
      # http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
      # => http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
      #
      # http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      # => http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
      def rewrite_samples(url, headers)
        if url =~ %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
          sizes = ["raw", 1280, 640, 540, "500h", 500, 400, 250]
          candidates = sizes.map do |size|
            "http://data.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
          end
          url = candidates.find do |candidate|
            http_exists?(candidate, headers)
          end
        end
        url
      end
      # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
      def rewrite_cdn(url)
        url.sub!(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/data\.tumblr\.com!i, "http://data.tumblr.com")
        url
      end
      def rewrite_html_pages(url)
        if Sources::Strategies::Tumblr.url_match?(url)
          url = Sources::Strategies::Tumblr.new(url).image_url
        end
        url
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/twitpic.rb
+++ b/app/logical/downloads/rewrite_strategies/twitpic.rb
@@ -1,36 +0,0 @@
 module Downloads
  module RewriteStrategies
    class Twitpic < Base
      def rewrite(url, headers, data = {})
        if url =~ %r{https?://twitpic\.com} || url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net}
          url, headers = rewrite_html_pages(url, headers)
          url, headers = rewrite_thumbnails(url, headers)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_html_pages(url, headers)
        # example: http://twitpic.com/cpprns
        if url =~ %r{https?://twitpic\.com/([a-z0-9]+)$}
          id = $1
          url = "http://twitpic.com/show/full/#{id}"
          return [url, headers]
        else
          return [url, headers]
        end
      end
      def rewrite_thumbnails(url, headers)
        if url =~ %r{^https?://(?:d3j5vwomefv46c|dn3pm25xmtlyu)\.cloudfront\.net/photos/thumb/(\d+\..+)$}
          match = $1
          url.sub!("/thumb/" + match, "/large/" + match)
        end
        return [url, headers]
      end
    end
  end
 end
--- a/app/logical/downloads/rewrite_strategies/twitter.rb
+++ b/app/logical/downloads/rewrite_strategies/twitter.rb
@@ -1,40 +0,0 @@
 module Downloads
  module RewriteStrategies
    class Twitter < Base
      attr_accessor :url, :source
      def initialize(url)
        @url  = url
      end
      def rewrite(url, headers, data = {})
        if url =~ %r!^https?://(?:mobile\.)?twitter\.com!
          url = source.image_url
        elsif url =~ %r{^https?://pbs\.twimg\.com}
          url, headers = rewrite_thumbnails(url, headers, data)
        end
        return [url, headers, data]
      end
    protected
      def rewrite_thumbnails(url, headers, data)
        if url =~ %r{^(https?://pbs\.twimg\.com/media/[^:]+)}
          url = $1 + ":orig"
        end
        return [url, headers]
      end
      # Cache the source data so it gets fetched at most once.
      def source
        @source ||= begin
          source = ::Sources::Strategies::Twitter.new(url)
          source.get
          source
        end
      end
    end
  end
 end
--- a/app/logical/image_proxy.rb
+++ b/app/logical/image_proxy.rb
@@ -1,12 +1,10 @@
 class ImageProxy
  def self.needs_proxy?(url)
    fake_referer_for(url).present?
  rescue Sources::Site::NoStrategyError
    false
  end
  def self.fake_referer_for(url)
-    Sources::Site.new(url).strategy.try(:fake_referer)
+    Sources::Strategies.find(url).headers["Referer"]
  end
  def self.get_image(url)
--- a/app/logical/iqdb/download.rb
+++ b/app/logical/iqdb/download.rb
@@ -10,11 +10,9 @@ module Iqdb
      headers = {}
      datums = {}
-      Downloads::RewriteStrategies::Base.strategies.each do |strategy|
+      strategy = Sources::Strategies.find(url)
        url, headers, datums = strategy.new(url).rewrite(url, headers, datums)
      end
-      [url, headers["Referer"]]
+      [strategy.image_url, strategy.headers["Referer"]]
    end
    def self.find_similar(source)
--- a/app/logical/nico_seiga_api_client.rb
+++ b/app/logical/nico_seiga_api_client.rb
@@ -12,6 +12,8 @@ class NicoSeigaApiClient
    resp = HTTParty.get(uri, Danbooru.config.httparty_options)
    if resp.success?
      parse_illust_xml_response(resp.body)
    else
      raise HTTParty::ResponseError.new(resp)
    end
  end
@@ -20,6 +22,8 @@ class NicoSeigaApiClient
    resp = HTTParty.get(uri, Danbooru.config.httparty_options)
    if resp.success?
      parse_artist_xml_response(resp.body)
    else
      raise HTTParty::ResponseError.new(resp)
    end
  end
@@ -34,6 +38,6 @@ class NicoSeigaApiClient
    @image_id = image["id"].to_i
    @user_id = image["user_id"].to_i
    @title = image["title"]
-    @desc = image["description"]
+    @desc = image["description"] || image["summary"]
  end
 end
--- a/app/logical/pawoo_api_client.rb
+++ b/app/logical/pawoo_api_client.rb
@@ -1,14 +1,26 @@
 class PawooApiClient
  extend Memoist
  PROFILE1 = %r!\Ahttps?://pawoo\.net/web/accounts/(\d+)!
  PROFILE2 = %r!\Ahttps?://pawoo\.net/@([^/]+)!
  STATUS1 = %r!\Ahttps?://pawoo\.net/web/statuses/(\d+)!
  STATUS2 = %r!\Ahttps?://pawoo\.net/@.+?/([^/]+)!
  class MissingConfigurationError < Exception ; end
  class Account
    attr_reader :json
    def self.is_match?(url)
-      url =~ %r!https?://pawoo.net/web/accounts/(\d+)!
+      if url =~ PROFILE1
-      $1
+        return $1
      end
      if url =~ PROFILE2
        return $1
      end
      false
    end
    def initialize(json)
@@ -44,8 +56,15 @@ class PawooApiClient
    attr_reader :json
    def self.is_match?(url)
-      url =~ %r!https?://pawoo.net/web/statuses/(\d+)! || url =~ %r!https?://pawoo.net/@.+?/(\d+)!
+      if url =~ STATUS1
-      $1
+        return $1
      end
      if url =~ STATUS2
        return $1
      end
      false
    end
    def initialize(json)
@@ -82,11 +101,11 @@ class PawooApiClient
  def get(url)
    if id = Status.is_match?(url)
-      Status.new(JSON.parse(access_token.get("/api/v1/statuses/#{id}").body))
+      return Status.new(JSON.parse(access_token.get("/api/v1/statuses/#{id}").body))
-    elsif id = Account.is_match?(url)
+    end
-      Account.new(JSON.parse(access_token.get("/api/v1/accounts/#{id}").body))
+
-    else
+    if id = Account.is_match?(url)
-      nil
+      return Account.new(JSON.parse(access_token.get("/api/v1/accounts/#{id}").body))
    end
  end
--- a/app/logical/pixiv_api_client.rb
+++ b/app/logical/pixiv_api_client.rb
@@ -1,6 +1,8 @@
 require 'resolv-replace'
 class PixivApiClient
  extend Memoist
  API_VERSION = "1"
  CLIENT_ID = "bYGKuGVw91e0NMfPGp44euvGt59s"
  CLIENT_SECRET = "HP3RmkgAmEGro0gn1x9ioawQE8WMfvLXDz3ZqxpK"
@@ -23,90 +25,11 @@ class PixivApiClient
  class Error < Exception ; end
  class BadIDError < Error ; end
-  class WorksResponse
+  class WorkResponse
    attr_reader :json, :pages, :name, :moniker, :user_id, :page_count, :tags
    attr_reader :artist_commentary_title, :artist_commentary_desc
    def initialize(json)
      # Sample response: 
      # {
      #     "status": "success",
      #     "response": [
      #         {
      #             "id": 49270482,
      #             "title": "ツイログ",
      #             "caption": null,
      #             "tags": [
      #                 "神崎蘭子",
      #                 "双葉杏",
      #                 "アイドルマスターシンデレラガールズ",
      #                 "Star!!",
      #                 "アイマス5000users入り"
      #             ],
      #             "tools": [
      #                 "CLIP STUDIO PAINT"
      #             ],
      #             "image_urls": {
      #                 "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg"
      #             },
      #             "width": 1200,
      #             "height": 951,
      #             "stats": {
      #                 "scored_count": 8247,
      #                 "score": 81697,
      #                 "views_count": 191630,
      #                 "favorited_count": {
      #                     "public": 7804,
      #                     "private": 745
      #                 },
      #                 "commented_count": 182
      #             },
      #             "publicity": 0,
      #             "age_limit": "all-age",
      #             "created_time": "2015-03-14 17:53:32",
      #             "reuploaded_time": "2015-03-14 17:53:32",
      #             "user": {
      #                 "id": 341433,
      #                 "account": "nardack",
      #                 "name": "Nardack",
      #                 "is_following": false,
      #                 "is_follower": false,
      #                 "is_friend": false,
      #                 "is_premium": null,
      #                 "profile_image_urls": {
      #                     "px_50x50": "http://i1.pixiv.net/img19/profile/nardack/846482_s.jpg"
      #                 },
      #                 "stats": null,
      #                 "profile": null
      #             },
      #             "is_manga": true,
      #             "is_liked": false,
      #             "favorite_id": 0,
      #             "page_count": 2,
      #             "book_style": "none",
      #             "type": "illustration",
      #             "metadata": {
      #                 "pages": [
      #                     {
      #                         "image_urls": {
      #                             "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg",
      #                             "medium": "http://i3.pixiv.net/c/1200x1200/img-master/img/2015/03/14/17/53/32/49270482_p0_master1200.jpg"
      #                         }
      #                     },
      #                     {
      #                         "image_urls": {
      #                             "large": "http://i3.pixiv.net/img-original/img/2015/03/14/17/53/32/49270482_p1.jpg",
      #                             "medium": "http://i3.pixiv.net/c/1200x1200/img-master/img/2015/03/14/17/53/32/49270482_p1_master1200.jpg"
      #                         }
      #                     }
      #                 ]
      #             },
      #             "content_type": null
      #         }
      #     ],
      #     "count": 1
      # }
      @json = json
      @name = json["user"]["name"]
      @user_id = json["user"]["id"]
@@ -131,7 +54,105 @@ class PixivApiClient
    end
  end
-  def works(illust_id)
+  class NovelResponse
    extend Memoist
    attr_reader :json
    def initialize(json)
      @json = json
    end
    def name
      json["user"]["name"]
    end
    def user_id
      json["user"]["id"]
    end
    def moniker
      json["user"]["account"]
    end
    def page_count
      json["page_count"].to_i
    end
    def artist_commentary_title
      json["title"]
    end
    def artist_commentary_desc
      json["caption"]
    end
    def tags
      json["tags"]
    end
    def pages
      # ex: 
      # https://i.pximg.net/c/150x150_80/novel-cover-master/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b_master1200.jpg (6096b)
      # =>
      # https://i.pximg.net/novel-cover-original/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b.jpg (532129b)
      [find_original(json["image_urls"]["small"])]
    end
    memoize :pages
  public
    PXIMG = %r!\Ahttps?://i\.pximg\.net/c/\d+x\d+_\d+/novel-cover-master/img/(?<timestamp>\d+/\d+/\d+/\d+/\d+/\d+)/(?<filename>\d+_[a-f0-9]+)_master\d+\.(?<ext>jpg|jpeg|png|gif)!i
    def find_original(x)
      if x =~ PXIMG
        return "https://i.pximg.net/novel-cover-original/img/#{$~[:timestamp]}/#{$~[:filename]}.#{$~[:ext]}"
      end
      return x
    end
  end
  class FanboxResponse
    attr_reader :json
    def initialize(json)
      @json = json
    end
    def name
      json["body"]["user"]["name"]
    end
    def user_id
      json["body"]["user"]["userId"]
    end
    def moniker
      raise NotImplementedError
    end
    def page_count
      json["body"]["body"]["images"].size
    end
    def artist_commentary_title
      json["body"]["title"]
    end
    def artist_commentary_desc
      json["body"]["body"]["text"]
    end
    def tags
      []
    end
    def pages
      json["body"]["body"]["images"].map {|x| x["originalUrl"]}
    end
  end
  def work(illust_id)
    headers = Danbooru.config.http_headers.merge(
      "Referer" => "http://www.pixiv.net",
      "Content-Type" => "application/x-www-form-urlencoded",
@@ -148,7 +169,7 @@ class PixivApiClient
    json = JSON.parse(body)
    if resp.success?
-      WorksResponse.new(json["response"][0])
+      WorkResponse.new(json["response"][0])
    elsif json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/
      raise BadIDError.new("Pixiv ##{illust_id} not found: work was deleted, made private, or ID is invalid.")
    else
@@ -158,6 +179,40 @@ class PixivApiClient
    raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})")
  end
  def fanbox(fanbox_id)
    url = "https://www.pixiv.net/ajax/fanbox/post?postId=#{fanbox_id.to_i}"
    resp = agent.get(url)
    json = JSON.parse(resp.body)
    if resp.code == "200"
      FanboxResponse.new(json)
    elsif json["status"] == "failure"
      raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})")
    end
  rescue JSON::ParserError
    raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})")
  end
  def novel(novel_id)
    headers = Danbooru.config.http_headers.merge(
      "Referer" => "http://www.pixiv.net",
      "Content-Type" => "application/x-www-form-urlencoded",
      "Authorization" => "Bearer #{access_token}"
    )
    url = "https://public-api.secure.pixiv.net/v#{API_VERSION}/novels/#{novel_id.to_i}.json"
    resp = HTTParty.get(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
    body = resp.body.force_encoding("utf-8")
    json = JSON.parse(body)
    if resp.success?
      NovelResponse.new(json["response"][0])
    elsif json["status"] == "failure" && json.dig("errors", "system", "message") =~ /対象のイラストは見つかりませんでした。/
      raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})")
    end
  rescue JSON::ParserError
    raise Error.new("Pixiv API call failed (status=#{resp.code} body=#{body})")
  end
  def access_token
    Cache.get("pixiv-papi-access-token", 3000) do
      access_token = nil
@@ -186,4 +241,9 @@ class PixivApiClient
      access_token
    end
  end
  def agent
    PixivWebAgent.build
  end
  memoize :agent
 end
--- a/app/logical/sources/site.rb
+++ b/app/logical/sources/site.rb
@@ -1,78 +0,0 @@
 # encoding: UTF-8
 module Sources
  class Site
    class NoStrategyError < RuntimeError ; end
    attr_reader :strategy
    delegate :url, :get, :get_size, :site_name, :artist_name,
      :profile_url, :image_url, :tags, :artists, :unique_id,
      :file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls,
      :artist_commentary_title, :artist_commentary_desc,
      :dtext_artist_commentary_title, :dtext_artist_commentary_desc,
      :rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy
    def self.strategies
      [Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo]
    end
    def initialize(url, referer_url: nil)
      @url = url
      Site.strategies.each do |strategy|
        if strategy.url_match?(url) || strategy.url_match?(referer_url)
          @strategy = strategy.new(url, referer_url)
          return
        end
      end
      raise NoStrategyError.new
    end
    def referer_url
      strategy.try(:referer_url)
    end
    def normalized_for_artist_finder?
      available? && strategy.normalized_for_artist_finder?
    end
    def normalize_for_artist_finder!
      if available? && strategy.normalizable_for_artist_finder?
        strategy.normalize_for_artist_finder!
      else
        url
      end
    rescue
      url
    end
    def to_h
      return {
        :artist_name => artist_name,
        :artists => artists.as_json(include: :sorted_urls),
        :profile_url => profile_url,
        :image_url => image_url,
        :image_urls => image_urls,
        :normalized_for_artist_finder_url => normalize_for_artist_finder!,
        :tags => tags,
        :translated_tags => translated_tags,
        :unique_id => unique_id,
        :artist_commentary => {
          :title => artist_commentary_title,
          :description => artist_commentary_desc,
          :dtext_title => dtext_artist_commentary_title,
          :dtext_description => dtext_artist_commentary_desc,
        }
      }
    end
    def to_json
      to_h.to_json
    end
    def available?
      strategy.present?
    end
  end
 end
--- a/app/logical/sources/strategies.rb
+++ b/app/logical/sources/strategies.rb
@@ -0,0 +1,29 @@
 module Sources
  module Strategies
    def self.all
      return [
        Strategies::Pixiv, 
        Strategies::NicoSeiga, 
        Strategies::Twitter, 
        Strategies::DeviantArt, 
        Strategies::Tumblr, 
        Strategies::ArtStation, 
        Strategies::Nijie, 
        Strategies::Pawoo,
        Strategies::Moebooru,
        Strategies::Null # MUST BE LAST!
      ]
    end
    def self.find(url, referer=nil)
      all
        .detect { |strategy| strategy.match?(url, referer) }
        .new(url, referer)
    end
    def self.canonical(url, referer)
      find(url, referer).canonical_url
    end
  end
 end
--- a/app/logical/sources/strategies/art_station.rb
+++ b/app/logical/sources/strategies/art_station.rb
@@ -1,68 +1,165 @@
 module Sources::Strategies
  class ArtStation < Base
    PROJECT = %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)/?\z!i
    ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i
    PROFILE1 = %r!\Ahttps?://(\w+)\.artstation\.com!i
    PROFILE2 = %r!\Ahttps?://www.artstation.com/artist/(\w+)!i
    PROFILE3 = %r!\Ahttps?://www.artstation.com/(\w+)!i
    PROFILE = %r!#{PROFILE2}|#{PROFILE3}|#{PROFILE1}!
    attr_reader :json, :image_urls
-    def self.url_match?(url)
+    def self.match?(*urls)
-      self.project_id(url).present?
+      urls.compact.any? { |x| x.match?(PROJECT) || x.match?(ASSET) || x.match?(PROFILE)}
    end
    # https://www.artstation.com/artwork/04XA4
    # https://www.artstation.com/artwork/cody-from-sf
    # https://sa-dui.artstation.com/projects/DVERn
    def self.project_id(url)
-      if url =~ %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)\z!i
+      if url =~ PROJECT
        $~[:project_id]
      else
        nil
      end
    end
    def referer_url
      if self.class.url_match?(@referer_url)
        @referer_url
      else
        @url
      end
    end
    def site_name
      "ArtStation"
    end
-    def project_id
+    def image_urls
-      self.class.project_id(referer_url)
+      image_urls_sub
        .map { |asset| original_asset_url(asset) }
    end
    memoize :image_urls
    def page_url
-      "https://www.artstation.com/artwork/#{project_id}"
+      [url, referer_url].each do |x|
        if x =~ PROJECT
          return "https://www.artstation.com/artwork/#{$~[:project_id]}"
        end
      end
      return super
    end
    def profile_url
      if url =~ PROFILE1 && $1 != "www"
        return "https://www.artstation.com/#{$1}"
      end
      if url =~ PROFILE2
        return "https://www.artstation.com/#{$1}"
      end
      if url =~ PROFILE3 && url !~ PROJECT
        return url
      end
      api_json["user"]["permalink"]
    end
    def artist_name
      api_json["user"]["username"]
    end
    def artist_commentary_title
      api_json["title"]
    end
    def artist_commentary_desc
      ActionView::Base.full_sanitizer.sanitize(api_json["description"])
    end
    memoize :artist_commentary_desc
    def tags
      return nil if !api_json.has_key?("tags")
      api_json["tags"].
        map { |tag| [tag.downcase.tr(" ", "_"), tag_url(tag)]}
    end
    memoize :tags
    def normalized_for_artist_finder?
      url =~ PROFILE3 && url !~ PROFILE2 && url !~ PROJECT
    end
    def normalizable_for_artist_finder?
      url =~ PROFILE || url =~ PROJECT
    end
    def normalize_for_artist_finder
      profile_url
    end
  public
    def image_urls_sub
      if url.match?(ASSET)
        return [url]
      end
      api_json["assets"]
        .select { |asset| asset["asset_type"] == "image" }
        .map { |asset| asset["image_url"] }
    end
    # these are de facto private methods but are public for testing
    # purposes
    def project_id
      self.class.project_id(url) || self.class.project_id(referer_url)
    end
    memoize :project_id
    def api_url
      "https://www.artstation.com/projects/#{project_id}.json"
    end
-    def image_url
+    def api_json
-      image_urls.first
+      if project_id.nil?
        raise ::Sources::Error.new("Project id could not be determined from (#{url}, #{referer_url})")
      end
    def get
      resp = HTTParty.get(api_url, Danbooru.config.httparty_options)
-      image_url_rewriter = Downloads::RewriteStrategies::ArtStation.new
+
      if resp.success?
-        @json = JSON.parse(resp.body)
+        json = JSON.parse(resp.body)
        @artist_name = json["user"]["username"]
        @profile_url = json["user"]["permalink"]
        images = json["assets"].select { |asset| asset["asset_type"] == "image" }
        @image_urls = images.map do |x|
          y, _, _ = image_url_rewriter.rewrite(x["image_url"], nil)
          y
        end
        @tags = json["tags"].map {|x| [x.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(x)]} if json["tags"]
        @artist_commentary_title = json["title"]
        @artist_commentary_desc = ActionView::Base.full_sanitizer.sanitize(json["description"])
      else
-        raise "HTTP error code: #{resp.code} #{resp.message}"
+        raise HTTParty::ResponseError.new(resp)
      end
      return json
    end
    memoize :api_json
    # Returns the original representation of the asset, if it exists. Otherwise
    # return the url.
    def original_asset_url(x)
      if x =~ ASSET
        # example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
        original_url = x.sub(%r!/(?:medium|small|large)/!, "/original/")
        if http_exists?(original_url, headers)
          return original_url
        end
        if x =~ /medium|small/
          large_url = x.sub(%r!/(?:medium|small)/!, "/large/")
          if http_exists?(large_url, headers)
            return large_url
          end
        end
      end
      return x
    end
    def tag_url(name)
      "https://www.artstation.com/search?q=" + CGI.escape(name)
    end
  end
 end
--- a/app/logical/sources/strategies/base.rb
+++ b/app/logical/sources/strategies/base.rb
@@ -1,41 +1,108 @@
 # This is a collection of strategies for extracting information about a 
 # resource. At a minimum it tries to extract the artist name and a canonical 
 # URL to download the image from. But it can also be used to normalize a URL 
-# for use with the artist finder. It differs from Downloads::RewriteStrategies
+# for use with the artist finder. 
-# in that the latter is more for normalizing and rewriting a URL until it is 
+#
-# suitable for downloading, whereas Sources::Strategies is more for meta-data 
+# Design Principles
-# that can only be obtained by downloading and parsing the resource.
+#
 # In general you should minimize state. You can safely assume that <tt>url</tt>
 # and <tt>referer_url</tt> will not change over the lifetime of an instance,
 # so you can safely memoize methods and their results. A common pattern is
 # conditionally making an external API call and parsing its response. You should
 # make this call on demand and memoize the response.
 module Sources
  module Strategies
    class Base
      attr_reader :url, :referer_url
      attr_reader :artist_name, :profile_url, :image_url, :tags
      attr_reader :artist_commentary_title, :artist_commentary_desc
-      def self.url_match?(url)
+      extend Memoist
      def self.match?(*urls)
        false
      end
      # * <tt>url</tt> - Should point to a resource suitable for 
      #   downloading. This may sometimes point to the binary file. 
      #   It may also point to the artist's profile page, in cases
      #   where this class is being used to normalize artist urls.
      #   Implementations should be smart enough to detect this and 
      #   behave accordingly.
      # * <tt>referer_url</tt> - Sometimes the HTML page cannot be
      #   determined from <tt>url</tt>. You should generally pass in a
      #   <tt>referrer_url</tt> so the strategy can discover the HTML
      #   page and other information.
      def initialize(url, referer_url = nil)
        @url = url
        @referer_url = referer_url
      end
-      # No remote calls are made until this method is called.
+      def site_name
      def get
        raise NotImplementedError
      end
-      def get_size
+      # Whatever <tt>url</tt> is, this method should return the direct links 
-        @get_size ||= Downloads::File.new(@image_url).size
+      # to the canonical binary files. It should not be an HTML page. It should 
      # be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the 
      # downloader will fetch and save to disk.
      def image_urls
        raise NotImplementedError
      end
      def image_url
        image_urls.first
      end
      # Whatever <tt>url</tt> is, this method should return a link to the HTML
      # page containing the resource. It should not be a binary file. It will
      # eventually be assigned as the source for the post, but it does not
      # represent what the downloader will fetch.
      def page_url
        Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found"
        return nil
      end
      # This will be the url stored in posts. Typically this is the page
      # url, but on some sites it may be preferable to store the image url.
      def canonical_url
        page_url
      end
      # A link to the artist's profile page on the site.
      def profile_url
        nil
      end
      def artist_name
        raise NotImplementedError
      end
      def artist_commentary_title
        nil
      end
      def artist_commentary_desc
        nil
      end
      # Subclasses should merge in any required headers needed to access resources
      # on the site.
      def headers
        return Danbooru.config.http_headers
      end
      # Returns the size of the image resource without actually downloading the file.
      def size
        Downloads::File.new(image_url).size
      end
      memoize :size
      # Subclasses should return true only if the URL is in its final normalized form.
      #
-      # Sources::Site.new("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
+      # Sources::Strategies.find("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
      # => true
-      # Sources::Site.new("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
+      # Sources::Strategies.find("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
      # => false
      def normalized_for_artist_finder?
        false
@@ -44,32 +111,33 @@ module Sources
      # Subclasses should return true only if the URL is a valid URL that could
      # be converted into normalized form.
      #
-      # Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
+      # Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
      # => true
-      # Sources::Site.new("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
+      # Sources::Strategies.find("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
      # => false
      def normalizable_for_artist_finder?
        false
      end
-      def normalize_for_artist_finder!
+      def normalize_for_artist_finder
-        url
+        profile_url || url
      end
      def site_name
        raise NotImplementedError
      end
      # A unique identifier for the artist. This is used for artist creation.
      def unique_id
        artist_name
      end
      def artists
-        Artist.find_artists(url, referer_url)
+        Artist.find_artists(profile_url)
      end
-      def image_urls
+      def file_url
-        [image_url]
+        image_url
      end
      def data
        {}
      end
      def tags
@@ -97,11 +165,6 @@ module Sources
        translated_tags
      end
      # Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
      def fake_referer
        nil
      end
      def dtext_artist_commentary_title
        self.class.to_dtext(artist_commentary_title)
      end
@@ -110,9 +173,40 @@ module Sources
        self.class.to_dtext(artist_commentary_desc)
      end
      # A strategy may return extra data unrelated to the file
      def data
        return {}
      end
      def to_h
        return {
          :artist_name => artist_name,
          :artists => artists.as_json(include: :sorted_urls),
          :profile_url => profile_url,
          :image_url => image_url,
          :image_urls => image_urls,
          :normalized_for_artist_finder_url => normalize_for_artist_finder,
          :tags => tags,
          :translated_tags => translated_tags,
          :unique_id => unique_id,
          :artist_commentary => {
            :title => artist_commentary_title,
            :description => artist_commentary_desc,
            :dtext_title => dtext_artist_commentary_title,
            :dtext_description => dtext_artist_commentary_desc,
          }
        }
      end
      def to_json
        to_h.to_json
      end
    protected
-      def agent
+
-        raise NotImplementedError
+      def http_exists?(url, headers)
        res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
        res.success?
      end
      # Convert commentary to dtext by stripping html tags. Sites can override
--- a/app/logical/sources/strategies/deviant_art.rb
+++ b/app/logical/sources/strategies/deviant_art.rb
@@ -1,44 +1,127 @@
 module Sources
  module Strategies
    class DeviantArt < Base
-      extend Memoist
+      ATTRIBUTED_ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
      ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
      PATH_ART = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
      RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
      SUBDOMAIN_ART = %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
      PROFILE = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/?\z}
-      def self.url_match?(url)
+      def self.match?(*urls)
-        url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
+        urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) }
      end
      def self.normalize(url)
        if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
          "http://fav.me/d#{$1}"
        elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
          "http://fav.me/d#{$1}"
        elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
          url
        elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
          "http://www.deviantart.com/#{$1}#{$2}"
        else
          url
        end
      end
      def referer_url
        if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
          @referer_url
        else
          @url
        end
      end
      def site_name
        "Deviant Art"
      end
-      def unique_id
+      def image_urls
-        artist_name
+        # normalize thumbnails
        if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
          match = $1
          return [url.sub(match + "200H/", match)]
        end
-      def get
+        if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
-        # no-op
+          match = $1
          return [url.sub(match + "PRE/", match)]
        end
        # return direct links
        if url =~ ATTRIBUTED_ASSET || url =~ ASSET
          return [url]
        end
        # work is deleted, use image url as given by user.
        if uuid.nil?
          return [url]
        end
        # work is downloadable
        if api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
          src = api_download[:src]
          src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
          src.gsub!(/\?.*\z/, "") # strip s3 query params
          src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
          return [src]
        end
        # work isn't downloadable, or download size is same as regular size.
        if api_deviation.present?
          return [api_deviation.dig(:content, :src)]
        end
        raise "Couldn't find image url"
      end
      def page_url
        [url, referer_url].each do |x|
          if x =~ ATTRIBUTED_ASSET
            return "http://fav.me/d#{$1}"
          end
          if x =~ ASSET
            return "http://fav.me/d#{$1}"
          end
          if x =~ PATH_ART
            return x
          end
          if x !~ RESERVED_SUBDOMAINS && x =~ SUBDOMAIN_ART
            return "http://www.deviantart.com/#{$1}#{$2}"
          end
        end
        return super
      end
      def profile_url
        if url =~ PROFILE
          return url
        end
        if artist_name.blank?
          return nil
        end
        return "https://www.deviantart.com/#{artist_name}"
      end
      def artist_name
        api_metadata.dig(:author, :username).try(&:downcase)
      end
      def artist_commentary_title
        api_metadata[:title]
      end
      def artist_commentary_desc
        api_metadata[:description]
      end
      def normalized_for_artist_finder?
        url =~ PROFILE
      end
      def normalizable_for_artist_finder?
        url =~ PATH_ART || url =~ SUBDOMAIN_ART
      end
      def normalize_for_artist_finder
        profile_url
      end
      def tags
        if api_metadata.blank?
          return []
        end
        api_metadata[:tags].map do |tag|
          [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
        end
      end
      def dtext_artist_commentary_desc
@@ -71,75 +154,24 @@ module Sources
        end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
      end
-      def artist_name
+    public
        api_metadata.dig(:author, :username).try(&:downcase)
      end
      def profile_url
        return "" if artist_name.blank?
        "https://www.deviantart.com/#{artist_name}"
      end
      def image_url
        # work is deleted, use image url as given by user.
        if uuid.nil?
          url
        # work is downloadable
        elsif api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
          src = api_download[:src]
          src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
          src.gsub!(/\?.*\z/, "") # strip s3 query params
          src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
          src
        # work isn't downloadable, or download size is same as regular size.
        elsif api_deviation.present?
          api_deviation.dig(:content, :src)
        else
          raise "couldn't find image url"
        end
      end
      def tags
        return [] if api_metadata.blank?
        api_metadata[:tags].map do |tag|
          [tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
        end
      end
      def artist_commentary_title
        api_metadata[:title]
      end
      def artist_commentary_desc
        api_metadata[:description]
      end
      def normalizable_for_artist_finder?
        url !~ %r!^https?://www.deviantart.com/!
      end
      def normalized_for_artist_finder?
        url =~ %r!^https?://www.deviantart.com/! 
      end
      def normalize_for_artist_finder!
        profile_url
      end
      protected
      def normalized_url
        @normalized_url ||= self.class.normalize(url)
      end
      def page
-        options = Danbooru.config.httparty_options.deep_merge(format: :plain, headers: { "Accept-Encoding" => "gzip" })
+        options = Danbooru.config.httparty_options.deep_merge(
-        resp = HTTParty.get(normalized_url, **options)
+          format: :plain, 
          headers: { "Accept-Encoding" => "gzip" }
        )
        resp = HTTParty.get(page_url, **options)
        if resp.success?
          body = Zlib.gunzip(resp.body)
        else
          raise HTTParty::ResponseError.new(resp)
        end
        Nokogiri::HTML(body)
      end
      memoize :page
      # Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
      # For private works the UUID will be nil.
@@ -151,29 +183,39 @@ module Sources
        uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
        uuid
      end
      memoize :uuid
      def api_client
-        api_client = DeviantArtApiClient.new(Danbooru.config.deviantart_client_id, Danbooru.config.deviantart_client_secret, Danbooru.config.httparty_options)
+        api_client = DeviantArtApiClient.new(
-        api_client.access_token = Cache.get("da-access-token", 55.minutes) { api_client.access_token.to_hash }
+          Danbooru.config.deviantart_client_id, 
          Danbooru.config.deviantart_client_secret, 
          Danbooru.config.httparty_options
        )
        api_client.access_token = Cache.get("da-access-token", 55.minutes) do
          api_client.access_token.to_hash
        end
        api_client
      end
      memoize :api_client
      def api_deviation
        return {} if uuid.nil?
        api_client.deviation(uuid)
      end
      memoize :api_deviation
      def api_metadata
        return {} if uuid.nil?
        api_client.metadata(uuid)[:metadata].first
      end
      memoize :api_metadata
      def api_download
        return {} if uuid.nil?
        api_client.download(uuid)
      end
      memoize :api_download
      memoize :page, :uuid, :api_client, :api_deviation, :api_metadata, :api_download
    end
  end
 end
--- a/app/logical/sources/strategies/moebooru.rb
+++ b/app/logical/sources/strategies/moebooru.rb
@@ -0,0 +1,35 @@
 module Sources
  module Strategies
    class Moebooru < Base
      DOMAINS = /(?:[^.]+\.)?yande\.re|konachan\.com/
      def self.match?(*urls)
        urls.compact.any? { |x| x.match?(DOMAINS) }
      end
      def site_name
        URI.parse(url).host
      end
      def image_url
        if url =~ %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}
          return $1 + "/image/" + $2 + ".png"
        end
        return url
      end
      def page_url
        return url
      end
      def profile_url
        return url
      end
      def artist_name
        return ""
      end
    end
  end
 end
--- a/app/logical/sources/strategies/nico_seiga.rb
+++ b/app/logical/sources/strategies/nico_seiga.rb
@@ -1,145 +1,153 @@
 module Sources
  module Strategies
    class NicoSeiga < Base
-      extend Memoist
+      URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp!
      DIRECT = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+!
      PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
      PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
-      def self.url_match?(url)
+      def self.match?(*urls)
-        url =~ /^https?:\/\/(?:\w+\.)?nico(?:seiga|video)\.jp/
+        urls.compact.any? { |x| x.match?(URL) }
      end
      def referer_url
        if @referer_url =~ /seiga\.nicovideo\.jp\/seiga\/im\d+/ && @url =~ /http:\/\/lohas\.nicoseiga\.jp\/(?:priv|o)\//
          @referer_url
        else
          @url
        end
      end
      def site_name
        "Nico Seiga"
      end
-      def unique_id
+      def image_urls
-        profile_url =~ /\/illust\/(\d+)/
+        if url =~ DIRECT
-        "nicoseiga" + $1
+          return [url]
        end
      def get
        page = load_page
        @artist_name, @profile_url = get_profile_from_api
        @image_url = get_image_url_from_page(page)
        @artist_commentary_title, @artist_commentary_desc = get_artist_commentary_from_api
        # Log out before getting the tags.
        # The reason for this is that if you're logged in and viewing a non-adult-rated work, the tags will be added with javascript after the page has loaded meaning we can't extract them easily.
        # This does not apply if you're logged out (or if you're viewing an adult-rated work).
        agent.cookie_jar.clear!
        agent.get(normalized_url) do |page|
          @tags = get_tags_from_page(page)
        end
      end
      def normalized_for_artist_finder?
        url =~ %r!https?://seiga\.nicovideo\.jp/user/illust/\d+/!i
      end
      def normalizable_for_artist_finder?
        url =~ %r!https?://seiga\.nicovideo\.jp/seiga/im\d+!i
      end
      def normalize_for_artist_finder!
        page = load_page
        @illust_id = get_illust_id_from_url
        @artist_name, @profile_url = get_profile_from_api
        @profile_url + "/"
      end
    protected
      def api_client
        NicoSeigaApiClient.new(get_illust_id_from_url)
      end
      def get_illust_id_from_url
        if normalized_url =~ %r!http://seiga.nicovideo.jp/seiga/im(\d+)!
          $1.to_i
        else
          nil
        end
      end
      def load_page
        page = agent.get(normalized_url)
        if page.search("a#link_btn_login").any?
          # Session cache is invalid, clear it and log in normally.
          Cache.delete("nico-seiga-session")
          @agent = nil
          page = agent.get(normalized_url)
        end
        page
      end
      def get_profile_from_api
        return [api_client.moniker, "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"]
      end
      def get_image_url_from_page(page)
        link = page.search("a#illust_link")
        if link.any?
          image_url = "http://seiga.nicovideo.jp" + link[0]["href"]
          page = agent.get(image_url) # need to follow this redirect while logged in or it won't work
          if page.is_a?(Mechanize::Image)
-            return page.uri.to_s
+            return [page.uri.to_s]
          end
          images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//}
          if images.any?
-            image_url = "http://lohas.nicoseiga.jp" + images[0]["data-src"]
+            return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]]
          end
        else
          image_url = nil
        end
        return image_url
      end
      def get_tags_from_page(page)
        links = page.search("a.tag")
        links.map do |node|
          [node.text, "http://seiga.nicovideo.jp" + node.attr("href")]
          end
        end
-      def get_artist_commentary_from_api
+        raise "image url not found for (#{url}, #{referer_url})"
        [api_client.title, api_client.desc]
      end
-      def normalized_url
+      def page_url
-        @normalized_url ||= begin
+        [url, referer_url].each do |x|
-          if url =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
+          if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
-            "http://seiga.nicovideo.jp/seiga/im#{$1}"
+            return "http://seiga.nicovideo.jp/seiga/im#{$1}"
-          elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
+          end
-            "http://seiga.nicovideo.jp/seiga/im#{$1}"
+
-          elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
+          if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
-            "http://seiga.nicovideo.jp/seiga/im#{$1}"
+            return "http://seiga.nicovideo.jp/seiga/im#{$1}"
-          elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
+          end
-            "http://seiga.nicovideo.jp/seiga/im#{$1}"
+
-          elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
+          if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
-            "http://seiga.nicovideo.jp/seiga/im#{$1}"
+            return "http://seiga.nicovideo.jp/seiga/im#{$1}"
-          elsif url =~ %r{/seiga/im\d+}
+          end
-            url
+
-          else
+          if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
-            nil
+            return "http://seiga.nicovideo.jp/seiga/im#{$1}"
          end
          if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
            return "http://seiga.nicovideo.jp/seiga/im#{$1}"
          end
          if x =~ %r{/seiga/im\d+}
            return x
          end
        end
        return super
      end
      def profile_url
        if url =~ PROFILE
          return url
        end
        "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
      end
      def artist_name
        api_client.moniker
      end
      def artist_commentary_title
        api_client.title
      end
      def artist_commentary_desc
        api_client.desc
      end
      def headers
        super.merge(
          "Referer" => "https://seiga.nicovideo.jp"
        )
      end
      def normalized_for_artist_finder?
        url =~ PROFILE
      end
      def normalizable_for_artist_finder?
        url =~ PAGE || url =~ PROFILE
      end
      def normalize_for_artist_finder
        "#{profile_url}/"
      end
      def unique_id
        "nicoseiga#{api_client.user_id}"
      end
      def tags
        string = page.at("meta[name=keywords]").try(:[], "content") || ""
        string.split(/,/).map do |name|
          [name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"]
        end
      end
      memoize :tags
    public
      def api_client
        NicoSeigaApiClient.new(illust_id)
      end
      memoize :api_client
      def illust_id
        if page_url =~ PAGE
          return $1.to_i
        end
        return nil
      end
      def page
        doc = agent.get(page_url)
        if doc.search("a#link_btn_login").any?
          # Session cache is invalid, clear it and log in normally.
          Cache.delete("nico-seiga-session")
          doc = agent.get(page_url)
        end
        doc
      end
      memoize :page
      def agent
        @agent ||= begin
        mech = Mechanize.new
        mech.redirect_ok = false
        mech.keep_alive = false
@@ -174,9 +182,7 @@ module Sources
        mech.redirect_ok = true
        mech
      end
-      end
+      memoize :agent
      memoize :api_client
    end
  end
 end
--- a/app/logical/sources/strategies/nijie.rb
+++ b/app/logical/sources/strategies/nijie.rb
@@ -1,127 +1,122 @@
 module Sources
  module Strategies
    class Nijie < Base
-      attr_reader :image_urls
+      PICTURE = %r{pic\d+\.nijie.info/nijie_picture/}
      PAGE = %r{\Ahttps?://nijie\.info/view\.php.+id=\d+}
      DIFF = %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
-      def self.url_match?(url)
+      def self.match?(*urls)
-        url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/
+        urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?nijie\.info/) }
      end
      def initialize(url, referer_url=nil)
        super(normalize_url(url), normalize_url(referer_url))
      end
      def referer_url
        if @referer_url =~ /nijie\.info\/view\.php.+id=\d+/ && @url =~ /pic\d+\.nijie.info\/nijie_picture\//
          @referer_url
        else
          @url
        end
      end
      def site_name
        "Nijie"
      end
      def image_urls
        if url =~ PICTURE
          return [url]
        end
        # http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
        # => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
        if url =~ DIFF
          return [normalize_thumbnails(url)]
        end
        page.search("div#gallery a > img").map do |img|
          # //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
          # => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
          normalize_thumbnails("https:" + img.attr("src"))
        end.uniq
      end
      def page_url
        [url, referer_url].each do |x|
          if x =~ PAGE
            return x
          end
          if x =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
            return "https://nijie.info/view.php?id=#{$1}"
          end
        end
        return super
      end
      def profile_url
        links = page.search("a.name")
        if links.any?
          return "https://nijie.info/" + links[0]["href"]
        end
        return nil
      end
      def artist_name
        links = page.search("a.name")
        if links.any?
          return links[0].text
        end
        return nil
      end
      def artist_commentary_title
        page.search("h2.illust_title").text
      end
      def artist_commentary_desc
        page.search('meta[property="og:description"]').attr("content").value
      end
      def tags
        links = page.search("div#view-tag a").find_all do |node|
          node["href"] =~ /search\.php/
        end
        if links.any?
          return links.map do |node|
            [node.inner_text, "https://nijie.info" + node.attr("href")]
          end
        end
        return []
      end
      def unique_id
        profile_url =~ /nijie\.info\/members.php\?id=(\d+)/
        "nijie" + $1.to_s
      end
-      def image_url
+    public
        image_urls.first
      end
      def get
        page = agent.get(referer_url)
        if page.search("div#header-login-container").any?
          # Session cache is invalid, clear it and log in normally.
          Cache.delete("nijie-session")
          @agent = nil
          page = agent.get(referer_url)
        end
        @artist_name, @profile_url = get_profile_from_page(page)
        @image_urls = get_image_urls_from_page(page)
        @tags = get_tags_from_page(page)
        @artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page)
      end
    protected
      def self.to_dtext(text)
        text = text.gsub(/\r\n|\r/, "<br>")
        DText.from_html(text).strip
      end
-      def get_commentary_from_page(page)
+      def normalize_thumbnails(x)
-        title = page.search("h2.illust_title").text
+        x.gsub(%r!__rs_l120x120/!i, "")
        desc = page.search('meta[property="og:description"]').attr("content").value
        [title, desc]
      end
-      def get_profile_from_page(page)
+      def page
-        links = page.search("a.name")
+        doc = agent.get(page_url)
-        if links.any?
+        if doc.search("div#header-login-container").any?
-          profile_url = "http://nijie.info/" + links[0]["href"]
+          # Session cache is invalid, clear it and log in normally.
-          artist_name = links[0].text
+          Cache.delete("nijie-session")
-        else
+          doc = agent.get(page_url)
          profile_url = nil
          artist_name = nil
        end
-        return [artist_name, profile_url].compact
+        return doc
      end
      def get_image_urls_from_page(page)
        page.search("div#gallery a > img").map do |img|
          # //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
          # => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
          url = "https:" + img.attr("src")
          normalize_image_url(url)
        end
      end
      def get_tags_from_page(page)
        # puts page.root.to_xhtml
        links = page.search("div#view-tag a").find_all do |node|
          node["href"] =~ /search\.php/
        end
        if links.any?
          links.map do |node|
            [node.inner_text, "http://nijie.info" + node.attr("href")]
          end
        else
          []
        end
      end
      def normalize_url(url)
        if url =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
          return "http://nijie.info/view.php?id=#{$1}"
        else
          return url
        end
      end
      def normalize_image_url(image_url)
        # http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
        # => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
        if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
          image_url = image_url.gsub(%r!__rs_l120x120/!i, "")
        end
        image_url = image_url.gsub(%r!\Ahttp:!i, "https:")
        image_url
      end
      memoize :page
      def agent
        @agent ||= begin
        mech = Mechanize.new
        session = Cache.get("nijie-session")
@@ -131,14 +126,14 @@ module Sources
          cookie.path = "/"
          mech.cookie_jar.add(cookie)
        else
-            mech.get("http://nijie.info/login.php") do |page|
+          mech.get("https://nijie.info/login.php") do |page|
            page.form_with(:action => "/login_int.php") do |form|
              form['email'] = Danbooru.config.nijie_login
              form['password'] = Danbooru.config.nijie_password
            end.click_button
          end
          session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first
-            Cache.put("nijie-session", session.value, 1.month) if session
+          Cache.put("nijie-session", session.value, 1.day) if session
        end
        # This cookie needs to be set to allow viewing of adult works while anonymous
@@ -148,8 +143,16 @@ module Sources
        mech.cookie_jar.add(cookie)
        mech
-        end
+
-      end
+      rescue Mechanize::ResponseCodeError => x
        if x.response_code.to_i == 429
          sleep(5)
          retry
        else
          raise
        end
      end
      memoize :agent
    end
  end
 end
--- a/app/logical/sources/strategies/null.rb
+++ b/app/logical/sources/strategies/null.rb
@@ -0,0 +1,43 @@
 module Sources
  module Strategies
    class Null < Base
      def self.match?(*urls)
        true
      end
      def image_urls
        [url]
      end
      def page_url
        url
      end
      def normalized_for_artist_finder?
        true
      end
      def normalizable_for_artist_finder?
        false
      end
      def normalize_for_artist_finder
        url
      end
      def site_name
        URI.parse(url).hostname || "N/A"
      rescue
        "N/A"
      end
      def unique_id
        url
      end
      def rewrite(url, headers, data)
        return [url, headers, data]
      end
    end
  end
 end
--- a/app/logical/sources/strategies/pawoo.rb
+++ b/app/logical/sources/strategies/pawoo.rb
@@ -1,62 +1,80 @@
 # html page urls:
 #   https://pawoo.net/@evazion/19451018
 #   https://pawoo.net/web/statuses/19451018
 #
 # image urls:
 #   https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
 #   https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
 #   https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
 #
 # artist urls:
 #   https://pawoo.net/@evazion
 #   https://pawoo.net/web/accounts/47806
 module Sources::Strategies
  class Pawoo < Base
-    attr_reader :image_urls
+    IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)!
-    def self.url_match?(url)
+    def self.match?(*urls)
-      PawooApiClient::Status.is_match?(url) || PawooApiClient::Account.is_match?(url)
+      urls.compact.any? do |x| 
        x =~ IMAGE || PawooApiClient::Status.is_match?(x) || PawooApiClient::Account.is_match?(x)
      end
    def referer_url
      normalized_url
    end
    def site_name
      "Pawoo"
    end
-    def api_response
+    def image_url
-      @response ||= PawooApiClient.new.get(normalized_url)
+      image_urls.first
    end
-    def get
+    # https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
-      response = api_response
+    # https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
-      @artist_name = response.account_name
+    # https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
-      @profile_url = response.profile_url
+    def image_urls
-      @image_url = response.image_urls.first
+      if url =~ %r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i
-      @image_urls = response.image_urls
+        return ["https://img.pawoo.net/media_attachments/files/#{$1}/original/#{$2}"]
      @tags = response.tags
      @artist_commentary_title = nil
      @artist_commentary_desc = response.commentary
      end
-    def normalized_url
+      if url =~ %r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i
-      if self.class.url_match?(@url)
+        return [url]
        @url
      elsif self.class.url_match?(@referer_url)
        @referer_url
      end
      return api_response.image_urls
    end
    # https://pawoo.net/@evazion/19451018
    # https://pawoo.net/web/statuses/19451018
    def page_url
      [url, referer_url].each do |x|
        if PawooApiClient::Status.is_match?(x)
          return x
        end
      end
      return super
    end
    # https://pawoo.net/@evazion
    # https://pawoo.net/web/accounts/47806
    def profile_url
      if url =~ PawooApiClient::PROFILE2
        return "https://pawoo.net/@#{$1}"
      end
      api_response.profile_url
    end
    def artist_name
      api_response.account_name
    end
    def artist_commentary_title
      nil
    end
    def artist_commentary_desc
      api_response.commentary
    end
    def tags
      api_response.tags
    end
    def normalizable_for_artist_finder?
      true
    end
-    def normalize_for_artist_finder!
+    def normalize_for_artist_finder
-      get
+      profile_url
      @profile_url || @url
    end
    def dtext_artist_commentary_desc
@@ -68,5 +86,18 @@ module Sources::Strategies
        end
      end.strip
    end
  public
    def api_response
      [url, referer_url].each do |x|
        if client = PawooApiClient.new.get(x)
          return client
        end
      end
      nil
    end
    memoize :api_response
  end
 end
--- a/app/logical/sources/strategies/pixiv.rb
+++ b/app/logical/sources/strategies/pixiv.rb
@@ -1,122 +1,23 @@
 # encoding: UTF-8
 require 'csv'
 module Sources
  module Strategies
    class Pixiv < Base
-      attr_reader :zip_url, :ugoira_frame_data, :ugoira_content_type
+      MONIKER = %r!(?:[a-zA-Z0-9_-]+)!
      PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z!
      EXT =     %r!(?:jpg|jpeg|png|gif)!i
-      MONIKER   = '(?:[a-zA-Z0-9_-]+)'
+      WEB =     %r!(?:\A(?:https?://)?www\.pixiv\.net)!
-      TIMESTAMP = '(?:[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2})'
+      I12 =     %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)!
-      EXT = "(?:jpg|jpeg|png|gif)"
+      IMG =     %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)!
      PXIMG =   %r!(?:\A(?:https?://)?i\.pximg\.net)!
      TOUCH =   %r!(?:\A(?:https?://)?touch\.pixiv\.net)!
      NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))!
      FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))!
      FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))!
-      WEB =   '(?:\A(?:https?://)?www\.pixiv\.net)'
+      def self.match?(*urls)
-      I12 =   '(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)'
+        urls.compact.any? { |x| x.match?(/#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}|#{FANBOX_IMAGE}/i) }
      IMG =   '(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)'
      PXIMG = '(?:\A(?:https?://)?i\.pximg\.net)'
      TOUCH = '(?:\A(?:https?://)?touch\.pixiv\.net)'
      def self.url_match?(url)
        url =~ /#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}/i
      end
      def referer_url
        if @referer_url =~ /pixiv\.net\/member_illust.+mode=medium/ && @url =~ /#{IMG}|#{I12}/
          @referer_url
        else
          @url
        end
      end
      def site_name
        "Pixiv"
      end
      def unique_id
        @pixiv_moniker
      end
      def fake_referer
        "http://www.pixiv.net"
      end
      def normalized_for_artist_finder?
        url =~ %r!\Ahttp://www\.pixiv\.net/member\.php\?id=[0-9]+\z/!
      end
      def normalizable_for_artist_finder?
        has_moniker? || sample_image? || full_image? || work_page?
      end
      def normalize_for_artist_finder!
        @illust_id = illust_id_from_url!
        @metadata = get_metadata_from_papi(@illust_id)
        "http://www.pixiv.net/member.php?id=#{@metadata.user_id}/"
      end
      def translate_tag(tag)
        normalized_tag = tag.gsub(/\d+users入り\z/i, "")
        translated_tags = super(normalized_tag)
        if translated_tags.empty? && normalized_tag.include?("/")
          translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
        end
        translated_tags
      end
      def get
        return unless illust_id_from_url
        @illust_id = illust_id_from_url
        @metadata = get_metadata_from_papi(@illust_id)
        page = agent.get(URI.parse(normalized_url))
        if page.search("body.not-logged-in").any?
          # Session cache is invalid, clear it and log in normally.
          Cache.delete("pixiv-phpsessid")
          @agent = nil
          page = agent.get(URI.parse(normalized_url))
        end
        @artist_name = @metadata.name
        @profile_url = "http://www.pixiv.net/member.php?id=#{@metadata.user_id}"
        @pixiv_moniker = @metadata.moniker
        @zip_url, @ugoira_frame_data, @ugoira_content_type = get_zip_url_from_api
        @tags = @metadata.tags.map do |tag|
          [tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
        end
        @page_count = @metadata.page_count
        @artist_commentary_title = @metadata.artist_commentary_title
        @artist_commentary_desc = @metadata.artist_commentary_desc
        is_manga = @page_count > 1
        if !@zip_url
          page = manga_page_from_url(@url).to_i
          @image_url = image_urls[page]
        end
      end
      def rewrite_thumbnails(thumbnail_url, is_manga=nil)
        thumbnail_url = rewrite_new_medium_images(thumbnail_url)
        thumbnail_url = rewrite_medium_ugoiras(thumbnail_url)
        thumbnail_url = rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
        return thumbnail_url
      end
      def agent
        @agent ||= PixivWebAgent.build
      end
      def file_url
        image_url || zip_url
      end
      def image_urls
        @metadata.pages
      end
      def self.to_dtext(text)
@@ -137,18 +38,147 @@ module Sources
        DText.from_html(text)
      end
-      def illust_id_from_url
+      def site_name
-        if sample_image? || full_image? || work_page?
+        "Pixiv"
          illust_id_from_url!
        else
          nil
      end
-      rescue Sources::Error
+
-        raise if Rails.env.test?
+      def image_urls
        image_urls_sub.
          map {|x| rewrite_cdn(x)}
      rescue PixivApiClient::BadIDError
        [url]
      end
      def page_url
        if novel_id.present?
          return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
        end
        if fanbox_id.present?
          return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}"
        end
        if illust_id.present?
          return "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}"
        end
        return url
      rescue PixivApiClient::BadIDError
        nil
      end
-      def illust_id_from_url!
+      def canonical_url
        return image_url
      end
      def profile_url
        [url, referer_url].each do |x|
          if x =~ PROFILE
            return x
          end
        end
        "https://www.pixiv.net/member.php?id=#{metadata.user_id}"
      rescue PixivApiClient::BadIDError
        nil
      end
      def artist_name
        metadata.name
      rescue PixivApiClient::BadIDError
        nil
      end
      def artist_commentary_title
        metadata.artist_commentary_title
      rescue PixivApiClient::BadIDError
        nil
      end
      def artist_commentary_desc
        metadata.artist_commentary_desc
      rescue PixivApiClient::BadIDError
        nil
      end
      def headers
        if fanbox_id.present?
          # need the session to download fanbox images
          return {
            "Referer" => "https://www.pixiv.net/fanbox",
            "Cookie" => HTTP::Cookie.cookie_value(agent.cookies)
          }
        end
        return {
          "Referer" => "https://www.pixiv.net"
        }
      end
      def normalized_for_artist_finder?
        url =~ PROFILE
      end
      def normalizable_for_artist_finder?
        illust_id.present? || novel_id.present? || fanbox_id.present?
      end
      def unique_id
        moniker
      end
      def tags
        metadata.tags.map do |tag|
          [tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
        end
      rescue PixivApiClient::BadIDError
        []
      end
      memoize :tags
      def translate_tag(tag)
        normalized_tag = tag.gsub(/\d+users入り\z/i, "")
        translated_tags = super(normalized_tag)
        if translated_tags.empty? && normalized_tag.include?("/")
          translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
        end
        translated_tags
      end
    public
      def image_urls_sub
        # there's too much normalization bullshit we have to deal with
        # raw urls, so just fetch the canonical url from the api every
        # time.
        if manga_page.present?
          return [metadata.pages[manga_page]]
        end
        if metadata.pages.is_a?(Hash)
          return [ugoira_zip_url]
        end
        return metadata.pages
      end
      def rewrite_cdn(x)
        if x =~ %r{\Ahttps?:\/\/(?:\w+\.)?pixiv\.net\.edgesuite\.net}
          return x.sub(".edgesuite.net", "")
        end
        return x
      end
      # in order to prevent recursive loops, this method should not make any
      # api calls and only try to extract the illust_id from the url. therefore,
      # even though it makes sense to reference page_url here, it will only look
      # at (url, referer_url).
      def illust_id
        # http://img18.pixiv.net/img/evazion/14901720.png
        #
        # http://i2.pixiv.net/img18/img/evazion/14901720.png
@@ -165,228 +195,166 @@ module Sources
        #
        # http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
        if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
-          $1
+          return $1
        end
        [url, referer_url].each do |x|
          # http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
          # http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
          # http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
          # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
-        elsif url =~ /illust_id=(\d+)/i
+          if x =~ /illust_id=(\d+)/i
-          $1
+            return $1
          end
          # http://www.pixiv.net/i/18557054
-        elsif url =~ %r!pixiv\.net/i/(\d+)!i
+          if x =~ %r!pixiv\.net/i/(\d+)!i
-          $1
+            return $1
        else
          raise Sources::Error.new("Couldn't get illust ID from URL: #{url}")
          end
        end
-      # http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
+        raise Sources::Error.new("Couldn't get illust ID from URL (#{url}, #{referer_url})")
      # => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
      #
      # http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
      # => http://i.pximg.net/img-original/img/2014/05/15/23/53/59/43521009_p1.jpg
      def rewrite_new_medium_images(thumbnail_url)
        if thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i ||
           thumbnail_url =~ %r!/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i
          page = manga_page_from_url(@url).to_i
          thumbnail_url = @metadata.pages[page]
      end
      memoize :illust_id
-        thumbnail_url
+      def novel_id
-      end
+        [url, referer_url].each do |x|
-
+          if x =~ NOVEL_PAGE
-      # http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira600x600.zip
+            return $1
      # => http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira1920x1080.zip
      def rewrite_medium_ugoiras(thumbnail_url)
        if thumbnail_url =~ %r!/img-zip-ugoira/img/.*/\d+_ugoira600x600.zip!i
          thumbnail_url = thumbnail_url.sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
        end
        thumbnail_url
      end
      # If the thumbnail is for a manga gallery, it needs to be rewritten like this:
      #
      # http://i2.pixiv.net/img18/img/evazion/14901720_m.png
      # => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
      #
      # Otherwise, it needs to be rewritten like this:
      #
      # http://i2.pixiv.net/img18/img/evazion/14901720_m.png
      # => http://i2.pixiv.net/img18/img/evazion/14901720.png
      #
      def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
        if thumbnail_url =~ %r!/img/#{MONIKER}/\d+_[ms]\.#{EXT}!i
          if is_manga.nil?
            page_count = @metadata.page_count
            is_manga = page_count > 1
          end
          if is_manga
            page = manga_page_from_url(@url)
            return thumbnail_url.sub(/_[ms]\./, "_big_p#{page}.")
          else
            return thumbnail_url.sub(/_[ms]\./, ".")
          end
        end
-        return thumbnail_url
+        return nil
      end
      memoize :novel_id
      def fanbox_id
        [url, referer_url].each do |x|
          if x =~ FANBOX_PAGE
            return $1
          end
-      def manga_page_from_url(url)
+          if x =~ FANBOX_IMAGE
            return $1
          end
        end
        return nil
      end
      memoize :fanbox_id
      def agent
        PixivWebAgent.build
      end
      memoize :agent
      def page
        agent.get(URI.parse(page_url))
        if page.search("body.not-logged-in").any?
          # Session cache is invalid, clear it and log in normally.
          Cache.delete("pixiv-phpsessid")
          @agent = nil
          page = agent.get(URI.parse(page_url))
        end
        page
      end
      memoize :page
      def metadata
        if novel_id.present?
          return PixivApiClient.new.novel(novel_id)
        end
        if fanbox_id.present?
          return PixivApiClient.new.fanbox(fanbox_id)
        end
        return PixivApiClient.new.work(illust_id)
      end
      memoize :metadata
      def moniker
        # we can sometimes get the moniker from the url
        if url =~ %r!#{IMG}/img/(#{MONIKER})!i
          return $1
        end
        if url =~ %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
          return $1
        end
        if url =~ %r!#{WEB}/stacc/(#{MONIKER})/?$!i
          return $1
        end
        return metadata.moniker
      end
      memoize :moniker
      def page_count
        metadata.page_count
      end
      def data
        return {
          ugoira_frame_data: ugoira_frame_data
        }
      end
      def ugoira_zip_url
        if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"]
          return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
        end
      end
      memoize :ugoira_zip_url
      def ugoira_frame_data
        return metadata.json.dig("metadata", "frames")
      end
      memoize :ugoira_frame_data
      def ugoira_content_type
        case metadata.json["image_urls"].to_s
        when /\.jpg/
          return "image/jpeg"
        when /\.png/
          return "image/png"
        when /\.gif/
          return "image/gif"
        end
        raise Sources::Error.new("content type not found for (#{url}, #{referer_url})")
      end
      memoize :ugoira_content_type
      def is_manga?
        page_count > 1
      end
      # Returns the current page number of the manga. This will not
      # make any api calls and only looks at (url, referer_url).
      def manga_page
        # http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
        # http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
        # http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
        if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i
-          $1
+          return $1.to_i
        end
        # http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
-        elsif url =~ /page=(\d+)/i
+        [url, referer_url].each do |x|
-          $1
+          if x =~ /page=(\d+)/i
-
+            return $1.to_i
        else
          0
          end
        end
-      def get_moniker_from_url
+        return nil
        case url
        when %r!#{IMG}/img/(#{MONIKER})!i
          $1
        when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
          $1
        when %r!#{WEB}/stacc/(#{MONIKER})/?$!i
          $1
        else
          false
        end
      end
      def has_moniker?
        get_moniker_from_url != false
      end
      def get_image_url_from_page(page, is_manga)
        if is_manga
          elements = page.search("div.works_display a img").find_all do |node|
            node["src"] !~ /source\.pixiv\.net/
          end
        else
          elements = page.search("div.works_display div img.big")
          elements = page.search("div.works_display div img") if elements.empty?
        end
        if elements.any?
          element = elements.first
          thumbnail_url = element.attr("src") || element.attr("data-src")
          return rewrite_thumbnails(thumbnail_url, is_manga)
        end
        if page.body =~ /"original":"(https:.+?)"/
          return $1.gsub(/\\\//, '/')
        end
      end
      def get_zip_url_from_api
        if @metadata.pages.is_a?(Hash) && @metadata.pages["ugoira600x600"]
          zip_url = @metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
          frame_data = @metadata.json["metadata"]["frames"]
          content_type = nil
          case @metadata.json["image_urls"].to_s
          when /\.jpg/
            content_type = "image/jpeg"
          when /\.png/
            content_type = "image/png"
          when /\.gif/
            content_type = "image/gif"
          end
          return [zip_url, frame_data, content_type]
        end
      end
      def get_zip_url_from_page(page)
        scripts = page.search("body script").find_all do |node|
          node.text =~ /_ugoira600x600\.zip/
        end
        if scripts.any?
          javascript = scripts.first.text
          json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1]
          data = JSON.parse(json)
          zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
          frame_data = data["frames"]
          content_type = data["mime_type"]
          return [zip_url, frame_data, content_type]
        end
      end
      def normalized_url
        "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
      end
      def get_metadata_from_papi(illust_id)
        @metadata ||= PixivApiClient.new.works(illust_id)
      end
      def work_page?
        return true if url =~ %r!(?:#{WEB}|#{TOUCH})/member_illust\.php! && url =~ %r!mode=(?:medium|big|manga|manga_big)! && url =~ %r!illust_id=\d+!
        return true if url =~ %r!(?:#{WEB}|#{TOUCH})/i/\d+$!i
        return false
      end
      def full_image?
        # http://img18.pixiv.net/img/evazion/14901720.png?1234
        return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
        # http://i2.pixiv.net/img18/img/evazion/14901720.png
        # http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
        return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
        # http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
        return true if url =~ %r!#{I12}/img-original/img/#{TIMESTAMP}/\d+_p\d+\.#{EXT}$!i
        # http://i.pximg.net/img-original/img/2017/03/22/17/40/51/62041488_p0.jpg
        return true if url =~ %r!#{PXIMG}/img-original/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
        # http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
        return true if url =~ %r!(#{I12}|#{PXIMG})/img-zip-ugoira/img/#{TIMESTAMP}/\d+_ugoira\d+x\d+\.zip$!i
        return false
      end
      def sample_image?
        # http://img18.pixiv.net/img/evazion/14901720_m.png
        return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
        # http://i2.pixiv.net/img18/img/evazion/14901720_m.png
        # http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
        return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
        # http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
        # http://i2.pixiv.net/c/64x64/img-master/img/2014/10/09/12/59/50/46441917_square1200.jpg
        return true if url =~ %r!#{I12}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}$!i
        # http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
        return true if url =~ %r!#{PXIMG}/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
        # http://i.pximg.net/c/600x600/img-master/img/2017/03/22/17/40/51/62041488_p0_master1200.jpg
        return true if url =~ %r!#{PXIMG}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
        # http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
        # http://i2.pixiv.net/img-inf/img/2010/11/30/08/54/06/14901765_64x64.jpg
        return true if url =~ %r!#{I12}/img-inf/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
        return false
      end
      memoize :manga_page
    end
  end
 end
--- a/app/logical/sources/strategies/tumblr.rb
+++ b/app/logical/sources/strategies/tumblr.rb
@@ -1,28 +1,52 @@
 module Sources::Strategies
  class Tumblr < Base
-    extend Memoist
+    DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
    MD5 = %r{(?<md5>[0-9a-f]{32})}i
    FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
    SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i
    EXT = %r{(?<ext>\w+)}
    IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
    POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
-    def self.url_match?(url)
+    def self.match?(*urls)
      urls.compact.any? do |url|
        blog_name, post_id = parse_info_from_url(url)
-      blog_name.present? && post_id.present?
+        url =~ IMAGE || blog_name.present? && post_id.present?
      end
    end
-    def referer_url
+    def self.parse_info_from_url(url)
-      blog_name, post_id = self.class.parse_info_from_url(normalized_url)
+      if url =~ POST
-      "https://#{blog_name}.tumblr.com/post/#{post_id}"
+        [$~[:blog_name], $~[:post_id]]
      else
        []
      end
    def tags
      post[:tags].map do |tag|
        # normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
        [tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI::escape(tag.tr(" _-", "-"))}"]
      end.uniq
    end
    def site_name
      "Tumblr"
    end
    def image_urls
      image_urls_sub
        .uniq
        .map {|x| normalize_cdn(x)}
        .map {|x| find_largest(x)}
        .compact
        .uniq
    end
    def page_url
      [url, referer_url].each do |x|
        if x =~ POST
          blog_name, post_id = self.class.parse_info_from_url(x)
          return "https://#{blog_name}.tumblr.com/post/#{post_id}"
        end
      end
      return super
    end
    def profile_url
      "https://#{artist_name}.tumblr.com/"
    end
@@ -35,8 +59,10 @@ module Sources::Strategies
      case post[:type]
      when "text", "link"
        post[:title]
      when "answer"
        "#{post[:asking_name]} asked: #{post[:question]}"
      else
        nil
      end
@@ -46,94 +72,133 @@ module Sources::Strategies
      case post[:type]
      when "text"
        post[:body]
      when "link"
        post[:description]
      when "photo", "video"
        post[:caption]
      when "answer"
        post[:answer]
      else
        nil
      end
    end
    def tags
      post[:tags].map do |tag|
        # normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
        etag = tag.gsub(/[ _-]/, "_")
        [etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
      end.uniq
    end
    memoize :tags
    def dtext_artist_commentary_desc
      DText.from_html(artist_commentary_desc).strip
    end
-    def image_url
+  public
-      image_urls.first
+
    def image_urls_sub
      list = []
      if url =~ IMAGE
        list << url
      end
-    def image_urls
+      if page_url !~ POST
-      urls = case post[:type]
+        return list
      when "photo"
        post[:photos].map do |photo|
          self.class.normalize_image_url(photo[:original_size][:url])
        end
      when "video"
        [post[:video_url]]
      else
        []
      end
-      urls += self.class.parse_inline_images(artist_commentary_desc)
+      if post[:type] == "photo"
-      urls
+        list += post[:photos].map do |photo|
-    end
+          photo[:original_size][:url]
    def get
    end
    module HelperMethods
      extend ActiveSupport::Concern
      module ClassMethods
        def parse_info_from_url(url)
          url =~ %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
          [$1, $2]
        end
        def parse_inline_images(text)
          html = Nokogiri::HTML.fragment(text)
          image_urls = html.css("img").map { |node| node["src"] }
          image_urls = image_urls.map(&method(:normalize_image_url))
          image_urls
        end
        def normalize_image_url(url)
          url, _, _ = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {})
          url
        end
      end
-      def normalized_url
+      if post[:type] == "video"
-        if self.class.url_match?(@referer_url)
+        list << post[:video_url]
          @referer_url
        elsif self.class.url_match?(@url)
          @url
      end
      if inline_images.any?
        list += inline_images.to_a
      end
      if list.any?
        return list
      end
      raise "image url not found for (#{url}, #{referer_url})"
    end
    # Normalize cdn subdomains.
    #
    # https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
    # => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
    def normalize_cdn(x)
      # does this work?
      x.sub(%r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/media\.tumblr\.com!i, "http://media.tumblr.com")
    end
    # Look for the biggest available version on media.tumblr.com. A bigger
    # version may or may not exist.
    #
    # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
    # => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
    #
    # https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
    # => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
    #
    # https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
    # => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
    #
    # http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
    # => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
    #
    # http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
    # => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
    def find_largest(x)
      if x =~ IMAGE
        sizes = [1280, 640, 540, "500h", 500, 400, 250]
        candidates = sizes.map do |size|
          "https://media.tumblr.com/#{$~[:dir]}#{$~[:filename]}_#{size}.#{$~[:ext]}"
        end
        return candidates.find do |candidate|
          http_exists?(candidate, headers)
        end
      end
-    module ApiMethods
+      return x
    end
    def inline_images
      html = Nokogiri::HTML.fragment(artist_commentary_desc)
      html.css("img").map { |node| node["src"] }
    end
    memoize :inline_images
    def client
      raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
-        ::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
+
      TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
    end
    memoize :client
    def api_response
-        blog_name, post_id = self.class.parse_info_from_url(normalized_url)
+      blog_name, post_id = self.class.parse_info_from_url(page_url)
      raise "Page url not found for (#{url}, #{referer_url})" if blog_name.nil?
      client.posts(blog_name, post_id)
    end
    memoize :api_response
    def post
      api_response[:posts].first
    end
  end
    include ApiMethods
    include HelperMethods
    memoize :client, :api_response
  end
 end
--- a/app/logical/sources/strategies/twitter.rb
+++ b/app/logical/sources/strategies/twitter.rb
@@ -1,52 +1,94 @@
 module Sources::Strategies
  class Twitter < Base
-    attr_reader :image_urls
+    PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i
    ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)}!i
-    def self.url_match?(url)
+    def self.match?(*urls)
-      self.status_id_from_url(url).present?
+      urls.compact.any? { |x| x =~ PAGE || x =~ ASSET}
    end
-    def referer_url
+    # https://twitter.com/i/web/status/943446161586733056
-      normalized_url
+    # https://twitter.com/motty08111213/status/943446161586733056
    def self.status_id_from_url(url)
      if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
        return $1
      end
-    def normalized_url
+      return nil
      "https://twitter.com/#{artist_name}/status/#{status_id}"
    end
    def artist_name
      api_response.attrs[:user][:screen_name]
    end
    def site_name
      "Twitter"
    end
-    def api_response
+    def image_urls
-      @api_response ||= TwitterService.new.client.status(status_id, tweet_mode: "extended")
+      if url =~ /(#{ASSET}[^:]+)/
        return [$1 + ":orig" ]
      end
-    def get
+      [url, referer_url].each do |x|
-      attrs = api_response.attrs
+        if x =~ PAGE
-      @profile_url = "https://twitter.com/" + attrs[:user][:screen_name]
+          return service.image_urls(api_response)
-      @image_urls = TwitterService.new.image_urls(api_response)
+        end
-      @image_url = @image_urls.first
+      end
-      @artist_commentary_title = ""
+    rescue Twitter::Error::NotFound
-      @artist_commentary_desc = attrs[:full_text]
+      url
-      @tags = attrs[:entities][:hashtags].map do |text:, indices:|
+    end
-        [text, "https://twitter.com/hashtag/#{text}"]
+    memoize :image_urls
    def page_url
      [url, referer_url].each do |x|
        if self.class.status_id_from_url(x).present?
          return x
        end
    rescue ::Twitter::Error::Forbidden
      end
-    def normalize_for_artist_finder!
+      return super
-      url.downcase
+    end
    def profile_url
      if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i
        if $1 != "i"
          return "https://twitter.com/#{$1}"
        end
      end
      "https://twitter.com/" + api_response.attrs[:user][:screen_name]
    rescue Twitter::Error::NotFound
      nil
    end
    def artist_name
      api_response.attrs[:user][:screen_name]
    rescue Twitter::Error::NotFound
      nil
    end
    def artist_commentary_title
      ""
    end
    def artist_commentary_desc
      api_response.attrs[:full_text]
    rescue Twitter::Error::NotFound
      nil
    end
    def normalizable_for_artist_finder?
-      true
+      url =~ PAGE
    end
    def normalize_for_artist_finder
      profile_url.downcase
    end
    def tags
      api_response.attrs[:entities][:hashtags].map do |text:, indices:|
        [text, "https://twitter.com/hashtag/#{text}"]
      end
    end
    memoize :tags
    def dtext_artist_commentary_desc
      url_replacements = api_response.urls.map do |obj|
        [obj.url.to_s, obj.expanded_url.to_s]
@@ -63,19 +105,23 @@ module Sources::Strategies
      desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]')
      desc.strip
    end
    memoize :dtext_artist_commentary_desc
  public
    def service
      TwitterService.new
    end
    memoize :service
    def api_response
      service.client.status(status_id, tweet_mode: "extended")
    end
    memoize :api_response
    def status_id
-      self.class.status_id_from_url(@url) || self.class.status_id_from_url(@referer_url)
+      [url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first
    end
    # https://twitter.com/i/web/status/943446161586733056
    # https://twitter.com/motty08111213/status/943446161586733056
    def self.status_id_from_url(url)
      if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
        $1
      else
        nil
      end
    end
    memoize :status_id
  end
 end
--- a/app/logical/twitter_service.rb
+++ b/app/logical/twitter_service.rb
@@ -1,8 +1,9 @@
 class TwitterService
  extend Memoist
  def client
    raise "Twitter API keys not set" if Danbooru.config.twitter_api_key.nil?
    @client ||= begin
    rest_client = ::Twitter::REST::Client.new do |config|
      config.consumer_key = Danbooru.config.twitter_api_key
      config.consumer_secret = Danbooru.config.twitter_api_secret
@@ -15,7 +16,7 @@ class TwitterService
    rest_client
  end
-  end
+  memoize :client
  def extract_urls_for_status(tweet)
    tweet.media.map do |obj|
--- a/app/logical/upload_service.rb
+++ b/app/logical/upload_service.rb
@@ -49,7 +49,7 @@ class UploadService
      @upload.update(status: "processing")
      if @upload.file.nil? && Utils.is_downloadable?(source)
-        @upload.file = Utils.download_for_upload(source, @upload)
+        @upload.file = Utils.download_for_upload(@upload)
      end
      if @upload.file.present?
@@ -111,7 +111,9 @@ class UploadService
      p.image_width = upload.image_width
      p.image_height = upload.image_height
      p.rating = upload.rating
-      p.source = upload.source
+      if upload.source.present?
        p.source = Sources::Strategies.find(upload.source, upload.referer_url).canonical_url
      end
      p.file_size = upload.file_size
      p.uploader_id = upload.uploader_id
      p.uploader_ip_addr = upload.uploader_ip_addr
--- a/app/logical/upload_service/controller_helper.rb
+++ b/app/logical/upload_service/controller_helper.rb
@@ -4,13 +4,8 @@ class UploadService
      upload = Upload.new
      if Utils.is_downloadable?(url) && file.nil?
-        download = Downloads::File.new(url)
+        strategy = Sources::Strategies.find(url, ref)
-        normalized_url = download.rewrite_url()
+        post = Post.where("SourcePattern(lower(posts.source)) IN (?)", [url, strategy.canonical_url]).first
        post = if normalized_url.nil?
          Post.where("SourcePattern(lower(posts.source)) = ?", url).first
        else
          Post.where("SourcePattern(lower(posts.source)) IN (?)", [url, normalized_url]).first
        end
        if post.nil?
          # this gets called from UploadsController#new so we need
@@ -19,13 +14,15 @@ class UploadService
        end
        begin
-          source = Sources::Site.new(url, :referer_url => ref)
+          download = Downloads::File.new(url, ref)
          remote_size = download.size
        rescue Exception
        end
-        return [upload, post, source, normalized_url, remote_size]
+        return [upload, post, strategy, remote_size]
-      elsif file
+      end
      if file
        # this gets called via XHR so we can process sync
        Preprocessor.new(file: file).delayed_start(CurrentUser.id)
      end
@@ -35,9 +32,7 @@ class UploadService
    def self.batch(url, ref = nil)
      if url
-        source = Sources::Site.new(url, :referer_url => ref)
+        return Sources::Strategies.find(url, ref)
        source.get
        return source
      end
    end
  end
--- a/app/logical/upload_service/preprocessor.rb
+++ b/app/logical/upload_service/preprocessor.rb
@@ -1,5 +1,7 @@
 class UploadService
  class Preprocessor
    extend Memoist
    attr_reader :params, :original_post_id
    def initialize(params)
@@ -15,31 +17,40 @@ class UploadService
      params[:md5_confirmation]
    end
-    def referer
+    def referer_url
      params[:referer_url]
    end
-    def normalized_source
+    def strategy
-      @normalized_source ||= begin
+      Sources::Strategies.find(source, referer_url)
        Downloads::File.new(params[:source]).rewrite_url
    end
    memoize :strategy
    # When searching posts we have to use the canonical source
    def canonical_source
      strategy.canonical_url
    end
    memoize :canonical_source
    def in_progress?
      if Utils.is_downloadable?(source)
-        Upload.where(status: "preprocessing", source: normalized_source).or(Upload.where(status: "preprocessing", alt_source: normalized_source)).exists?
+        return Upload.where(status: "preprocessing", source: source).exists?
      elsif md5.present?
        Upload.where(status: "preprocessing", md5: md5).exists?
      else
        false
      end
      if md5.present?
        return Upload.where(status: "preprocessing", md5: md5).exists?
      end
      false
    end
    def predecessor
      if Utils.is_downloadable?(source)
-        Upload.where(status: ["preprocessed", "preprocessing"]).where(source: normalized_source).or(Upload.where(status: ["preprocessed", "preprocessing"], alt_source: normalized_source)).first
+        return Upload.where(status: ["preprocessed", "preprocessing"], source: source).first
-      elsif md5.present?
+      end
-        Upload.where(status: ["preprocessed", "preprocessing"], md5: md5).first
+
      if md5.present?
        return Upload.where(status: ["preprocessed", "preprocessing"], md5: md5).first
      end
    end
@@ -59,34 +70,31 @@ class UploadService
    def start!
      if Utils.is_downloadable?(source)
        CurrentUser.as_system do
-          if Post.tag_match("source:#{normalized_source}").where.not(id: original_post_id).exists?
+          if Post.tag_match("source:#{canonical_source}").where.not(id: original_post_id).exists?
-            raise ActiveRecord::RecordNotUnique.new("A post with source #{normalized_source} already exists")
+            raise ActiveRecord::RecordNotUnique.new("A post with source #{canonical_source} already exists")
          end
        end
-        if Upload.where(source: normalized_source, status: "completed").exists?
+        if Upload.where(source: source, status: "completed").exists?
-          raise ActiveRecord::RecordNotUnique.new("A completed upload with source #{normalized_source} already exists")
+          raise ActiveRecord::RecordNotUnique.new("A completed upload with source #{source} already exists")
        end
-        if Upload.where(source: normalized_source).where("status like ?", "error%").exists?
+        if Upload.where(source: source).where("status like ?", "error%").exists?
-          raise ActiveRecord::RecordNotUnique.new("An errored upload with source #{normalized_source} already exists")
+          raise ActiveRecord::RecordNotUnique.new("An errored upload with source #{source} already exists")
        end
      end
      params[:rating] ||= "q"
      params[:tag_string] ||= "tagme"
      upload = Upload.create!(params)
      begin
        upload.update(status: "preprocessing")
-        if Utils.is_downloadable?(source)
+        if params[:file].present?
          # preserve the original source (for twitter, the twimg:orig
          # source, while the status url is stored in upload.source)
          upload.alt_source = normalized_source 
          file = Utils.download_for_upload(source, upload)
        elsif params[:file].present?
          file = params[:file]
        elsif Utils.is_downloadable?(source)
          file = Utils.download_for_upload(upload)
        end
        Utils.process_file(upload, file, original_post_id: original_post_id)
@@ -109,10 +117,7 @@ class UploadService
      # goto whoever submitted the form
      pred.initialize_attributes
-      # we went through a lot of trouble normalizing the source,
+      pred.attributes = self.params
      # so don't overwrite it with whatever the user provided
      pred.source = "" if pred.source.nil?
      pred.attributes = self.params.except(:source)
      # if a file was uploaded after the preprocessing occurred,
      # then process the file and overwrite whatever the preprocessor
--- a/app/logical/upload_service/replacer.rb
+++ b/app/logical/upload_service/replacer.rb
@@ -74,8 +74,8 @@ class UploadService
      if replacement.replacement_file.present?
        replacement.replacement_url = "file://#{replacement.replacement_file.original_filename}"
-      elsif upload.downloaded_source.present?
+      elsif upload.source.present?
-        replacement.replacement_url = upload.downloaded_source
+        replacement.replacement_url = Sources::Strategies.canonical(upload.source, upload.referer_url)
      end
      if md5_changed
@@ -93,7 +93,7 @@ class UploadService
      post.image_width = upload.image_width
      post.image_height = upload.image_height
      post.file_size = upload.file_size
-      post.source = upload.downloaded_source || upload.source
+      post.source = Sources::Strategies.canonical(upload.source, upload.referer_url)
      post.tag_string = upload.tag_string
      update_ugoira_frame_data(post, upload)
--- a/app/logical/upload_service/utils.rb
+++ b/app/logical/upload_service/utils.rb
@@ -200,37 +200,19 @@ class UploadService
      tags.join(" ")
    end
-    def download_from_source(source, referer_url: nil)
+    def download_for_upload(upload)
-      download = Downloads::File.new(source, referer_url: referer_url)
+      download = Downloads::File.new(upload.source, upload.referer_url)
      file, strategy = download.download!
-      file = download.download!
+      if download.data[:ugoira_frame_data]
-      context = {
+        upload.context = { 
-        downloaded_source: download.downloaded_source,
+          "ugoira" => {
-        source: download.source
+            "frame_data" => download.data[:ugoira_frame_data],
            "content_type" => "image/jpeg"
          }
      if download.data[:is_ugoira]
        context[:ugoira] = {
          frame_data: download.data[:ugoira_frame_data],
          content_type: download.data[:ugoira_content_type]
        }
      end
      yield(context)
      return file
    end
    def download_for_upload(source, upload)
      file = download_from_source(source, referer_url: upload.referer_url) do |context|
        upload.downloaded_source = context[:downloaded_source]
        upload.source = context[:source]
        if context[:ugoira]
          upload.context = { ugoira: context[:ugoira] }
        end
      end
      return file
    end
  end
--- a/app/models/artist.rb
+++ b/app/models/artist.rb
@@ -152,7 +152,7 @@ class Artist < ApplicationRecord
        url = ArtistUrl.normalize(url)
        artists = []
-        # return [] unless Sources::Site.new(url).normalized_for_artist_finder?
+        # return [] unless Sources::Strategies.find(url).normalized_for_artist_finder?
        while artists.empty? && url.size > 10
          u = url.sub(/\/+$/, "") + "/"
@@ -481,13 +481,8 @@ class Artist < ApplicationRecord
    end
    def search_for_profile(url)
-      source = Sources::Site.new(url)
+      source = Sources::Strategies.find(url)
      if source.strategy
        source.get
      find_all_by_url(source.profile_url)
      else
        nil
      end
    rescue Net::OpenTimeout, PixivApiClient::Error
      raise if Rails.env.test?
      nil
--- a/app/models/artist_url.rb
+++ b/app/models/artist_url.rb
@@ -22,7 +22,7 @@ class ArtistUrl < ApplicationRecord
      url = url.sub(%r!^http://blog\d+\.fc2!, "http://blog.fc2")
      url = url.sub(%r!^http://blog-imgs-\d+\.fc2!, "http://blog.fc2")
      url = url.sub(%r!^http://blog-imgs-\d+-\w+\.fc2!, "http://blog.fc2")
-      url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
+      # url = url.sub(%r!^(http://seiga.nicovideo.jp/user/illust/\d+)\?.+!, '\1/')
      url = url.sub(%r!^http://pictures.hentai-foundry.com//!, "http://pictures.hentai-foundry.com/")
      if url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
        url = url.sub(%r{\Ahttps?://(.+?)\.deviantart\.com(.*)}, 'http://www.deviantart.com/\1\2')
@@ -32,11 +32,15 @@ class ArtistUrl < ApplicationRecord
      url = url.downcase if url =~ %r!^https?://(?:mobile\.)?twitter\.com!
      begin
-        url = Sources::Site.new(url).normalize_for_artist_finder!
+        source = Sources::Strategies.find(url)
        if !source.normalized_for_artist_finder? && source.normalizable_for_artist_finder?
          url = source.normalize_for_artist_finder
        end
      rescue Net::OpenTimeout, PixivApiClient::Error
        raise if Rails.env.test?
      rescue Sources::Site::NoStrategyError
      end
      url = url.gsub(/\/+\Z/, "")
      url = url.gsub(%r!^https://!, "http://")
      url + "/"
@@ -102,10 +106,6 @@ class ArtistUrl < ApplicationRecord
  end
  def normalize
    if !Sources::Site.new(normalized_url).normalized_for_artist_finder?
      self.normalized_url = self.class.normalize(url)
    end
  rescue Sources::Site::NoStrategyError
    self.normalized_url = self.class.normalize(url)
  end
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -1682,7 +1682,11 @@ class Post < ApplicationRecord
  module PixivMethods
    def parse_pixiv_id
-      self.pixiv_id = Sources::Strategies::Pixiv.new(source).illust_id_from_url
+      self.pixiv_id = nil
      if Sources::Strategies::Pixiv.match?(source)
        self.pixiv_id = Sources::Strategies::Pixiv.new(source).illust_id
      end
    end
  end
@@ -1790,10 +1794,8 @@ class Post < ApplicationRecord
      return if has_tag?("artist_request") || has_tag?("official_art")
      return if tags.any? { |t| t.category == Tag.categories.artist }
-      site = Sources::Site.new(source)
+      site = Sources::Strategies.find(source)
      self.warnings[:base] << "Artist tag is required. Create a new tag with [[artist:<artist_name>]]. Ask on the forum if you need naming help"
    rescue Sources::Site::NoStrategyError => e
      # unrecognized source; do nothing.
    end
    def has_copyright_tag
--- a/app/models/upload.rb
+++ b/app/models/upload.rb
@@ -47,8 +47,7 @@ class Upload < ApplicationRecord
  end
-  attr_accessor :as_pending,
+  attr_accessor :as_pending, :replaced_post, :file
    :referer_url, :downloaded_source, :replaced_post, :file
  belongs_to :uploader, :class_name => "User"
  belongs_to :post, optional: true
@@ -63,6 +62,7 @@ class Upload < ApplicationRecord
  validates :file_ext, format: { with: /jpg|gif|png|swf|webm|mp4|zip/ }, allow_nil: true
  validates_with Validator
  serialize :context, JSON
  scope :preprocessed, -> { where(status: "preprocessed") }
  def initialize_attributes
    self.uploader_id = CurrentUser.id
--- a/app/views/uploads/_image.html.erb
+++ b/app/views/uploads/_image.html.erb
@@ -1,8 +1,8 @@
 <% if params[:url] %>
-  <% if ImageProxy.needs_proxy?(@normalized_url) %>
+  <% if ImageProxy.needs_proxy?(@source.image_url) %>
-    <%= image_tag(image_proxy_uploads_path(:url => @normalized_url), :title => "Preview", :id => "image") %>
+    <%= image_tag(image_proxy_uploads_path(:url => @source.image_url), :title => "Preview", :id => "image") %>
  <% else %>
-    <%= image_tag(@normalized_url, :title => "Preview", :id => "image") %>
+    <%= image_tag(@source.image_url, :title => "Preview", :id => "image") %>
  <% end %>
  <ul>
--- a/app/views/uploads/index.html.erb
+++ b/app/views/uploads/index.html.erb
@@ -47,11 +47,10 @@
              </span>
              <br>
-              <% if upload.alt_source.present? %>
+              <% if upload.referer_url.present? %>
                <span class="info">
-                  <strong>Alternate Source</strong>
+                  <strong>Referer</strong>
-                  <%= link_to_if (upload.alt_source =~ %r!\Ahttps?://!i), (upload.alt_source.presence.try(:truncate, 50) || content_tag(:em, "none")), upload.source %>
+                  <%= URI.parse(upload.referer_url).host rescue nil %>
                  <%= link_to "»", uploads_path(search: params[:search].merge(source_matches: upload.alt_source)) %>
                </span>
                <br>
              <% end %>
--- a/app/views/uploads/new.html.erb
+++ b/app/views/uploads/new.html.erb
@@ -20,9 +20,7 @@
      <%= form_for(@upload, :html => {:multipart => true, :class => "simple_form", :id => "form"}) do |f| %>
        <%= hidden_field_tag :url, params[:url] %>
        <%= hidden_field_tag :ref, params[:ref] %>
        <%= hidden_field_tag :normalized_url, @normalized_url %>
        <%= f.hidden_field :md5_confirmation %>
        <%= f.hidden_field :referer_url, :value => @source.try(:referer_url) %>
        <% if CurrentUser.can_upload_free? %>
          <div class="input">
--- a/config/docker/compose.yml
+++ b/config/docker/compose.yml
@@ -49,6 +49,7 @@ services:
      - DANBOORU_NICO_SEIGA_LOGIN
      - DANBOORU_NICO_SEIGA_PASSWORD
      - DANBOORU_PERSIST_PIXIV_SESSION
      - DANBOORU_TUMBLR_CONSUMER_KEY
      - CIRCLE_NODE_TOTAL
      - CIRCLE_NODE_INDEX
      - CIRCLE_BUILD_IMAGE
--- a/db/migrate/20180816230604_rename_alt_source_on_uploads.rb
+++ b/db/migrate/20180816230604_rename_alt_source_on_uploads.rb
@@ -0,0 +1,5 @@
 class RenameAltSourceOnUploads < ActiveRecord::Migration[5.2]
  def change
    rename_column :uploads, :alt_source, :referer_url
  end
 end
--- a/db/structure.sql
+++ b/db/structure.sql
@@ -433,8 +433,8 @@ CREATE TABLE public.advertisement_hits (
    id integer NOT NULL,
    advertisement_id integer NOT NULL,
    ip_addr inet NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -464,15 +464,15 @@ ALTER SEQUENCE public.advertisement_hits_id_seq OWNED BY public.advertisement_hi
 CREATE TABLE public.advertisements (
    id integer NOT NULL,
    referral_url text NOT NULL,
-    ad_type character varying(255) NOT NULL,
+    ad_type character varying NOT NULL,
-    status character varying(255) NOT NULL,
+    status character varying NOT NULL,
    hit_count integer DEFAULT 0 NOT NULL,
    width integer NOT NULL,
    height integer NOT NULL,
-    file_name character varying(255) NOT NULL,
+    file_name character varying NOT NULL,
    is_work_safe boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -502,8 +502,8 @@ ALTER SEQUENCE public.advertisements_id_seq OWNED BY public.advertisements.id;
 CREATE TABLE public.amazon_backups (
    id integer NOT NULL,
    last_id integer,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -564,7 +564,7 @@ ALTER SEQUENCE public.anti_voters_id_seq OWNED BY public.anti_voters.id;
 CREATE TABLE public.api_keys (
    id integer NOT NULL,
    user_id integer NOT NULL,
-    key character varying(255) NOT NULL,
+    key character varying NOT NULL,
    created_at timestamp without time zone,
    updated_at timestamp without time zone
 );
@@ -612,8 +612,8 @@ CREATE TABLE public.artist_commentaries (
    original_description text DEFAULT ''::text NOT NULL,
    translated_title text DEFAULT ''::text NOT NULL,
    translated_description text DEFAULT ''::text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -649,8 +649,8 @@ CREATE TABLE public.artist_commentary_versions (
    original_description text,
    translated_title text,
    translated_description text,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -682,8 +682,8 @@ CREATE TABLE public.artist_urls (
    artist_id integer NOT NULL,
    url text NOT NULL,
    normalized_url text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    is_active boolean DEFAULT true NOT NULL
 );
@@ -714,16 +714,16 @@ ALTER SEQUENCE public.artist_urls_id_seq OWNED BY public.artist_urls.id;
 CREATE TABLE public.artist_versions (
    id integer NOT NULL,
    artist_id integer NOT NULL,
-    name character varying(255) NOT NULL,
+    name character varying NOT NULL,
    updater_id integer NOT NULL,
    updater_ip_addr inet NOT NULL,
    is_active boolean DEFAULT true NOT NULL,
    other_names text,
-    group_name character varying(255),
+    group_name character varying,
    url_string text,
    is_banned boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -752,15 +752,15 @@ ALTER SEQUENCE public.artist_versions_id_seq OWNED BY public.artist_versions.id;
 CREATE TABLE public.artists (
    id integer NOT NULL,
-    name character varying(255) NOT NULL,
+    name character varying NOT NULL,
    creator_id integer NOT NULL,
    is_active boolean DEFAULT true NOT NULL,
    is_banned boolean DEFAULT false NOT NULL,
    other_names text,
    other_names_index tsvector,
-    group_name character varying(255),
+    group_name character varying,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -793,8 +793,8 @@ CREATE TABLE public.bans (
    reason text NOT NULL,
    banner_id integer NOT NULL,
    expires_at timestamp without time zone NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -826,7 +826,7 @@ CREATE TABLE public.bulk_update_requests (
    user_id integer NOT NULL,
    forum_topic_id integer,
    script text NOT NULL,
-    status character varying(255) DEFAULT 'pending'::character varying NOT NULL,
+    status character varying DEFAULT 'pending'::character varying NOT NULL,
    created_at timestamp without time zone,
    updated_at timestamp without time zone,
    approver_id integer,
@@ -863,8 +863,8 @@ CREATE TABLE public.comment_votes (
    comment_id integer NOT NULL,
    user_id integer NOT NULL,
    score integer NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -899,8 +899,8 @@ CREATE TABLE public.comments (
    ip_addr inet NOT NULL,
    body_index tsvector NOT NULL,
    score integer DEFAULT 0 NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    updater_id integer,
    updater_ip_addr inet,
    do_not_bump_post boolean DEFAULT false NOT NULL,
@@ -941,10 +941,10 @@ CREATE TABLE public.delayed_jobs (
    run_at timestamp without time zone,
    locked_at timestamp without time zone,
    failed_at timestamp without time zone,
-    locked_by character varying(255),
+    locked_by character varying,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
-    queue character varying(255)
+    queue character varying
 );
@@ -1013,8 +1013,8 @@ CREATE TABLE public.dmails (
    message_index tsvector NOT NULL,
    is_read boolean DEFAULT false NOT NULL,
    is_deleted boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    creator_ip_addr inet NOT NULL,
    is_spam boolean DEFAULT false
 );
@@ -2149,8 +2149,8 @@ CREATE TABLE public.forum_posts (
    body text NOT NULL,
    text_index tsvector NOT NULL,
    is_deleted boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -2182,7 +2182,7 @@ CREATE TABLE public.forum_subscriptions (
    user_id integer,
    forum_topic_id integer,
    last_read_at timestamp without time zone,
-    delete_key character varying(255)
+    delete_key character varying
 );
@@ -2246,14 +2246,14 @@ CREATE TABLE public.forum_topics (
    id integer NOT NULL,
    creator_id integer NOT NULL,
    updater_id integer NOT NULL,
-    title character varying(255) NOT NULL,
+    title character varying NOT NULL,
    response_count integer DEFAULT 0 NOT NULL,
    is_sticky boolean DEFAULT false NOT NULL,
    is_locked boolean DEFAULT false NOT NULL,
    is_deleted boolean DEFAULT false NOT NULL,
    text_index tsvector NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    category_id integer DEFAULT 0 NOT NULL,
    min_level integer DEFAULT 0 NOT NULL
 );
@@ -2287,8 +2287,8 @@ CREATE TABLE public.ip_bans (
    creator_id integer NOT NULL,
    ip_addr inet NOT NULL,
    reason text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -2320,9 +2320,9 @@ CREATE TABLE public.janitor_trials (
    creator_id integer NOT NULL,
    user_id integer NOT NULL,
    original_level integer,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
-    status character varying(255) DEFAULT 'active'::character varying NOT NULL
+    status character varying DEFAULT 'active'::character varying NOT NULL
 );
@@ -2353,8 +2353,8 @@ CREATE TABLE public.mod_actions (
    id integer NOT NULL,
    creator_id integer NOT NULL,
    description text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    category integer
 );
@@ -2387,8 +2387,8 @@ CREATE TABLE public.news_updates (
    message text NOT NULL,
    creator_id integer NOT NULL,
    updater_id integer NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -2427,8 +2427,8 @@ CREATE TABLE public.note_versions (
    height integer NOT NULL,
    is_active boolean DEFAULT true NOT NULL,
    body text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    version integer DEFAULT 0 NOT NULL
 );
@@ -2467,8 +2467,8 @@ CREATE TABLE public.notes (
    is_active boolean DEFAULT true NOT NULL,
    body text NOT NULL,
    body_index tsvector NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    version integer DEFAULT 0 NOT NULL
 );
@@ -2500,7 +2500,7 @@ CREATE TABLE public.pixiv_ugoira_frame_data (
    id integer NOT NULL,
    post_id integer,
    data text NOT NULL,
-    content_type character varying(255) NOT NULL
+    content_type character varying NOT NULL
 );
@@ -2529,16 +2529,16 @@ ALTER SEQUENCE public.pixiv_ugoira_frame_data_id_seq OWNED BY public.pixiv_ugoir
 CREATE TABLE public.pools (
    id integer NOT NULL,
-    name character varying(255),
+    name character varying,
    creator_id integer NOT NULL,
    description text,
    is_active boolean DEFAULT true NOT NULL,
    post_ids text DEFAULT ''::text NOT NULL,
    post_count integer DEFAULT 0 NOT NULL,
    is_deleted boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
-    category character varying(255) DEFAULT 'series'::character varying NOT NULL
+    category character varying DEFAULT 'series'::character varying NOT NULL
 );
@@ -2571,8 +2571,8 @@ CREATE TABLE public.post_appeals (
    creator_id integer NOT NULL,
    creator_ip_addr inet,
    reason text,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -2635,9 +2635,9 @@ CREATE TABLE public.post_disapprovals (
    id integer NOT NULL,
    user_id integer NOT NULL,
    post_id integer NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
-    reason character varying(255) DEFAULT 'legacy'::character varying,
+    reason character varying DEFAULT 'legacy'::character varying,
    message text
 );
@@ -2672,8 +2672,8 @@ CREATE TABLE public.post_flags (
    creator_ip_addr inet NOT NULL,
    reason text,
    is_resolved boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -2758,8 +2758,8 @@ CREATE TABLE public.post_votes (
    post_id integer NOT NULL,
    user_id integer NOT NULL,
    score integer NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -2788,13 +2788,13 @@ ALTER SEQUENCE public.post_votes_id_seq OWNED BY public.post_votes.id;
 CREATE TABLE public.posts (
    id integer NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    up_score integer DEFAULT 0 NOT NULL,
    down_score integer DEFAULT 0 NOT NULL,
    score integer DEFAULT 0 NOT NULL,
-    source character varying(255) DEFAULT ''::character varying NOT NULL,
+    source character varying DEFAULT ''::character varying NOT NULL,
-    md5 character varying(255) NOT NULL,
+    md5 character varying NOT NULL,
    rating character(1) DEFAULT 'q'::bpchar NOT NULL,
    is_note_locked boolean DEFAULT false NOT NULL,
    is_rating_locked boolean DEFAULT false NOT NULL,
@@ -2817,7 +2817,7 @@ CREATE TABLE public.posts (
    tag_count_artist integer DEFAULT 0 NOT NULL,
    tag_count_character integer DEFAULT 0 NOT NULL,
    tag_count_copyright integer DEFAULT 0 NOT NULL,
-    file_ext character varying(255) NOT NULL,
+    file_ext character varying NOT NULL,
    file_size integer NOT NULL,
    image_width integer NOT NULL,
    image_height integer NOT NULL,
@@ -2890,7 +2890,7 @@ ALTER SEQUENCE public.saved_searches_id_seq OWNED BY public.saved_searches.id;
 --
 CREATE TABLE public.schema_migrations (
-    version character varying(255) NOT NULL
+    version character varying NOT NULL
 );
@@ -2931,14 +2931,14 @@ ALTER SEQUENCE public.super_voters_id_seq OWNED BY public.super_voters.id;
 CREATE TABLE public.tag_aliases (
    id integer NOT NULL,
-    antecedent_name character varying(255) NOT NULL,
+    antecedent_name character varying NOT NULL,
-    consequent_name character varying(255) NOT NULL,
+    consequent_name character varying NOT NULL,
    creator_id integer NOT NULL,
    creator_ip_addr inet NOT NULL,
    forum_topic_id integer,
    status text DEFAULT 'pending'::text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    post_count integer DEFAULT 0 NOT NULL,
    approver_id integer,
    forum_post_id integer
@@ -2970,15 +2970,15 @@ ALTER SEQUENCE public.tag_aliases_id_seq OWNED BY public.tag_aliases.id;
 CREATE TABLE public.tag_implications (
    id integer NOT NULL,
-    antecedent_name character varying(255) NOT NULL,
+    antecedent_name character varying NOT NULL,
-    consequent_name character varying(255) NOT NULL,
+    consequent_name character varying NOT NULL,
    descendant_names text NOT NULL,
    creator_id integer NOT NULL,
    creator_ip_addr inet NOT NULL,
    forum_topic_id integer,
    status text DEFAULT 'pending'::text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    approver_id integer,
    forum_post_id integer
 );
@@ -3010,14 +3010,14 @@ ALTER SEQUENCE public.tag_implications_id_seq OWNED BY public.tag_implications.i
 CREATE TABLE public.tag_subscriptions (
    id integer NOT NULL,
    creator_id integer NOT NULL,
-    name character varying(255) NOT NULL,
+    name character varying NOT NULL,
    tag_query text NOT NULL,
    post_ids text NOT NULL,
    is_public boolean DEFAULT true NOT NULL,
    last_accessed_at timestamp without time zone,
    is_opted_in boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -3046,7 +3046,7 @@ ALTER SEQUENCE public.tag_subscriptions_id_seq OWNED BY public.tag_subscriptions
 CREATE TABLE public.tags (
    id integer NOT NULL,
-    name character varying(255) NOT NULL,
+    name character varying NOT NULL,
    post_count integer DEFAULT 0 NOT NULL,
    category integer DEFAULT 0 NOT NULL,
    related_tags text,
@@ -3094,8 +3094,8 @@ CREATE UNLOGGED TABLE public.token_buckets (
 CREATE TABLE public.uploads (
    id integer NOT NULL,
    source text,
-    file_path character varying(255),
+    file_path character varying,
-    content_type character varying(255),
+    content_type character varying,
    rating character(1) NOT NULL,
    uploader_id integer NOT NULL,
    uploader_ip_addr inet NOT NULL,
@@ -3103,9 +3103,9 @@ CREATE TABLE public.uploads (
    status text DEFAULT 'pending'::text NOT NULL,
    backtrace text,
    post_id integer,
-    md5_confirmation character varying(255),
+    md5_confirmation character varying,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    server text,
    parent_id integer,
    md5 character varying,
@@ -3117,7 +3117,7 @@ CREATE TABLE public.uploads (
    artist_commentary_title text,
    include_artist_commentary boolean,
    context text,
-    alt_source text
+    referer_url text
 );
@@ -3148,10 +3148,10 @@ CREATE TABLE public.user_feedback (
    id integer NOT NULL,
    user_id integer NOT NULL,
    creator_id integer NOT NULL,
-    category character varying(255) NOT NULL,
+    category character varying NOT NULL,
    body text NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -3180,15 +3180,15 @@ ALTER SEQUENCE public.user_feedback_id_seq OWNED BY public.user_feedback.id;
 CREATE TABLE public.user_name_change_requests (
    id integer NOT NULL,
-    status character varying(255) DEFAULT 'pending'::character varying NOT NULL,
+    status character varying DEFAULT 'pending'::character varying NOT NULL,
    user_id integer NOT NULL,
    approver_id integer,
-    original_name character varying(255),
+    original_name character varying,
-    desired_name character varying(255),
+    desired_name character varying,
    change_reason text,
    rejection_reason text,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -3217,10 +3217,10 @@ ALTER SEQUENCE public.user_name_change_requests_id_seq OWNED BY public.user_name
 CREATE TABLE public.user_password_reset_nonces (
    id integer NOT NULL,
-    key character varying(255) NOT NULL,
+    key character varying NOT NULL,
-    email character varying(255) NOT NULL,
+    email character varying NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone
 );
@@ -3249,12 +3249,12 @@ ALTER SEQUENCE public.user_password_reset_nonces_id_seq OWNED BY public.user_pas
 CREATE TABLE public.users (
    id integer NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
-    name character varying(255) NOT NULL,
+    name character varying NOT NULL,
-    password_hash character varying(255) NOT NULL,
+    password_hash character varying NOT NULL,
-    email character varying(255),
+    email character varying,
-    email_verification_key character varying(255),
+    email_verification_key character varying,
    inviter_id integer,
    level integer DEFAULT 0 NOT NULL,
    base_upload_limit integer DEFAULT 10 NOT NULL,
@@ -3266,13 +3266,13 @@ CREATE TABLE public.users (
    note_update_count integer DEFAULT 0 NOT NULL,
    favorite_count integer DEFAULT 0 NOT NULL,
    comment_threshold integer DEFAULT '-1'::integer NOT NULL,
-    default_image_size character varying(255) DEFAULT 'large'::character varying NOT NULL,
+    default_image_size character varying DEFAULT 'large'::character varying NOT NULL,
    favorite_tags text,
    blacklisted_tags text DEFAULT 'spoilers
 guro
 scat
 furry -rating:s'::text,
-    time_zone character varying(255) DEFAULT 'Eastern Time (US & Canada)'::character varying NOT NULL,
+    time_zone character varying DEFAULT 'Eastern Time (US & Canada)'::character varying NOT NULL,
    bcrypt_password_hash text,
    per_page integer DEFAULT 20 NOT NULL,
    custom_style text,
@@ -3310,11 +3310,11 @@ CREATE TABLE public.wiki_page_versions (
    wiki_page_id integer NOT NULL,
    updater_id integer NOT NULL,
    updater_ip_addr inet NOT NULL,
-    title character varying(255) NOT NULL,
+    title character varying NOT NULL,
    body text NOT NULL,
    is_locked boolean NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    other_names text,
    is_deleted boolean DEFAULT false NOT NULL
 );
@@ -3346,12 +3346,12 @@ ALTER SEQUENCE public.wiki_page_versions_id_seq OWNED BY public.wiki_page_versio
 CREATE TABLE public.wiki_pages (
    id integer NOT NULL,
    creator_id integer NOT NULL,
-    title character varying(255) NOT NULL,
+    title character varying NOT NULL,
    body text NOT NULL,
    body_index tsvector NOT NULL,
    is_locked boolean DEFAULT false NOT NULL,
-    created_at timestamp without time zone NOT NULL,
+    created_at timestamp without time zone,
-    updated_at timestamp without time zone NOT NULL,
+    updated_at timestamp without time zone,
    updater_id integer,
    other_names text,
    other_names_index tsvector,
@@ -4770,6 +4770,14 @@ ALTER TABLE ONLY public.saved_searches
    ADD CONSTRAINT saved_searches_pkey PRIMARY KEY (id);
 --
 -- Name: schema_migrations schema_migrations_pkey; Type: CONSTRAINT; Schema: public; Owner: -
 --
 ALTER TABLE ONLY public.schema_migrations
    ADD CONSTRAINT schema_migrations_pkey PRIMARY KEY (version);
 --
 -- Name: super_voters super_voters_pkey; Type: CONSTRAINT; Schema: public; Owner: -
 --
@@ -6994,6 +7002,13 @@ CREATE INDEX index_posts_on_parent_id ON public.posts USING btree (parent_id);
 CREATE INDEX index_posts_on_pixiv_id ON public.posts USING btree (pixiv_id) WHERE (pixiv_id IS NOT NULL);
 --
 -- Name: index_posts_on_source; Type: INDEX; Schema: public; Owner: -
 --
 CREATE INDEX index_posts_on_source ON public.posts USING btree (lower((source)::text));
 --
 -- Name: index_posts_on_source_pattern; Type: INDEX; Schema: public; Owner: -
 --
@@ -7015,6 +7030,13 @@ CREATE INDEX index_posts_on_tags_index ON public.posts USING gin (tag_index);
 CREATE INDEX index_posts_on_uploader_id ON public.posts USING btree (uploader_id);
 --
 -- Name: index_posts_on_uploader_ip_addr; Type: INDEX; Schema: public; Owner: -
 --
 CREATE INDEX index_posts_on_uploader_ip_addr ON public.posts USING btree (uploader_ip_addr);
 --
 -- Name: index_saved_searches_on_labels; Type: INDEX; Schema: public; Owner: -
 --
@@ -7121,10 +7143,10 @@ CREATE UNIQUE INDEX index_token_buckets_on_user_id ON public.token_buckets USING
 --
-- Name: index_uploads_on_alt_source; Type: INDEX; Schema: public; Owner: -
+-- Name: index_uploads_on_referer_url; Type: INDEX; Schema: public; Owner: -
 --
-CREATE INDEX index_uploads_on_alt_source ON public.uploads USING btree (alt_source);
+CREATE INDEX index_uploads_on_referer_url ON public.uploads USING btree (referer_url);
 --
@@ -7267,13 +7289,6 @@ CREATE INDEX index_wiki_pages_on_title_pattern ON public.wiki_pages USING btree
 CREATE INDEX index_wiki_pages_on_updated_at ON public.wiki_pages USING btree (updated_at);
 --
 -- Name: unique_schema_migrations; Type: INDEX; Schema: public; Owner: -
 --
 CREATE UNIQUE INDEX unique_schema_migrations ON public.schema_migrations USING btree (version);
 --
 -- Name: favorites insert_favorites_trigger; Type: TRIGGER; Schema: public; Owner: -
 --
@@ -7502,13 +7517,13 @@ INSERT INTO "schema_migrations" (version) VALUES
 ('20171230220225'),
 ('20180113211343'),
 ('20180116001101'),
 ('20180310070233'),
 ('20180403231351'),
 ('20180413224239'),
 ('20180425194016'),
 ('20180516222413'),
 ('20180517190048'),
 ('20180518175154'),
-('20180804203201');
+('20180804203201'),
 ('20180816230604');
--- a/lib/tasks/images.rake
+++ b/lib/tasks/images.rake
@@ -87,7 +87,7 @@ namespace :images do
    post = Post.find(post_id)
    post.source =~ /(\d{5,})/
    if illust_id = $1
-      response = PixivApiClient.new.works(illust_id)
+      response = PixivApiClient.new.work(illust_id)
      upload = Upload.new
      upload.source = response.pages.first
      upload.file_ext = post.file_ext
--- a/test/functional/uploads_controller_test.rb
+++ b/test/functional/uploads_controller_test.rb
@@ -68,6 +68,20 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
        end
      end
      context "for a direct link twitter post" do
        setup do
          @ref = "https://twitter.com/onsen_musume_jp/status/865534101918330881"
          @source = "https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"
        end
        should "trigger the preprocessor" do
          assert_difference(-> { Upload.preprocessed.count }, 1) do
            get_auth new_upload_path, @user, params: {:url => @source, :ref => @ref}
            Delayed::Worker.new.work_off
          end
        end
      end
      context "for a twitter post" do
        setup do
          @source = "https://twitter.com/frappuccino/status/566030116182949888"
@@ -89,6 +103,20 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
        end
      end
      context "for a pixiv post" do
        setup do
          @ref = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482"
          @source = "https://i.pximg.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg"
        end
        should "trigger the preprocessor" do
          assert_difference(-> { Upload.preprocessed.count }, 1) do
            get_auth new_upload_path, @user, params: {:url => @source, :ref => @ref}
            Delayed::Worker.new.work_off
          end
        end
      end
      context "for a post that has already been uploaded" do
        setup do
          as_user do
@@ -149,6 +177,48 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
    end
    context "create action" do
      context "when a preprocessed upload already exists" do
        context "for twitter" do
          setup do
            as_user do
              @ref = "https://twitter.com/onsen_musume_jp/status/865534101918330881"
              @source = "https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"
              @upload = create(:upload, status: "preprocessed", source: @source, referer_url: @ref, image_width: 0, image_height: 0, file_size: 0, md5: "something", file_ext: "jpg")
            end
          end
          should "update the predecessor" do
            assert_difference(->{ Post.count }, 1) do
              assert_difference(->{ Upload.count }, 0) do
                post_auth uploads_path, @user, params: {:upload => {:tag_string => "aaa", :rating => "q", :source => @source, :referer_url => @ref}}
              end
            end
            post = Post.last
            assert_match(/aaa/, post.tag_string)            
          end
        end
        context "for pixiv" do
          setup do
            @ref = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482"
            @source = "https://i.pximg.net/img-original/img/2015/03/14/17/53/32/49270482_p0.jpg"
            as_user do
              @upload = create(:upload, status: "preprocessed", source: @source, referer_url: @ref, image_width: 0, image_height: 0, file_size: 0, md5: "something", file_ext: "jpg")
            end
          end
          should "update the predecessor" do
            assert_difference(->{ Post.count }, 1) do
              assert_difference(->{ Upload.count }, 0) do
                post_auth uploads_path, @user, params: {:upload => {:tag_string => "aaa", :rating => "q", :source => @source, :referer_url => @ref}}
              end
            end
            post = Post.last
            assert_match(/aaa/, post.tag_string)            
          end
        end
      end
      should "create a new upload" do
        assert_difference("Upload.count", 1) do
          file = Rack::Test::UploadedFile.new("#{Rails.root}/test/files/test.jpg", "image/jpeg")
--- a/test/models/upload_service_test.rb
+++ b/test/models/upload_service_test.rb
@@ -17,34 +17,59 @@ class UploadServiceTest < ActiveSupport::TestCase
  context "::Utils" do
    subject { UploadService::Utils }
-    context "#download_from_source" do
+    context "#download_for_upload" do
      context "for a non-source site" do
        setup do
-        @jpeg = "https://upload.wikimedia.org/wikipedia/commons/c/c5/Moraine_Lake_17092005.jpg"
+          @source = "https://upload.wikimedia.org/wikipedia/commons/c/c5/Moraine_Lake_17092005.jpg"          
-        @ugoira = "https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip"
+          @upload = Upload.new
          @upload.source = @source
        end
        should "work on a jpeg" do
-        file = subject.download_from_source(@jpeg) do |context|
+          file = subject.download_for_upload(@upload)
          assert_not_nil(context[:downloaded_source])
          assert_not_nil(context[:source])
        end
          assert_operator(File.size(file.path), :>, 0)
          file.close
        end
      end
      context "for a pixiv" do
        setup do
          @source = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247350"
          @upload = Upload.new
          @upload.source = @source
        end
        should "work on an ugoira url" do
-        file = subject.download_from_source(@ugoira, referer_url: "https://www.pixiv.net") do |context|
+          file = subject.download_for_upload(@upload)
          assert_not_nil(context[:downloaded_source])
          assert_not_nil(context[:source])
          assert_not_nil(context[:ugoira])
        end
          assert_operator(File.size(file.path), :>, 0)
          file.close
        end
      end
      context "for a pixiv ugoira" do
        setup do
          @source = "https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip"
          @referer = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
          @upload = Upload.new
          @upload.source = @source
          @upload.referer_url = @referer
        end
        should "work on an ugoira url" do
          file = subject.download_for_upload(@upload)
          assert_not_nil(@upload.context["ugoira"])
          assert_operator(File.size(file.path), :>, 0)
          file.close
        end
      end
    end
    context ".calculate_ugoira_dimensions" do
      context "for a valid ugoira file" do
        setup do
@@ -343,9 +368,6 @@ class UploadServiceTest < ActiveSupport::TestCase
          FactoryBot.create(:user)
        end
        CurrentUser.ip_addr = "127.0.0.1"
        @jpeg = "https://raikou1.donmai.us/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg"
        @ugoira = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
        @video = "https://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
      end
      teardown do
@@ -356,65 +378,81 @@ class UploadServiceTest < ActiveSupport::TestCase
      context "for twitter" do
        setup do
          @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large"
          @norm_source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
          @ref = "https://twitter.com/nounproject/status/540944400767922176"
        end
-        should "record the correct source when a referer is given" do
+        should "download the file" do
          @service = subject.new(source: @source, referer_url: @ref)
          @upload = @service.start!
-          assert_equal(@ref, @upload.source)
+          assert_equal("preprocessed", @upload.status)
-        end
+          assert_equal(9800, @upload.file_size)
-
+          assert_equal("png", @upload.file_ext)
-        should "save the twimg url in alt_source" do
+          assert_equal("f5fe24f3a3a13885285f6627e04feec9", @upload.md5)
-          @service = subject.new(source: @source, referer_url: @ref)
+          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "png", :original)))
-          @upload = @service.start!
+          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "png", :preview)))
          assert_equal(@norm_source, @upload.alt_source)
        end
      end
      context "for pixiv" do
        setup do
-          @source = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735"
+          @source = "https://i.pximg.net/img-original/img/2014/10/29/09/27/19/46785915_p0.jpg"
-          @ref = "http://www.pixiv.net/member.php?id=696859"
+          @ref = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=46785915"
          @direct = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg"
        end
-        should "record the correct source" do
+        should "download the file" do
          @service = subject.new(source: @source, referer_url: @ref)
          @upload = @service.start!
          assert_equal(@direct, @upload.source)
        end        
      end
      should "work for a jpeg" do
        @service = subject.new(source: @jpeg)
        @upload = @service.start!
          assert_equal("preprocessed", @upload.status)
-        assert_not_nil(@upload.md5)
+          assert_equal(294591, @upload.file_size)
          assert_equal("jpg", @upload.file_ext)
-        assert_operator(@upload.file_size, :>, 0)
+          assert_equal("3cb1ef624714c15dbb2d6e7b1d57faef", @upload.md5)
        assert_not_nil(@upload.source)
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original)))
        # this image is not large enough to generate a large file
        #assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :large)))
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview)))
        end
      end
-      should "work for an ugoira" do
+      context "for pixiv ugoira" do
-        @service = subject.new(source: @ugoira)
+        setup do
          @source = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
        end
        should "download the file" do
          @service = subject.new(source: @source)
          @upload = @service.start!
          assert_equal("preprocessed", @upload.status)
-        assert_not_nil(@upload.md5)
+          assert_equal(2804, @upload.file_size)
          assert_equal("zip", @upload.file_ext)
-        assert_operator(@upload.file_size, :>, 0)
+          assert_equal("cad1da177ef309bf40a117c17b8eecf5", @upload.md5)
        assert_not_nil(@upload.source)
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :original)))
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "zip", :large)))
        end
      end
      context "for null" do
        setup do
          @source = "https://raikou1.donmai.us/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg"
        end
        should "download the file" do
          @service = subject.new(source: @source)
          @upload = @service.start!
          assert_equal("preprocessed", @upload.status)
          assert_equal(181309, @upload.file_size)
          assert_equal("jpg", @upload.file_ext)
          assert_equal("93f4dd66ef1eb11a89e56d31f9adc8d0", @upload.md5)
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :original)))
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :large)))
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "jpg", :preview)))
        end
      end
      context "for a video" do
        setup do
          @source = "https://www.sample-videos.com/video/mp4/720/big_buck_bunny_720p_1mb.mp4"
        end
        should "work for a video" do
-        @service = subject.new(source: @video)
+          @service = subject.new(source: @source)
          @upload = @service.start!
          assert_equal("preprocessed", @upload.status)
          assert_not_nil(@upload.md5)
@@ -424,14 +462,16 @@ class UploadServiceTest < ActiveSupport::TestCase
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :original)))
          assert(File.exists?(Danbooru.config.storage_manager.file_path(@upload.md5, "mp4", :preview)))
        end
      end
      context "on timeout errors" do
        setup do
          @source = "https://raikou1.donmai.us/93/f4/93f4dd66ef1eb11a89e56d31f9adc8d0.jpg"
          HTTParty.stubs(:get).raises(Net::ReadTimeout)
        end
        should "leave the upload in an error state" do
-          @service = subject.new(source: @video)
+          @service = subject.new(source: @source)
          @upload = @service.start!
          assert_match(/error:/, @upload.status)
        end
@@ -445,41 +485,15 @@ class UploadServiceTest < ActiveSupport::TestCase
          FactoryBot.create(:user)
        end
        CurrentUser.ip_addr = "127.0.0.1"
        @source = "https://twitter.com/nounproject/status/540944400767922176"
      end
-      context "for twitter" do
+      should "overwrite the attributes" do
-        setup do
+        @service = subject.new(source: @source, rating: 'e')
          @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large"
          @norm_source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
          @ref = "https://twitter.com/nounproject/status/540944400767922176"
        end
        should "record the correct source when a referer is given" do
          @service = subject.new(source: @source, referer_url: @ref)
        @upload = @service.start!        
          @service = subject.new(source: @source)
        @service.finish!
        @upload.reload
-
+        assert_equal('e', @upload.rating)
          assert_equal(@ref, @upload.source)
        end        
      end
      context "for pixiv" do
        setup do
          @source = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735"
          @ref = "http://www.pixiv.net/member.php?id=696859"
          @direct = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg"
        end
        should "record the correct source" do
          @service = subject.new(source: @source, referer_url: @ref)
          @upload = @service.start!
          @service = subject.new(source: @source)
          @service.finish!
          @upload.reload
          assert_equal(@direct, @upload.source)
        end        
      end
    end
  end
@@ -637,7 +651,7 @@ class UploadServiceTest < ActiveSupport::TestCase
          image_url = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
          as_user { @post.replace!(replacement_url: replacement_url) }
-          assert_equal(image_url, @post.replacements.last.replacement_url)
+          assert_equal(replacement_url, @post.replacements.last.replacement_url)
        end
      end
@@ -1027,7 +1041,33 @@ class UploadServiceTest < ActiveSupport::TestCase
      CurrentUser.ip_addr = nil
    end
-    context "for an ugoira" do
+    context "for a pixiv" do
      setup do
        @source = "https://i.pximg.net/img-original/img/2017/11/21/05/12/37/65981735_p0.jpg"
        @ref = "https://www.pixiv.net/member_illust.php?mode=medium&illust_id=65981735"
        @upload = FactoryBot.create(:jpg_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, source: @source, referer_url: @ref)
      end
      should "record the canonical source" do
        post = subject.new({}).create_post_from_upload(@upload)
        assert_equal(@source, post.source)
      end
    end
    context "for a twitter" do
      setup do
        @source = "https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:large"
        @ref = "https://twitter.com/aranobu/status/817736083567820800"
        @upload = FactoryBot.create(:jpg_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, source: @source, referer_url: @ref)
      end
      should "record the canonical source" do
        post = subject.new({}).create_post_from_upload(@upload)
        assert_equal(@ref, post.source)
      end 
    end
    context "for a pixiv ugoira" do
      setup do
        @upload = FactoryBot.create(:ugoira_upload, file_size: 1000, md5: "12345", file_ext: "jpg", image_width: 100, image_height: 100, context: UGOIRA_CONTEXT)
      end
--- a/test/test_helpers/download_test_helper.rb
+++ b/test/test_helpers/download_test_helper.rb
@@ -1,23 +1,22 @@
 require 'ptools'
 module DownloadTestHelper
-  def assert_downloaded(expected_filesize, source)
+  def assert_downloaded(expected_filesize, source, referer=nil)
-    download = Downloads::File.new(source)
+    download = Downloads::File.new(source, referer)
-    tempfile = download.download!
+    tempfile, strategy = download.download!
    assert_equal(expected_filesize, tempfile.size, "Tested source URL: #{source}")
  rescue Net::OpenTimeout
    skip "Remote connection to #{source} failed"
  end
-  def assert_rewritten(expected_source, test_source)
+  def assert_rewritten(expected_source, test_source, test_referer=nil)
-    download = Downloads::File.new(test_source)
+    strategy = Sources::Strategies.find(test_source, test_referer)
-
+    rewritten_source = strategy.image_url
    rewritten_source, _, _ = download.before_download(test_source, {})
    assert_match(expected_source, rewritten_source, "Tested source URL: #{test_source}")
  end
-  def assert_not_rewritten(source)
+  def assert_not_rewritten(source, referer=nil)
-    assert_rewritten(source, source)
+    assert_rewritten(source, source, referer)
  end
  def check_ffmpeg
--- a/test/unit/artist_test.rb
+++ b/test/unit/artist_test.rb
@@ -229,9 +229,7 @@ class ArtistTest < ActiveSupport::TestCase
      should "find the correct artist for page URLs" do
        assert_artist_found("artgerm", "http://www.deviantart.com/artgerm/art/Peachy-Princess-Ver-2-457220550")
        assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/art/My-Queen-426745289")
        assert_artist_found("trixia", "http://www.deviantart.com/trixdraws/gallery/#/d722mrt")
      end
      should "find the correct artist for image URLs" do
@@ -281,11 +279,6 @@ class ArtistTest < ActiveSupport::TestCase
        assert_artist_found("bkub",  "http://www.pixiv.net/i/46239857")
      end
      should "find nothing for malformed URLs" do
        assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=herpderp")
        assert_artist_not_found("http://www.pixiv.net/wharrgarbl")
      end
      should "find nothing for bad IDs" do
        assert_raises(PixivApiClient::BadIDError) do
          assert_artist_not_found("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=32049358")
--- a/test/unit/artist_url_test.rb
+++ b/test/unit/artist_url_test.rb
@@ -45,6 +45,56 @@ class ArtistUrlTest < ActiveSupport::TestCase
      end
    end
    context "artstation urls" do
      setup do
        @urls = [
          FactoryBot.create(:artist_url, url: "https://www.artstation.com/koyorin"),
          FactoryBot.create(:artist_url, url: "https://www.artstation.com/artist/koyorin"),
          FactoryBot.create(:artist_url, url: "https://koyorin.artstation.com"),
          FactoryBot.create(:artist_url, url: "https://www.artstation.com/artwork/04XA4")
        ]
      end
      should "normalize" do
        assert_equal("http://www.artstation.com/koyorin/", @urls[0].normalized_url)
        assert_equal("http://www.artstation.com/koyorin/", @urls[1].normalized_url)
        assert_equal("http://www.artstation.com/koyorin/", @urls[2].normalized_url)
        assert_equal("http://www.artstation.com/jeyrain/", @urls[3].normalized_url)
      end
    end
    context "deviantart urls" do
      setup do
        @urls = [
          FactoryBot.create(:artist_url, url: "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484"),
          FactoryBot.create(:artist_url, url: "http://noizave.deviantart.com/art/test-post-please-ignore-685436408"),
          FactoryBot.create(:artist_url, url: "https://www.deviantart.com/noizave")
        ]
      end
      should "normalize" do
        assert_equal("http://www.deviantart.com/aeror404/", @urls[0].normalized_url)
        assert_equal("http://www.deviantart.com/noizave/", @urls[1].normalized_url)
        assert_equal("http://www.deviantart.com/noizave/", @urls[2].normalized_url)
      end
    end
    context "nicoseiga urls" do
      setup do
        @urls = [
          FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/user/illust/7017777"),
          FactoryBot.create(:artist_url, url: "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"),
          FactoryBot.create(:artist_url, url: "http://seiga.nicovideo.jp/seiga/im4937663")
        ]
      end
      should "normalize" do
        assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[0].normalized_url)
        assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[1].normalized_url)
        assert_equal("http://seiga.nicovideo.jp/user/illust/7017777", @urls[2].normalized_url)
      end
    end
    should "normalize fc2 urls" do
      url = FactoryBot.create(:artist_url, :url => "http://blog55.fc2.com/monet")
      assert_equal("http://blog55.fc2.com/monet", url.url)
@@ -56,13 +106,13 @@ class ArtistUrlTest < ActiveSupport::TestCase
    end
    should "normalize deviant art artist urls" do
-      url = FactoryBot.create(:artist_url, :url => "https://caidychen.deviantart.com/")
+      url = FactoryBot.create(:artist_url, :url => "https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
-      assert_equal("http://www.deviantart.com/caidychen/", url.normalized_url)      
+      assert_equal("http://www.deviantart.com/aeror404/", url.normalized_url)      
    end
    should "normalize nico seiga artist urls" do
-      url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/1826959")
+      url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/user/illust/7017777")
-      assert_equal("http://seiga.nicovideo.jp/user/illust/1826959/", url.normalized_url)
+      assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
      url = FactoryBot.create(:artist_url, :url => "http://seiga.nicovideo.jp/seiga/im4937663")
      assert_equal("http://seiga.nicovideo.jp/user/illust/7017777/", url.normalized_url)
@@ -80,9 +130,9 @@ class ArtistUrlTest < ActiveSupport::TestCase
    end
    should "normalize twitter urls" do
-      url = FactoryBot.create(:artist_url, :url => "https://twitter.com/MONET/status/12345")
+      url = FactoryBot.create(:artist_url, :url => "https://twitter.com/aoimanabu/status/892370963630743552")
-      assert_equal("https://twitter.com/MONET/status/12345", url.url)
+      assert_equal("https://twitter.com/aoimanabu/status/892370963630743552", url.url)
-      assert_equal("http://twitter.com/monet/status/12345/", url.normalized_url)
+      assert_equal("http://twitter.com/aoimanabu/", url.normalized_url)
    end
  end
 end
--- a/test/unit/downloads/art_station_test.rb
+++ b/test/unit/downloads/art_station_test.rb
@@ -4,31 +4,35 @@ module Downloads
  class ArtStationTest < ActiveSupport::TestCase
    context "a download for a (small) artstation image" do
      setup do
-        @source = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974"
+        @asset = "https://cdnb3.artstation.com/p/assets/images/images/003/716/071/small/aoi-ogata-hate-city.jpg?1476754974"
-        @download = Downloads::File.new(@source)
+        @download = Downloads::File.new(@asset)
      end
      should "download the large image instead" do
-        assert_equal("https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974", @download.source)
+        file, strategy = @download.download!
        assert_equal(517_706, ::File.size(file.path))
      end
    end
    context "for an image where an original does not exist" do
      setup do
-        @source = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg"
+        @asset = "https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg"
-        @download = Downloads::File.new(@source)
+        @download = Downloads::File.new(@asset)
        @download.download!
      end
      should "not try to download the original" do
-        assert_equal("https://cdna.artstation.com/p/assets/images/images/004/730/278/large/mendel-oh-dragonll.jpg", @download.source)
+        file, strategy = @download.download!
        assert_equal(449_047, ::File.size(file.path))
      end
    end
    context "a download for an ArtStation image hosted on CloudFlare" do
      setup do
        @asset = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974"
      end
      should "return the original file, not the polished file" do
-        @source = "https://cdnb.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974"
+        assert_downloaded(517_706, @asset) # polished size: 502_052
        assert_downloaded(517_706, @source) # polished size: 502_052
      end
    end
@@ -36,11 +40,12 @@ module Downloads
      setup do
        @source = "https://dantewontdie.artstation.com/projects/YZK5q"
        @download = Downloads::File.new(@source)
        @download.download!
      end
      should "download the original image instead" do
-        assert_equal("https://cdna.artstation.com/p/assets/images/images/006/066/534/large/yinan-cui-reika.jpg?1495781565", @download.source)
+        file, strategy = @download.download!
        assert_equal(237_651, ::File.size(file.path))
      end
    end
  end
--- a/test/unit/downloads/deviant_art_test.rb
+++ b/test/unit/downloads/deviant_art_test.rb
@@ -8,11 +8,7 @@ module Downloads
        @source = "http://starbitt.deviantart.com/art/09271X-636962118"
        @download = Downloads::File.new(@source)
-        @tempfile = @download.download!
+        @tempfile, strategy = @download.download!
      end
      should "set the html page as the source" do
        assert_equal("https://orig00.deviantart.net/82ef/f/2016/271/7/1/aaaaaa_by_starbitt-daj8b46.gif", @download.source)
      end
      should "work" do
--- a/test/unit/downloads/file_test.rb
+++ b/test/unit/downloads/file_test.rb
@@ -41,7 +41,7 @@ module Downloads
      end
      should "store the file in the tempfile path" do
-        tempfile = @download.download!
+        tempfile, strategy = @download.download!
        assert_equal(@source, @download.source)
        assert_operator(tempfile.size, :>, 0, "should have data")
      end
--- a/test/unit/downloads/pixiv_test.rb
+++ b/test/unit/downloads/pixiv_test.rb
@@ -4,6 +4,7 @@ module Downloads
  class PixivTest < ActiveSupport::TestCase
    def setup
      super
      Downloads::File.stubs(:is_cloudflare?).returns(false)
      load_pixiv_tokens!
    end
@@ -13,29 +14,6 @@ module Downloads
    end
    context "in all cases" do
      # Test an old illustration (one uploaded before 2014-09-16). New
      # /img-original/ and /img-master/ URLs currently don't work for images
      # uploaded before this date. Only old /imgXX/img/username/ URLs work.
      context "downloading an old PNG illustration" do
        setup do
          @medium_page = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=14901720"
          @big_page    = "http://www.pixiv.net/member_illust.php?mode=big&illust_id=14901720"
          @new_small_thumbnail  = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
          @new_medium_thumbnail = "http://i1.pixiv.net/c/600x600/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
          @new_full_size_image  = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png"
          @file_size = 1261
        end
        should "work when using new URLs" do
          # Don't know the actual file size of the thumbnails since they don't work.
          assert_downloaded(1083, @new_small_thumbnail)
          assert_downloaded(1083, @new_medium_thumbnail)
          assert_downloaded(@file_size, @new_full_size_image)
        end
      end
      # Test a new illustration (one uploaded after 2014-09-30). New illustrations
      # must use /img-original/ for full size URLs. Old /imgXX/img/username/ style URLs
      # don't work for images uploaded after this date.
@@ -103,21 +81,6 @@ module Downloads
        end
      end
      context "downloading a bad id image" do
        setup do
          @bad_id_full   = "https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png"
          @bad_id_sample = "https://i.pximg.net/c/600x600/img-master/img/2017/11/22/01/06/44/65991677_p0_master1200.jpg"
        end
        should "not raise an error when rewriting the url" do
          assert_nothing_raised { assert_not_rewritten(@bad_id_full) }
        end
        should_eventually "rewrite bad id samples to full size" do
          assert_rewritten(@bad_id_full, @bad_id_sample)
        end
      end
      context "downloading a ugoira" do
        setup do
          @medium_page     = "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364"
@@ -138,6 +101,8 @@ module Downloads
      context "downloading a profile image" do
        should "download new profile images" do
          skip "profile images are no longer supported"
          @file_url = "https://i.pximg.net/user-profile/img/2014/12/18/10/31/23/8733472_7dc7310db6cc37163af145d04499e411_170.jpg"
          @file_size = 23_328
@@ -149,8 +114,10 @@ module Downloads
      context "downloading a background image" do
        should "download the image" do
-          @file_url = "http://i1.pixiv.net/background/img/2016/05/17/12/05/48/2074388_d4ac52034f7ca0af3e083d59fde7e97f.jpg"
+          skip "background images are no longer supported"
-          @file_size = 386_678
+
          @file_url = "https://i.pximg.net/background/img/2015/10/25/08/45/27/198128_77ddf78cdb162e3d1c0d5134af185813.jpg"
          @file_size = 0
          assert_not_rewritten(@file_url)
          assert_downloaded(@file_size, @file_url)
@@ -159,21 +126,23 @@ module Downloads
      context "downloading a novel image" do
        should "download new novel images" do
-          @file_url = "http://i1.pixiv.net/novel-cover-original/img/2016/11/03/20/10/58/7436075_f75af69f3eacd1656d3733c72aa959cf.jpg"
+          @file_url = "https://i.pximg.net/novel-cover-original/img/2017/07/27/23/14/17/8465454_80685d10e6df4d7d53ad347ddc18a36b.jpg"
-          @file_size = 316_311
+          @ref = 'https://www.pixiv.net/novel/show.php?id=8465454&mode=cover'
          @file_size = 532_129
-          assert_not_rewritten(@file_url)
+          assert_not_rewritten(@file_url, @ref)
-          assert_downloaded(@file_size, @file_url)
+          assert_downloaded(@file_size, @file_url, @ref)
        end
      end
      context "downloading a pixiv fanbox image" do
        should "work" do
-          @file_url = "https://fanbox.pixiv.net/images/post/31757/w/1200/0CdXtgr4al3t43gQG4NZLnpQ.jpeg"
+          @source = "https://www.pixiv.net/fanbox/creator/12491073/post/82406"
-          @file_size = 200_239
+          @file_url = "https://fanbox.pixiv.net/images/post/82406/D833IKA7FIesJXL8xx39rrG0.jpeg"
          @file_size = 873_387
-          assert_not_rewritten(@file_url)
+          assert_not_rewritten(@file_url, @source)
-          assert_downloaded(@file_size, @file_url)
+          assert_downloaded(@file_size, @file_url, @source)
        end
      end
    end
@@ -181,12 +150,11 @@ module Downloads
    context "An ugoira site for pixiv" do
      setup do
        @download = Downloads::File.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
-        @tempfile = @download.download!
+        @tempfile, strategy = @download.download!
        @tempfile.close!
      end
      should "capture the data" do
        assert_equal("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip", @download.source)
        assert_equal(2, @download.data[:ugoira_frame_data].size)
        if @download.data[:ugoira_frame_data][0]["file"]
          assert_equal([{"file"=>"000000.jpg", "delay"=>125}, {"file"=>"000001.jpg", "delay"=>125}], @download.data[:ugoira_frame_data])
--- a/test/unit/downloads/tumblr_test.rb
+++ b/test/unit/downloads/tumblr_test.rb
@@ -2,43 +2,52 @@ require 'test_helper'
 module Downloads
  class TumblrTest < ActiveSupport::TestCase
    # Currently there's no way to obtain the raw version of these images,
    # so we have to change the tests to validate against the 1280 version
    context "a download for a tumblr 500 sample" do
-      should "instead download the raw version" do
+      should "instead download the 1280 version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @ref = "https://noizave.tumblr.com/post/162206271767"
        @source = "https://24.media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_500.jpg"
-        @rewrite = "http://data.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_raw.jpg"
+        @rewrite = "https://media.tumblr.com/fc328250915434e66e8e6a92773f79d0/tumblr_mf4nshfibc1s0oswoo1_1280.jpg"
-        assert_rewritten(@rewrite, @source)
+        assert_rewritten(@rewrite, @source, @ref)
-        assert_downloaded(196_617, @source)
+        assert_downloaded(113909, @source, @ref)
        # assert_downloaded(196_617, @source)
      end
    end
    context "a download for a *.media.tumblr.com/tumblr_$id_$size image without a larger size" do
      should "download the same version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @ref = "https://noizave.tumblr.com/post/162206271767"
        @source = "https://25.media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg"
-        @rewrite = "http://data.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_500.jpg"
+        @rewrite = "https://media.tumblr.com/tumblr_lxbzel2H5y1r9yjhso1_1280.jpg"
-        assert_rewritten(@rewrite, @source)
+        assert_rewritten(@rewrite, @source, @ref)
-        assert_downloaded(90_122, @source)
+        assert_downloaded(41803, @source, @ref)
        # assert_downloaded(90_122, @source)
      end
    end
    context "a download for a *.media.tumblr.com/tumblr_$id_$size image with a larger size" do
      should "download the best available version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @ref = "https://noizave.tumblr.com/post/162206271767"
        @source = "https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"
-        @rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"
+        @rewrite = "https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"
-        assert_rewritten(@rewrite, @source)
+        assert_rewritten(@rewrite, @source, @ref)
-        assert_downloaded(34_060, @source)
+        assert_downloaded(62658, @source, @ref)
      end
    end
    context "a download for a *.media.tumblr.com/$hash/tumblr_$id_rN_$size image" do
      should "download the best available version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @ref = "https://noizave.tumblr.com/post/162206271767"
        @source = "https://33.media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_500.gif"
-        @rewrite = "http://data.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_raw.gif"
+        @rewrite = "https://media.tumblr.com/4b7fecf9a5a8284fbaefb051a2369b55/tumblr_npozqfwc9h1rt6u7do1_r1_1280.gif"
-        assert_rewritten(@rewrite, @source)
+        assert_rewritten(@rewrite, @source, @ref)
-        assert_downloaded(1_234_017, @source)
+        assert_downloaded(1_234_017, @source, @ref)
      end
    end
@@ -46,40 +55,33 @@ module Downloads
      should "download the best available version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @source = "https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif"
-        @rewrite = "http://data.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif"
+        @rewrite = "https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_1280.gif"
-        assert_rewritten(@rewrite, @source)
+        assert_rewritten(@rewrite, @source, @ref)
-        assert_downloaded(110_348, @source)
+        assert_downloaded(110_348, @source, @ref)
      end
    end
-    context "a download for a data.tumblr.com/$id_$size image with a larger size" do
+    context "a download for a media.tumblr.com/$id_$size image with a larger size" do
      should "download the best available version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
-        @source = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg"
+        @ref = "https://noizave.tumblr.com/post/162206271767"
-        @rewrite = "http://data.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg"
+        @source = "http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg"
-        assert_rewritten(@rewrite, @source)
+        @rewrite = "https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_1280.jpg"
-        assert_downloaded(153_885, @source)
+        assert_rewritten(@rewrite, @source, @ref)
        assert_downloaded(122413, @source)
        # assert_downloaded(153_885, @source)
      end
    end
-    context "a download for a data.tumblr.com/tumblr_$id_$size.jpg image" do
+    context "a download for a media.tumblr.com/tumblr_$id_$size.jpg image" do
      should "download the best available version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
-        @source = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg"
+        @ref = "https://noizave.tumblr.com/post/162206271767"
-        @rewrite = "http://data.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"
+        @source = "http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_250.jpg"
-        assert_rewritten(@rewrite, @source)
+        @rewrite = "https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg"
-        assert_downloaded(296_399, @source)
+        assert_rewritten(@rewrite, @source, @ref)
-      end
+        assert_downloaded(101869, @source, @ref)
-    end
+        # assert_downloaded(296_399, @source)
    context "a download for a gs1.wac.edgecastcdn.net image" do
      should "rewrite to the full tumblr version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @source = "https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png"
        @rewrite = "http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png"
        assert_downloaded(34_060, @source)
        assert_rewritten(@rewrite, @source)
      end
    end
@@ -87,9 +89,9 @@ module Downloads
      should "download the best available version" do
        skip "Tumblr keys are not set" unless Danbooru.config.tumblr_consumer_key
        @source = "https://noizave.tumblr.com/post/162206271767"
-        @rewrite = "http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png"
+        @rewrite = "https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"
-        assert_downloaded(3_620, @source)
+        assert_downloaded(3655, @source)
        assert_rewritten(@rewrite, @source)
      end
    end
--- a/test/unit/downloads/twitter_test.rb
+++ b/test/unit/downloads/twitter_test.rb
@@ -8,17 +8,19 @@ module Downloads
        @source = "https://twitter.com/CincinnatiZoo/status/859073537713328129"
        @rewrite = "https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4"
        assert_rewritten(@rewrite, @source)
-        assert_downloaded(8_602_983, @source)
+
        # this takes awhile so just skip it unless we really want to test it
        # assert_downloaded(8_602_983, @source)
      end
    end
    context "downloading a 'https://twitter.com/:user/status/:id/photo/:n' card url" do
      should "download the orig file" do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @source = "https://twitter.com/masayasuf/status/870734961778630656/photo/1"
+        @source = "https://twitter.com/ry_o_ta_/status/1024316791688843269/photo/1"
-        @rewrite = "https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig"
+        @rewrite = "https://pbs.twimg.com/media/Djcar72VsAAZsGa.jpg:orig"
        assert_rewritten(@rewrite, @source)
-        assert_downloaded(788_206, @source)
+        assert_downloaded(103812, @source)
      end
    end
@@ -37,8 +39,9 @@ module Downloads
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @source = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large"
        @rewrite = "https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"
-        assert_rewritten(@rewrite, @source)
+        @ref = "https://twitter.com/nounproject/status/540944400767922176"
-        assert_downloaded(9800, @source)
+        assert_rewritten(@rewrite, @source, @ref)
        assert_downloaded(9800, @source, @ref)
      end
    end
  end
--- a/test/unit/post_replacement_test.rb
+++ b/test/unit/post_replacement_test.rb
@@ -1,37 +0,0 @@
 require 'test_helper'
 class PostReplacementTest < ActiveSupport::TestCase
  def setup
    super
    mock_iqdb_service!
    Delayed::Worker.delay_jobs = true # don't delete the old images right away
    @system = FactoryBot.create(:user, created_at: 2.weeks.ago)
    User.stubs(:system).returns(@system)
    @uploader = FactoryBot.create(:user, created_at: 2.weeks.ago, can_upload_free: true)
    @replacer = FactoryBot.create(:user, created_at: 2.weeks.ago, can_approve_posts: true)
    CurrentUser.user = @replacer
    CurrentUser.ip_addr = "127.0.0.1"
  end
  def teardown
    super
    CurrentUser.user = nil
    CurrentUser.ip_addr = nil
    Delayed::Worker.delay_jobs = false
  end
  context "Replacing" do
    setup do
      CurrentUser.scoped(@uploader, "127.0.0.2") do
        attributes = FactoryBot.attributes_for(:jpg_upload, as_pending: "0", tag_string: "lowres tag1")
        service = UploadService.new(attributes)
        upload = service.start!
        @post = upload.post
      end
    end
  end
 end
--- a/test/unit/post_test.rb
+++ b/test/unit/post_test.rb
@@ -1480,26 +1480,6 @@ class PostTest < ActiveSupport::TestCase
            assert_equal(18557054, @post.pixiv_id)
            @post.pixiv_id = nil
          end
          context "but doesn't have a pixiv id" do
            should "save the pixiv id" do
              @post.pixiv_id = 1234
              @post.update(source: "http://i1.pixiv.net/novel-cover-original/img/2016/11/03/20/10/58/7436075_f75af69f3eacd1656d3733c72aa959cf.jpg")
              assert_nil(@post.pixiv_id)
              @post.pixiv_id = 1234
              @post.update(source: "http://i2.pixiv.net/background/img/2016/10/30/12/27/30/7059005_da9946b806c10d391a81ed1117cd33d6.jpg")
              assert_nil(@post.pixiv_id)
              @post.pixiv_id = 1234
              @post.update(source: "http://i1.pixiv.net/img15/img/omega777/novel/2612734.jpg")
              assert_nil(@post.pixiv_id)
              @post.pixiv_id = 1234
              @post.update(source: "http://img08.pixiv.net/profile/nice/1408837.jpg")
              assert_nil(@post.pixiv_id)
            end
          end
        end
        should "normalize pixiv links" do
--- a/test/unit/sources/art_station_test.rb
+++ b/test/unit/sources/art_station_test.rb
@@ -4,8 +4,7 @@ module Sources
  class ArtStationTest < ActiveSupport::TestCase
    context "The source site for an art station artwork page" do
      setup do
-        @site = Sources::Site.new("https://www.artstation.com/artwork/04XA4")
+        @site = Sources::Strategies.find("https://www.artstation.com/artwork/04XA4")
        @site.get
      end
      should "get the image url" do
@@ -32,8 +31,7 @@ module Sources
    context "The source site for an art station projects page" do
      setup do
-        @site = Sources::Site.new("https://dantewontdie.artstation.com/projects/YZK5q")
+        @site = Sources::Strategies.find("https://dantewontdie.artstation.com/projects/YZK5q")
        @site.get
      end
      should "get the image url" do
@@ -61,8 +59,7 @@ module Sources
    context "The source site for a www.artstation.com/artwork/$slug page" do
      setup do
-        @site = Sources::Site.new("https://www.artstation.com/artwork/cody-from-sf")
+        @site = Sources::Strategies.find("https://www.artstation.com/artwork/cody-from-sf")
        @site.get
      end
      should "get the image url" do
@@ -75,8 +72,7 @@ module Sources
      setup do
        @url = "https://cdna.artstation.com/p/assets/images/images/006/029/978/large/amama-l-z.jpg"
        @ref = "https://www.artstation.com/artwork/4BWW2"
-        @site = Sources::Site.new(@url, referer_url: @ref)
+        @site = Sources::Strategies.find(@url, @ref)
        @site.get
      end
      should "fetch the source data" do
@@ -86,8 +82,7 @@ module Sources
    context "The source site for an ArtStation gallery" do
      setup do
-        @site = Sources::Site.new("https://www.artstation.com/artwork/BDxrA")
+        @site = Sources::Strategies.find("https://www.artstation.com/artwork/BDxrA")
        @site.get
      end
      should "get only image urls, not video urls" do
--- a/test/unit/sources/deviantart_test.rb
+++ b/test/unit/sources/deviantart_test.rb
@@ -9,8 +9,7 @@ module Sources
    context "A path-based artist url" do
      setup do
-        @site = Sources::Site.new("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
+        @site = Sources::Strategies.find("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484")
        @site.get
      end
      should "work" do
@@ -20,8 +19,7 @@ module Sources
    context "The source for a private DeviantArt image URL" do
      setup do
-        @site = Sources::Site.new("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png")
+        @site = Sources::Strategies.find("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png")
        @site.get
      end
      should "work" do
@@ -31,25 +29,24 @@ module Sources
    context "The source for a download-disabled DeviantArt artwork page" do
      should "get the image url" do
-        @site = Sources::Site.new("https://noizave.deviantart.com/art/test-no-download-697415967")
+        @site = Sources::Strategies.find("https://noizave.deviantart.com/art/test-no-download-697415967")
        assert_equal(["https://img00.deviantart.net/56ee/i/2017/219/2/3/test__no_download_by_noizave-dbj81lr.jpg"], @site.image_urls)
      end
    end
    context "The source for a DeviantArt image url" do
      should "fetch the source data" do
-        @site = Sources::Site.new("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg")
+        @site = Sources::Strategies.find("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg")
        assert_equal("hideyoshi", @site.artist_name)
-        assert_equal("https://hideyoshi.deviantart.com", @site.profile_url)
+        assert_equal("https://www.deviantart.com/hideyoshi", @site.profile_url)
-        assert_equal("https://orig00.deviantart.net/9e1f/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg", @site.image_url)
+        assert_equal("https://pre00.deviantart.net/b5e6/th/pre/f/2016/265/3/5/legend_of_galactic_heroes_by_hideyoshi-daihpha.jpg", @site.image_url)
      end
    end
    context "The source for an DeviantArt artwork page" do
      setup do
-        @site = Sources::Site.new("http://noizave.deviantart.com/art/test-post-please-ignore-685436408")
+        @site = Sources::Strategies.find("http://noizave.deviantart.com/art/test-post-please-ignore-685436408")
        @site.get
      end
      should "get the image url" do
@@ -107,8 +104,7 @@ module Sources
    context "The source for a login-only DeviantArt artwork page" do
      setup do
-        @site = Sources::Site.new("http://noizave.deviantart.com/art/hidden-work-685458369")
+        @site = Sources::Strategies.find("http://noizave.deviantart.com/art/hidden-work-685458369")
        @site.get
      end
      should "get the image url" do
@@ -118,8 +114,7 @@ module Sources
    context "A source with malformed links in the artist commentary" do
      should "fix the links" do
-        @site = Sources::Site.new("https://teemutaiga.deviantart.com/art/Kisu-620666655")
+        @site = Sources::Strategies.find("https://teemutaiga.deviantart.com/art/Kisu-620666655")
        @site.get
        assert_match(%r!"Print available at Inprnt":\[http://www.inprnt.com/gallery/teemutaiga/kisu\]!, @site.dtext_artist_commentary_desc)
      end
--- a/test/unit/sources/nico_seiga_test.rb
+++ b/test/unit/sources/nico_seiga_test.rb
@@ -4,11 +4,8 @@ module Sources
  class NicoSeigaTest < ActiveSupport::TestCase
    context "The source site for nico seiga" do
      setup do
-        @site_1 = Sources::Site.new("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
+        @site_1 = Sources::Strategies.find("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
-        @site_1.get
+        @site_2 = Sources::Strategies.find("http://seiga.nicovideo.jp/seiga/im4937663")
        @site_2 = Sources::Site.new("http://seiga.nicovideo.jp/seiga/im4937663")
        @site_2.get
      end
      should "get the profile" do
@@ -34,11 +31,11 @@ module Sources
      should "get the tags" do
        assert(@site_1.tags.size > 0)
        first_tag = @site_1.tags.first
-        assert_equal(["アニメ", "http://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag)
+        assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag)
        assert(@site_2.tags.size > 0)
        first_tag = @site_2.tags.first
-        assert_equal(["アニメ", "http://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag)
+        assert_equal(["アニメ", "https://seiga.nicovideo.jp/tag/%E3%82%A2%E3%83%8B%E3%83%A1"], first_tag)
      end
      should "convert a page into a json representation" do
@@ -51,8 +48,7 @@ module Sources
      end
      should "work for a https://lohas.nicoseiga.jp/thumb/${id}i url" do
-        site = Sources::Site.new("https://lohas.nicoseiga.jp/thumb/6844226i")
+        site = Sources::Strategies.find("https://lohas.nicoseiga.jp/thumb/6844226i")
        site.get
        full_image_url = %r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226!
        assert_match(full_image_url, site.image_url)
--- a/test/unit/sources/nijie_test.rb
+++ b/test/unit/sources/nijie_test.rb
@@ -7,9 +7,7 @@ module Sources
        CurrentUser.user = FactoryBot.create(:user)
        CurrentUser.ip_addr = "127.0.0.1"
-        @site = Sources::Site.new("http://nijie.info/view.php?id=213043")
+        @site = Sources::Strategies.find("https://nijie.info/view.php?id=213043")
        @site.get
        sleep(5)
      end
      should "get the image url" do
@@ -17,7 +15,7 @@ module Sources
      end
      should "get the profile" do
-        assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url)
+        assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
      end
      should "get the artist name" do
@@ -25,15 +23,14 @@ module Sources
      end
      should "get the tags" do
-        assert_equal([["眼鏡", "http://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "http://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "http://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags)
+        assert_equal([["眼鏡", "https://nijie.info/search.php?word=%E7%9C%BC%E9%8F%A1"], ["リトルウィッチアカデミア", "https://nijie.info/search.php?word=%E3%83%AA%E3%83%88%E3%83%AB%E3%82%A6%E3%82%A3%E3%83%83%E3%83%81%E3%82%A2%E3%82%AB%E3%83%87%E3%83%9F%E3%82%A2"], ["アーシュラ先生", "https://nijie.info/search.php?word=%E3%82%A2%E3%83%BC%E3%82%B7%E3%83%A5%E3%83%A9%E5%85%88%E7%94%9F"]], @site.tags)
      end
      should "normalize （）characters in tags" do
        FactoryBot.create(:tag, :name => "kaga")
        FactoryBot.create(:wiki_page, :title => "kaga", :other_names => "加賀(艦これ)")
-        @site = Sources::Site.new("http://nijie.info/view.php?id=208316")
+        @site = Sources::Strategies.find("https://nijie.info/view.php?id=208316")
        @site.get
        assert_includes(@site.tags.map(&:first), "加賀（艦これ）")
        assert_includes(@site.translated_tags.map(&:first), "kaga")
@@ -50,16 +47,15 @@ module Sources
    context "The source site for a nijie referer url" do
      setup do
-        @site = Sources::Site.new("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", referer_url: "https://nijie.info/view_popup.php?id=213043")
+        @site = Sources::Strategies.find("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", "https://nijie.info/view_popup.php?id=213043")
        @site.get
      end
      should "get the image url" do
-        assert_equal("https://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url)
+        assert_equal("http://pic03.nijie.info/nijie_picture/728995_20170505014820_0.jpg", @site.image_url)
      end
      should "get the profile" do
-        assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url)
+        assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
      end
      should "get the artist name" do
@@ -69,8 +65,7 @@ module Sources
    context "The source site for a nijie popup" do
      setup do
-        @site = Sources::Site.new("https://nijie.info/view_popup.php?id=213043")
+        @site = Sources::Strategies.find("https://nijie.info/view_popup.php?id=213043")
        @site.get
      end
      should "get the image url" do
@@ -78,7 +73,7 @@ module Sources
      end
      should "get the profile" do
-        assert_equal("http://nijie.info/members.php?id=728995", @site.profile_url)
+        assert_equal("https://nijie.info/members.php?id=728995", @site.profile_url)
      end
      should "get the artist name" do
@@ -88,8 +83,7 @@ module Sources
    context "The source site for a nijie gallery" do
      setup do
-        @site = Sources::Site.new("http://nijie.info/view.php?id=218856")
+        @site = Sources::Strategies.find("https://nijie.info/view.php?id=218856")
        @site.get
      end
      should "get the image urls" do
--- a/test/unit/sources/pawoo_test.rb
+++ b/test/unit/sources/pawoo_test.rb
@@ -5,8 +5,7 @@ module Sources
    context "The source site for a https://pawoo.net/web/status/$id url"  do
      setup do
        skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id
-        @site = Sources::Site.new("https://pawoo.net/web/statuses/1202176")
+        @site = Sources::Strategies.find("https://pawoo.net/web/statuses/1202176")
        @site.get
      end
      should "get the profile" do
@@ -35,8 +34,7 @@ module Sources
    context "The source site for a https://pawoo.net/$user/$id url"  do
      setup do
        skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id
-        @site = Sources::Site.new("https://pawoo.net/@evazion/19451018")
+        @site = Sources::Strategies.find("https://pawoo.net/@evazion/19451018")
        @site.get
      end
      should "get the profile" do
@@ -89,8 +87,7 @@ module Sources
        skip "Pawoo keys not set" unless Danbooru.config.pawoo_client_id
        @url = "https://img.pawoo.net/media_attachments/files/001/298/028/original/55a6fd252778454b.mp4"
        @ref = "https://pawoo.net/@evazion/19451018"
-        @site = Sources::Site.new(@url, referer_url: @ref)
+        @site = Sources::Strategies.find(@url, @ref)
        @site.get
      end
      should "fetch the source data" do
--- a/test/unit/sources/pixiv_test.rb
+++ b/test/unit/sources/pixiv_test.rb
@@ -3,8 +3,8 @@ require 'test_helper'
 module Sources
  class PixivTest < ActiveSupport::TestCase
    def get_source(source)
-      @site = Sources::Site.new(source)
+      @site = Sources::Strategies.find(source)
-      @site.get
+      
      @site
    rescue Net::OpenTimeout
      skip "Remote connection to #{source} failed"
@@ -23,19 +23,22 @@ module Sources
    context "in all cases" do
      context "A touch page" do
        setup do
-          @site = Sources::Site.new("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915")
+          @site = Sources::Strategies.find("http://touch.pixiv.net/member_illust.php?mode=medium&illust_id=59687915")
-          @image_urls = @site.get
+          @image_urls = @site.image_urls
        end
        should "get all the image urls" do
-          assert_equal("https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png", @image_urls)
+          expected_urls = [
            "https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p0.png",
            "https://i.pximg.net/img-original/img/2016/10/29/17/13/23/59687915_p1.png"
          ].sort
          assert_equal(expected_urls, @image_urls.sort)
        end
      end
      context "A gallery page" do
        setup do
-          @site = Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482")
+          @site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=49270482")
          @site.get
          @image_urls = @site.image_urls
        end
@@ -46,8 +49,7 @@ module Sources
      context "An ugoira source site for pixiv" do
        setup do
-          @site = Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
+          @site = Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=62247364")
          @site.get
        end
        should "get the file url" do
@@ -66,8 +68,7 @@ module Sources
      context "A https://i.pximg.net/img-zip/ugoira/* source" do
        should "get the metadata" do
-          @site = Sources::Site.new("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip")
+          @site = Sources::Strategies.find("https://i.pximg.net/img-zip-ugoira/img/2017/04/04/08/57/38/62247364_ugoira1920x1080.zip")
          @site.get
          assert_equal("uroobnad2", @site.artist_name)
        end
@@ -79,7 +80,7 @@ module Sources
        end
        should "get the profile" do
-          assert_equal("http://www.pixiv.net/member.php?id=696859", @site.profile_url)
+          assert_equal("https://www.pixiv.net/member.php?id=696859", @site.profile_url)
        end
        should "get the artist name" do
@@ -142,12 +143,17 @@ module Sources
        should "get the full size image url" do
          assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.image_url)
        end        
        should "get the full size image url for the canonical url" do
          assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url)
        end
      end
      context "fetching source data for a deleted work" do
        should "raise a bad id error" do
          assert_raise(::PixivApiClient::BadIDError) do
            get_source("https://i.pximg.net/img-original/img/2017/11/22/01/06/44/65991677_p0.png")
            @site.image_urls
          end
        end
      end
--- a/test/unit/sources/tumblr_test.rb
+++ b/test/unit/sources/tumblr_test.rb
@@ -9,8 +9,7 @@ module Sources
    context "The source for a 'http://*.tumblr.com/post/*' photo post with a single image" do
      setup do
-        @site = Sources::Site.new("https://noizave.tumblr.com/post/162206271767")
+        @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162206271767")
        @site.get
      end
      should "get the artist name" do
@@ -22,7 +21,7 @@ module Sources
      end
      should "get the tags" do
-        tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red-hair"]]
+        tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]]
        assert_equal(tags, @site.tags)
      end
@@ -68,7 +67,7 @@ module Sources
      end
      should "get the image url" do
-        assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url)
+        assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png", @site.image_url)
      end
      should "get the artist" do
@@ -82,16 +81,15 @@ module Sources
    context "The source for a 'http://*.tumblr.com/image/*' image page" do
      setup do
-        @site = Sources::Site.new("https://noizave.tumblr.com/image/162206271767")
+        @site = Sources::Strategies.find("https://noizave.tumblr.com/image/162206271767")
        @site.get
      end
      should "get the image url" do
-        assert_equal("http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_raw.png", @site.image_url)
+        assert_equal("https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png", @site.image_url)
      end
      should "get the tags" do
-        tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red-hair"]]
+        tags = [["tag", "https://tumblr.com/tagged/tag"], ["red_hair", "https://tumblr.com/tagged/red_hair"]]
        assert_equal(tags, @site.tags)
      end
    end
@@ -100,20 +98,19 @@ module Sources
      setup do
        @url = "https://78.media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"
        @ref = "https://noizave.tumblr.com/post/162094447052"
-        @site = Sources::Site.new(@url, referer_url: @ref)
+        @site = Sources::Strategies.find(@url, @ref)
        @site.get
      end
      should "get the image urls" do
        urls = %w[
-          http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_raw.png
+          https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_orwwptNBCE1wsfqepo1_1280.png
-          http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_raw.jpg
+          https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg
-          http://data.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_raw.gif
+          https://media.tumblr.com/d2ed224f135b0c81f812df81a0a8692d/tumblr_orwwptNBCE1wsfqepo3_1280.gif
-          http://data.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_raw.png
+          https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_inline_os3134mABB1v11u29_1280.png
-          http://data.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_raw.gif
+          https://media.tumblr.com/34ed9d0ff4a21625981372291cb53040/tumblr_nv3hwpsZQY1uft51jo1_1280.gif
        ]
-        assert_equal(urls, @site.image_urls)
+        assert_equal(urls.sort, @site.image_urls.sort)
      end
      should "get the tags" do
@@ -129,17 +126,16 @@ module Sources
    context "The source for a 'http://*.tumblr.com/post/*' text post with inline images" do
      setup do
-        @site = Sources::Site.new("https://noizave.tumblr.com/post/162221502947")
+        @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162221502947")
        @site.get
      end
      should "get the image urls" do
        urls = %w[
-          http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_raw.png
+          https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os2zhkfhY01v11u29_1280.png
-          http://data.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_raw.jpg
+          https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_inline_os2zkg02xH1v11u29_1280.jpg
        ]
-        assert_equal(urls, @site.image_urls)
+        assert_equal(urls.sort, @site.image_urls.sort)
      end
      should "get the commentary" do
@@ -151,14 +147,13 @@ module Sources
    context "The source for a 'http://*.tumblr.com/post/*' video post with inline images" do
      setup do
-        @site = Sources::Site.new("https://noizave.tumblr.com/post/162222617101")
+        @site = Sources::Strategies.find("https://noizave.tumblr.com/post/162222617101")
        @site.get
      end
      should "get the image urls" do
        urls = %w[
          https://vtt.tumblr.com/tumblr_os31dkexhK1wsfqep.mp4
-          http://data.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_raw.png
+          https://media.tumblr.com/afed9f5b3c33c39dc8c967e262955de2/tumblr_inline_os31dclyCR1v11u29_1280.png
        ]
        assert_equal(urls, @site.image_urls)
@@ -167,12 +162,11 @@ module Sources
    context "The source for a 'http://*.tumblr.com/post/*' answer post with inline images" do
      setup do
-        @site = Sources::Site.new("https://noizave.tumblr.com/post/171237880542/test-ask")
+        @site = Sources::Strategies.find("https://noizave.tumblr.com/post/171237880542/test-ask")
        @site.get
      end
      should "get the image urls" do
-        urls = ["http://data.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_raw.png"]
+        urls = ["https://media.tumblr.com/cb481f031010e8ddad564b2150149c9a/tumblr_inline_p4nxoyLrSh1v11u29_1280.png"]
        assert_equal(urls, @site.image_urls)
      end
--- a/test/unit/sources/twitter_test.rb
+++ b/test/unit/sources/twitter_test.rb
@@ -2,79 +2,16 @@ require 'test_helper'
 module Sources
  class TwitterTest < ActiveSupport::TestCase
    context "A video" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Site.new("https://twitter.com/CincinnatiZoo/status/859073537713328129")
        @site.get
      end
      should "get the image url" do
        assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url)
      end
    end
    context "An animated gif" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Site.new("https://twitter.com/DaniStrawberry1/status/859435334765088769")
        @site.get
      end
      should "get the image url" do
        assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url)
      end
    end
    context "A twitter summary card" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Site.new("https://twitter.com/NatGeo/status/932700115936178177")
        @site.get
      end
      should "get the image url" do
        assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url)
      end
    end
    context "A twitter summary card from twitter" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Site.new("https://twitter.com/masayasuf/status/870734961778630656/photo/1")
        @site.get
      end
      should "get the image url" do
        assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url)
      end
    end
    context "A twitter summary card from twitter with a :large image" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Site.new("https://twitter.com/aranobu/status/817736083567820800")
        @site.get
      end
      should "get the image url" do
        assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig", @site.image_url)
      end
    end
    context "An extended tweet" do
      should "extract the correct image url" do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://twitter.com/onsen_musume_jp/status/865534101918330881")
+        @site = Sources::Strategies.find("https://twitter.com/onsen_musume_jp/status/865534101918330881")
        @site.get
        assert_equal(["https://pbs.twimg.com/media/DAL-ntWV0AEbhes.jpg:orig"], @site.image_urls)
      end
      should "extract all the image urls" do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://twitter.com/aoimanabu/status/892370963630743552")
+        @site = Sources::Strategies.find("https://twitter.com/aoimanabu/status/892370963630743552")
        @site.get
        urls = %w[
          https://pbs.twimg.com/media/DGJWp59UIAA_-en.jpg:orig
@@ -86,11 +23,71 @@ module Sources
      end
    end
    context "A video" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129")
      end
      should "get the image url" do
        assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url)
      end
    end
    context "An animated gif" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Strategies.find("https://twitter.com/DaniStrawberry1/status/859435334765088769")
      end
      should "get the image url" do
        assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url)
      end
    end
    context "A twitter summary card" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Strategies.find("https://twitter.com/NatGeo/status/932700115936178177")
      end
      should "get the image url" do
        assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url)
      end
    end
    context "A twitter summary card from twitter" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Strategies.find("https://twitter.com/masayasuf/status/870734961778630656/photo/1")
      end
      should "get the image url" do
        skip "Find another url, the masayasuf tweet no longer exists"
        assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url)
      end
    end
    context "A twitter summary card from twitter with a :large image" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
        @site = Sources::Strategies.find("https://twitter.com/aranobu/status/817736083567820800")
      end
      should "get the image url" do
        assert_equal("https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig", @site.image_url)
      end
      should "get the canonical url" do
        assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url)
      end
    end
    context "The source site for a restricted twitter" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://mobile.twitter.com/Strangestone/status/556440271961858051")
+        @site = Sources::Strategies.find("https://mobile.twitter.com/Strangestone/status/556440271961858051")
-        @site.get
+        
      end
      should "get the image url" do
@@ -101,8 +98,7 @@ module Sources
    context "The source site for twitter" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://mobile.twitter.com/nounproject/status/540944400767922176")
+        @site = Sources::Strategies.find("https://mobile.twitter.com/nounproject/status/540944400767922176")
        @site.get
      end
      should "get the profile" do
@@ -135,8 +131,7 @@ module Sources
    context "The source site for a direct image and a referer" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large", referer_url: "https://twitter.com/nounproject/status/540944400767922176")
+        @site = Sources::Strategies.find("https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:large", "https://twitter.com/nounproject/status/540944400767922176")
        @site.get
      end
      should "get the artist name" do
@@ -151,8 +146,7 @@ module Sources
    context "The source site for a https://twitter.com/i/web/status/:id url" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://twitter.com/i/web/status/943446161586733056")
+        @site = Sources::Strategies.find("https://twitter.com/i/web/status/943446161586733056")
        @site.get
      end
      should "fetch the source data" do
@@ -163,8 +157,7 @@ module Sources
    context "A tweet" do
      setup do
        skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
-        @site = Sources::Site.new("https://twitter.com/noizave/status/875768175136317440")
+        @site = Sources::Strategies.find("https://twitter.com/noizave/status/875768175136317440")
        @site.get
      end
      should "convert urls, hashtags, and mentions to dtext" do
--- a/test/unit/tag_alias_correction_test.rb
+++ b/test/unit/tag_alias_correction_test.rb
@@ -17,6 +17,7 @@ class TagAliasCorrectionTest < ActiveSupport::TestCase
    context "with a bad cache and post counts" do
      setup do
        Cache.delete("ta:#{Cache.hash('bbb')}")
        Cache.put("ta:#{Cache.hash('aaa')}", "zzz")
        Tag.where(:name => "aaa").update_all("post_count = -3")
        @correction = TagAliasCorrection.new(@tag_alias.id)