# This is a collection of strategies for extracting information about a
# resource. At a minimum it tries to extract the artist name and a canonical
# URL to download the image from. But it can also be used to normalize a URL
# for use with the artist finder. It differs from Downloads::RewriteStrategies
# in that the latter is more for normalizing and rewriting a URL until it is
# suitable for downloading, whereas Sources::Strategies is more for metadata
# that can only be obtained by downloading and parsing the resource.

module Sources
  module Strategies
    # Base class for site-specific source strategies. It stores the URL (and
    # optional referer) it was built with and supplies default answers for
    # every query; methods that have no sensible default raise
    # NotImplementedError and are expected to be overridden by subclasses.
    class Base
      attr_reader :url, :referer_url
      # NOTE: `tags` is intentionally not listed here; it has an explicit
      # reader below that defaults to an empty list and removes duplicates.
      attr_reader :artist_name, :profile_url, :image_url
      attr_reader :artist_commentary_title, :artist_commentary_desc

      # Whether this strategy handles the given URL. The base strategy
      # matches nothing.
      #
      # @param url [String] the URL to test
      # @return [Boolean] always false here
      def self.url_match?(url)
        false
      end

      # Convert commentary to dtext by stripping html tags. Sites can override
      # this to customize how their markup is translated to dtext.
      #
      # This is a public class method: it is invoked as `self.class.to_dtext`
      # by the dtext_artist_commentary_* readers, and visibility keywords such
      # as `protected` do not apply to `def self.` definitions anyway.
      #
      # @param text [#to_s] raw commentary; nil becomes the empty string
      # @return [String] the text with markup removed and HTML entities decoded
      def self.to_dtext(text)
        text = text.to_s
        text = Rails::Html::FullSanitizer.new.sanitize(text, encode_special_chars: false)
        CGI.unescapeHTML(text)
      end

      # @param url [String] the URL this strategy describes
      # @param referer_url [String, nil] optional referer associated with the URL
      def initialize(url, referer_url = nil)
        @url = url
        @referer_url = referer_url
      end

      # No remote calls are made until this method is called.
      #
      # @raise [NotImplementedError] always, in the base class
      def get
        raise NotImplementedError
      end

      # Size reported by Downloads::File for the image URL. The result is
      # memoized in @get_size after the first call.
      def get_size
        @get_size ||= Downloads::File.new(@image_url).size
      end

      # Subclasses should return true only if the URL is in its final normalized form.
      #
      # Sources::Site.new("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
      # => true
      # Sources::Site.new("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
      # => false
      def normalized_for_artist_finder?
        false
      end

      # Subclasses should return true only if the URL is a valid URL that could
      # be converted into normalized form.
      #
      # Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
      # => true
      # Sources::Site.new("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
      # => false
      def normalizable_for_artist_finder?
        false
      end

      # Returns the URL to use with the artist finder. The base strategy
      # performs no normalization and returns the URL unchanged.
      def normalize_for_artist_finder!
        url
      end

      # @raise [NotImplementedError] always, in the base class
      def site_name
        raise NotImplementedError
      end

      # Identifier for the artist on the source site; defaults to the artist name.
      def unique_id
        artist_name
      end

      # Looks up artists via Artist.other_names_match using the artist name.
      #
      # @return the lookup result, or nil when no artist name is present
      def artist_record
        Artist.other_names_match(artist_name) if artist_name.present?
      end

      # All image URLs for the resource; by default just the single image URL.
      #
      # @return [Array] a one-element array holding image_url (which may be nil)
      def image_urls
        [image_url]
      end

      # Tags collected from the source, with duplicates removed.
      #
      # @return [Array] an empty array when no tags have been set
      def tags
        (@tags || []).uniq
      end

      # Translates the first element of every source tag through translate_tag
      # and returns the unique, sorted matches as [name, category] pairs.
      def translated_tags
        matches = tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
        matches.map { |tag| [tag.name, tag.category] }
      end

      # Given a tag from the source site, should return an array of corresponding Danbooru tags.
      #
      # Tags named after an active wiki page whose other names equal the source
      # tag are tried first. If there are none, the source tag is normalized,
      # resolved through TagAlias.to_aliased, and looked up among nonempty tags.
      def translate_tag(untranslated_tag)
        # `distinct` is the relation-level equivalent of the former
        # `Relation#uniq`, so the chain stays a query that `select(:title)`
        # and `where(name: ...)` can consume.
        wiki_titles = WikiPage.active.other_names_equal(untranslated_tag).distinct.select(:title)
        matches = Tag.where(name: wiki_titles)

        if matches.empty?
          normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
          matches = Tag.nonempty.where(name: normalized_name)
        end

        matches
      end

      # Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
      def fake_referer
        nil
      end

      # The artist commentary title converted with the class's to_dtext.
      def dtext_artist_commentary_title
        self.class.to_dtext(artist_commentary_title)
      end

      # The artist commentary description converted with the class's to_dtext.
      def dtext_artist_commentary_desc
        self.class.to_dtext(artist_commentary_desc)
      end

      protected

      # @raise [NotImplementedError] always, in the base class
      def agent
        raise NotImplementedError
      end
    end
  end
end