sources: rename Sources::Strategies to Source::Extractor.
Rename Sources::Strategies to Source::Extractor. A Source::Extractor represents a thing that extracts information from a given URL.
This commit is contained in:
@@ -4,7 +4,7 @@ class SourcesController < ApplicationController
|
||||
respond_to :js, :json, :xml
|
||||
|
||||
def show
|
||||
@source = Sources::Strategies.find(params[:url], params[:ref])
|
||||
@source = Source::Extractor.find(params[:url], params[:ref])
|
||||
|
||||
respond_with(@source.to_h) do |format|
|
||||
format.xml { render xml: @source.to_h.to_xml(root: "source") }
|
||||
|
||||
@@ -30,15 +30,15 @@ class IqdbClient
|
||||
if file.present?
|
||||
file = file.tempfile
|
||||
elsif url.present?
|
||||
strategy = Sources::Strategies.find(url)
|
||||
raise Error, "Can't do reverse image search: #{url} has multiple images. Enter the URL of a single image." if strategy.image_urls.size > 1
|
||||
extractor = Source::Extractor.find(url)
|
||||
raise Error, "Can't do reverse image search: #{url} has multiple images. Enter the URL of a single image." if extractor.image_urls.size > 1
|
||||
|
||||
download_url = strategy.image_urls.first
|
||||
file = Sources::Strategies.find(download_url).download_file!(download_url)
|
||||
download_url = extractor.image_urls.first
|
||||
file = Source::Extractor.find(download_url).download_file!(download_url)
|
||||
elsif image_url.present?
|
||||
file = Sources::Strategies.find(image_url).download_file!(image_url)
|
||||
file = Source::Extractor.find(image_url).download_file!(image_url)
|
||||
elsif file_url.present?
|
||||
file = Sources::Strategies.find(file_url).download_file!(file_url)
|
||||
file = Source::Extractor.find(file_url).download_file!(file_url)
|
||||
elsif post_id.present?
|
||||
file = Post.find(post_id).file(:preview)
|
||||
elsif media_asset_id.present?
|
||||
|
||||
@@ -66,14 +66,14 @@ class PostReplacementProcessor
|
||||
return MediaFile.open(file) if file.present?
|
||||
raise "No file or source URL provided" if source_url.blank?
|
||||
|
||||
strategy = Sources::Strategies.find(source_url, referer_url)
|
||||
raise NotImplementedError, "No login credentials configured for #{strategy.site_name}." unless strategy.class.enabled?
|
||||
extractor = Source::Extractor.find(source_url, referer_url)
|
||||
raise NotImplementedError, "No login credentials configured for #{extractor.site_name}." unless extractor.class.enabled?
|
||||
|
||||
image_urls = strategy.image_urls
|
||||
image_urls = extractor.image_urls
|
||||
raise "#{source_url} contains multiple images" if image_urls.size > 1
|
||||
|
||||
image_url = image_urls.first
|
||||
file = strategy.download_file!(image_url)
|
||||
file = extractor.download_file!(image_url)
|
||||
|
||||
[file, image_url]
|
||||
end
|
||||
|
||||
317
app/logical/source/extractor.rb
Normal file
317
app/logical/source/extractor.rb
Normal file
@@ -0,0 +1,317 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# A source extractor is used to extract information from a given source URL. It
|
||||
# extracts all the images and videos from the URL, as well as metadata such as
|
||||
# the tags, commentary, artist name, profile URL, and additional names and URLs
|
||||
# for new artist entries.
|
||||
#
|
||||
# To add a new site, create a subclass of Source::Extractor and implement the following methods:
|
||||
#
|
||||
# * match? - True if the extractor should be used for this URL.
|
||||
# * image_urls - The list of images or videos at this URL. Used during uploads.
|
||||
# * page_url - The page containing the images. Used for post sources.
|
||||
# * profile_url - The URL of the artist's profile page. Used for artist finding.
|
||||
# * profile_urls - Extra profile URLs to add to the artist entry.
|
||||
# * tag_name - The artist's login name. Used as the default name for new artist tags.
|
||||
# * artist_name - The artist's display name. Used as an other name in new artist entries.
|
||||
# * other_names - Extra names used in new artist entries.
|
||||
# * tags - The artist's tags for the work. Used by translated tags.
|
||||
# * artist_commentary_title - The artist's title of the work. Used for artist commentaries.
|
||||
# * artist_commentary_desc - The artist's description of the work. Used for artist commentaries.
|
||||
#
|
||||
module Source
|
||||
class Extractor
|
||||
extend Memoist
|
||||
|
||||
# The HTTP timeout to download a file.
|
||||
DOWNLOAD_TIMEOUT = 60
|
||||
|
||||
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
||||
delegate :site_name, to: :parsed_url
|
||||
|
||||
SUBCLASSES = [
|
||||
Source::Extractor::Pixiv,
|
||||
Source::Extractor::Twitter,
|
||||
Source::Extractor::Tumblr,
|
||||
Source::Extractor::NicoSeiga,
|
||||
Source::Extractor::DeviantArt,
|
||||
Source::Extractor::Moebooru,
|
||||
Source::Extractor::Nijie,
|
||||
Source::Extractor::ArtStation,
|
||||
Source::Extractor::HentaiFoundry,
|
||||
Source::Extractor::Fanbox,
|
||||
Source::Extractor::Mastodon,
|
||||
Source::Extractor::PixivSketch,
|
||||
Source::Extractor::Weibo,
|
||||
Source::Extractor::Newgrounds,
|
||||
Source::Extractor::Skeb,
|
||||
Source::Extractor::Lofter,
|
||||
Source::Extractor::Foundation,
|
||||
Source::Extractor::Plurk,
|
||||
Source::Extractor::Tinami,
|
||||
Source::Extractor::Fantia,
|
||||
]
|
||||
|
||||
# Should return true if the extractor is configured correctly. Return false
|
||||
# if the extractor requires api keys that have not been configured.
|
||||
def self.enabled?
|
||||
true
|
||||
end
|
||||
|
||||
# Return the extractor for the given `url`. The `url` may be either a
|
||||
# direct image URL, or the URL of a page containing one or more images.
|
||||
#
|
||||
# The `referer_url` is optionally provided when uploading direct image URLs
|
||||
# with the bookmarklet. This will be the page containing the image. This
|
||||
# lets us extract information from sites like Twitter, where the image URL by
|
||||
# itself doesn't have enough information to find the page containing the image.
|
||||
#
|
||||
# @param url [String] The URL to extract information from.
|
||||
# @param referer_url [String, nil] The page URL if `url` is an image URL.
|
||||
# @return [Source::Extractor]
|
||||
def self.find(url, referer_url = nil, default: Extractor::Null)
|
||||
extractor = SUBCLASSES.lazy.map { |extractor| extractor.new(url, referer_url) }.find(&:match?)
|
||||
extractor || default&.new(url, referer_url)
|
||||
end
|
||||
|
||||
# Initialize an extractor. Normally one should call `Source::Extractor.find`
|
||||
# instead of instantiating an extractor directly.
|
||||
#
|
||||
# @param url [String] The URL to extract information from.
|
||||
# @param referer_url [String, nil] The page URL if `url` is an image URL.
|
||||
def initialize(url, referer_url = nil)
|
||||
@url = url.to_s
|
||||
@referer_url = referer_url&.to_s
|
||||
|
||||
@parsed_url = Source::URL.parse(url)
|
||||
@parsed_referer = Source::URL.parse(referer_url) if referer_url.present?
|
||||
@parsed_referer = nil if parsed_url&.site_name != parsed_referer&.site_name
|
||||
end
|
||||
|
||||
# Should return true if this extractor should be used for this URL.
|
||||
# Normally, this should check if the URL is from the right site.
|
||||
#
|
||||
# @return [Boolean]
|
||||
def match?
|
||||
false
|
||||
end
|
||||
|
||||
# The list of image (or video) URLs extracted from the target URL.
|
||||
#
|
||||
# If the target URL is a page, this should be every image on the page. If
|
||||
# the target URL is a single image, this should be the image itself.
|
||||
#
|
||||
# @return [Array<String>]
|
||||
def image_urls
|
||||
[]
|
||||
end
|
||||
|
||||
# The URL of the page containing the image, or nil if it can't be found.
|
||||
#
|
||||
# The source of the post will be set to the page URL if it's not possible
|
||||
# to convert the image URL to a page URL for this site.
|
||||
#
|
||||
# For example, for sites like Twitter and Tumblr, it's not possible to
|
||||
# convert image URLs to page URLs, so the page URL will be used as the
|
||||
# source for these sites. For sites like Pixiv and DeviantArt, it is
|
||||
# possible to convert image URLs to page URLs, so the image URL will be
|
||||
# used as the source for these sites. This is determined by whether
|
||||
# `Source::URL#page_url` returns a URL or nil.
|
||||
#
|
||||
# @return [String, nil]
|
||||
def page_url
|
||||
nil
|
||||
end
|
||||
|
||||
# A name to suggest as the artist's tag name when creating a new artist.
|
||||
# This should usually be the artist's login name. It should be plain ASCII,
|
||||
# hopefully unique, and it should follow the rules for tag names (see
|
||||
# TagNameValidator).
|
||||
#
|
||||
# @return [String, nil]
|
||||
def tag_name
|
||||
artist_name
|
||||
end
|
||||
|
||||
# The artist's primary name. If an artist has both a display name and a
|
||||
# login name, this should be the display name. This will be used as an
|
||||
# other name for new artist entries.
|
||||
#
|
||||
# @return [String, nil]
|
||||
def artist_name
|
||||
nil
|
||||
end
|
||||
|
||||
# A list of all names associated with the artist. These names will be suggested
|
||||
# as other names when creating a new artist.
|
||||
#
|
||||
# @return [Array<String>]
|
||||
def other_names
|
||||
[artist_name, tag_name].compact.uniq
|
||||
end
|
||||
|
||||
# A link to the artist's profile page on the site. This will be used for
|
||||
# artist finding purposes, so it needs to match the URL in the artist entry.
|
||||
#
|
||||
# @return [String, nil]
|
||||
def profile_url
|
||||
nil
|
||||
end
|
||||
|
||||
# A list of all profile urls associated with the artist. These urls will
|
||||
# be suggested when creating a new artist.
|
||||
#
|
||||
# @return [Array<String>]
|
||||
def profile_urls
|
||||
[profile_url].compact
|
||||
end
|
||||
|
||||
# The artist's title of the work. Used for the artist commentary.
|
||||
#
|
||||
# @return [String, nil]
|
||||
def artist_commentary_title
|
||||
nil
|
||||
end
|
||||
|
||||
# The artist's description of the work. Used for the artist commentary.
|
||||
#
|
||||
# @return [String, nil]
|
||||
def artist_commentary_desc
|
||||
nil
|
||||
end
|
||||
|
||||
# Download the file at the given url. Raises Danbooru::Http::DownloadError if the download fails, or
|
||||
# Danbooru::Http::FileTooLargeError if the file is too large.
|
||||
#
|
||||
# @return [MediaFile] the downloaded file
|
||||
def download_file!(download_url)
|
||||
response, file = http_downloader.download_media(download_url)
|
||||
file
|
||||
end
|
||||
|
||||
# An HTTP client for API requests.
|
||||
def http
|
||||
Danbooru::Http.new.proxy.public_only
|
||||
end
|
||||
|
||||
# An HTTP client for downloading files.
|
||||
def http_downloader
|
||||
http.timeout(DOWNLOAD_TIMEOUT).max_size(Danbooru.config.max_file_size).use(:spoof_referrer).use(:unpolish_cloudflare)
|
||||
end
|
||||
|
||||
def artists
|
||||
ArtistFinder.find_artists(profile_url)
|
||||
end
|
||||
|
||||
# A new artist entry with suggested defaults for when the artist doesn't
|
||||
# exist. Used in Artist.new_with_defaults to prefill the new artist form.
|
||||
def new_artist
|
||||
Artist.new(
|
||||
name: tag_name,
|
||||
other_names: other_names,
|
||||
url_string: profile_urls.join("\n")
|
||||
)
|
||||
end
|
||||
|
||||
def tags
|
||||
(@tags || []).uniq
|
||||
end
|
||||
|
||||
def normalized_tags
|
||||
tags.map { |tag, _url| normalize_tag(tag) }.sort.uniq
|
||||
end
|
||||
|
||||
def normalize_tag(tag)
|
||||
WikiPage.normalize_other_name(tag).downcase
|
||||
end
|
||||
|
||||
def translated_tags
|
||||
translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
|
||||
translated_tags.reject(&:artist?)
|
||||
end
|
||||
|
||||
# Given a tag from the source site, should return an array of corresponding Danbooru tags.
|
||||
def translate_tag(untranslated_tag)
|
||||
return [] if untranslated_tag.blank?
|
||||
|
||||
translated_tag_names = WikiPage.active.other_names_include(untranslated_tag).uniq.pluck(:title)
|
||||
translated_tag_names = TagAlias.to_aliased(translated_tag_names)
|
||||
translated_tags = Tag.where(name: translated_tag_names)
|
||||
|
||||
if translated_tags.empty?
|
||||
normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
|
||||
translated_tags = Tag.nonempty.where(name: normalized_name)
|
||||
end
|
||||
|
||||
translated_tags
|
||||
end
|
||||
|
||||
def dtext_artist_commentary_title
|
||||
self.class.to_dtext(artist_commentary_title)
|
||||
end
|
||||
|
||||
def dtext_artist_commentary_desc
|
||||
self.class.to_dtext(artist_commentary_desc)
|
||||
end
|
||||
|
||||
# A search query that should return any posts that were previously
|
||||
# uploaded from the same source. These may be duplicates, or they may be
|
||||
# other posts from the same gallery.
|
||||
def related_posts_search_query
|
||||
"source:#{url}"
|
||||
end
|
||||
|
||||
def related_posts(limit = 5)
|
||||
Post.system_tag_match(related_posts_search_query).paginate(1, limit: limit)
|
||||
end
|
||||
|
||||
# A hash containing the results of any API calls made by the extractor. For debugging purposes only.
|
||||
def api_response
|
||||
nil
|
||||
end
|
||||
|
||||
def to_h
|
||||
{
|
||||
:artist => {
|
||||
:name => artist_name,
|
||||
:tag_name => tag_name,
|
||||
:other_names => other_names,
|
||||
:profile_url => profile_url,
|
||||
:profile_urls => profile_urls
|
||||
},
|
||||
:artists => artists.as_json(include: :sorted_urls),
|
||||
:image_urls => image_urls,
|
||||
:page_url => page_url,
|
||||
:tags => tags,
|
||||
:normalized_tags => normalized_tags,
|
||||
:translated_tags => translated_tags,
|
||||
:artist_commentary => {
|
||||
:title => artist_commentary_title,
|
||||
:description => artist_commentary_desc,
|
||||
:dtext_title => dtext_artist_commentary_title,
|
||||
:dtext_description => dtext_artist_commentary_desc
|
||||
},
|
||||
:api_response => api_response.to_h
|
||||
}
|
||||
end
|
||||
|
||||
def to_json(*_args)
|
||||
to_h.to_json
|
||||
end
|
||||
|
||||
def http_exists?(url)
|
||||
http_downloader.head(url).status.success?
|
||||
end
|
||||
|
||||
# Convert commentary to dtext by stripping html tags. Sites can override
|
||||
# this to customize how their markup is translated to dtext.
|
||||
def self.to_dtext(text)
|
||||
text = text.to_s
|
||||
text = Rails::Html::FullSanitizer.new.sanitize(text, encode_special_chars: false)
|
||||
text = CGI.unescapeHTML(text)
|
||||
text
|
||||
end
|
||||
|
||||
memoize :http, :http_downloader, :related_posts
|
||||
end
|
||||
end
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::ArtStation
|
||||
module Sources::Strategies
|
||||
class ArtStation < Base
|
||||
class Source::Extractor
|
||||
class ArtStation < Source::Extractor
|
||||
def match?
|
||||
Source::URL::ArtStation === parsed_url
|
||||
end
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Sources
|
||||
module Strategies
|
||||
class DeviantArt < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class DeviantArt < Source::Extractor
|
||||
def self.enabled?
|
||||
Danbooru.config.deviantart_client_id.present? && Danbooru.config.deviantart_client_secret.present?
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Fanbox
|
||||
module Sources
|
||||
module Strategies
|
||||
class Fanbox < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Fanbox < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Fanbox === parsed_url
|
||||
end
|
||||
@@ -1,7 +1,7 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Sources::Strategies
|
||||
class Fantia < Base
|
||||
class Source::Extractor
|
||||
class Fantia < Source::Extractor
|
||||
def self.enabled?
|
||||
Danbooru.config.fantia_session_id.present?
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Foundation
|
||||
module Sources
|
||||
module Strategies
|
||||
class Foundation < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Foundation < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Foundation === parsed_url
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::HentaiFoundry
|
||||
module Sources
|
||||
module Strategies
|
||||
class HentaiFoundry < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class HentaiFoundry < Source::Extractor
|
||||
def match?
|
||||
Source::URL::HentaiFoundry === parsed_url
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Lofter
|
||||
module Sources
|
||||
module Strategies
|
||||
class Lofter < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Lofter < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Lofter === parsed_url
|
||||
end
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Mastodon
|
||||
module Sources::Strategies
|
||||
class Mastodon < Base
|
||||
class Source::Extractor
|
||||
class Mastodon < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Mastodon === parsed_url
|
||||
end
|
||||
@@ -1,10 +1,10 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Moebooru
|
||||
module Sources
|
||||
module Strategies
|
||||
class Moebooru < Base
|
||||
delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true
|
||||
module Source
|
||||
class Extractor
|
||||
class Moebooru < Source::Extractor
|
||||
delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_extractor, allow_nil: true
|
||||
delegate :site_name, :domain, to: :parsed_url
|
||||
|
||||
def match?
|
||||
@@ -27,7 +27,7 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
# XXX the base strategy excludes artist tags from the translated tags; we don't want that for moebooru.
|
||||
# XXX the base extractor excludes artist tags from the translated tags; we don't want that for moebooru.
|
||||
def translated_tags
|
||||
tags.map(&:first).flat_map(&method(:translate_tag)).uniq.sort
|
||||
end
|
||||
@@ -50,8 +50,8 @@ module Sources
|
||||
memoize :api_response
|
||||
|
||||
concerning :HelperMethods do
|
||||
def sub_strategy
|
||||
@sub_strategy ||= Sources::Strategies.find(api_response[:source], default: nil)
|
||||
def sub_extractor
|
||||
@sub_extractor ||= Source::Extractor.find(api_response[:source], default: nil)
|
||||
end
|
||||
|
||||
def file_ext
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Newgrounds
|
||||
module Sources
|
||||
module Strategies
|
||||
class Newgrounds < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Newgrounds < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Newgrounds === parsed_url
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::NicoSeiga
|
||||
module Sources
|
||||
module Strategies
|
||||
class NicoSeiga < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class NicoSeiga < Source::Extractor
|
||||
def self.enabled?
|
||||
Danbooru.config.nico_seiga_user_session.present?
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Nijie
|
||||
module Sources
|
||||
module Strategies
|
||||
class Nijie < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Nijie < Source::Extractor
|
||||
def self.enabled?
|
||||
Danbooru.config.nijie_login.present? && Danbooru.config.nijie_password.present?
|
||||
end
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Sources
|
||||
module Strategies
|
||||
class Null < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Null < Source::Extractor
|
||||
def image_urls
|
||||
[url]
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Pixiv
|
||||
module Sources
|
||||
module Strategies
|
||||
class Pixiv < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Pixiv < Source::Extractor
|
||||
def self.enabled?
|
||||
Danbooru.config.pixiv_phpsessid.present?
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::PixivSketch
|
||||
module Sources
|
||||
module Strategies
|
||||
class PixivSketch < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class PixivSketch < Source::Extractor
|
||||
def match?
|
||||
Source::URL::PixivSketch === parsed_url
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Plurk
|
||||
module Sources
|
||||
module Strategies
|
||||
class Plurk < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Plurk < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Plurk === parsed_url
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Skeb
|
||||
module Sources
|
||||
module Strategies
|
||||
class Skeb < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Skeb < Extractor
|
||||
def match?
|
||||
Source::URL::Skeb === parsed_url
|
||||
end
|
||||
@@ -1,9 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Tinami
|
||||
module Sources
|
||||
module Strategies
|
||||
class Tinami < Base
|
||||
module Source
|
||||
class Extractor
|
||||
class Tinami < Source::Extractor
|
||||
|
||||
def match?
|
||||
Source::URL::Tinami === parsed_url
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Tumblr
|
||||
module Sources::Strategies
|
||||
class Tumblr < Base
|
||||
class Source::Extractor
|
||||
class Tumblr < Source::Extractor
|
||||
def self.enabled?
|
||||
Danbooru.config.tumblr_consumer_key.present?
|
||||
end
|
||||
@@ -1,8 +1,8 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Twitter
|
||||
module Sources::Strategies
|
||||
class Twitter < Base
|
||||
class Source::Extractor
|
||||
class Twitter < Source::Extractor
|
||||
# List of hashtag suffixes attached to tag other names
|
||||
# Ex: 西住みほ生誕祭2019 should be checked as 西住みほ
|
||||
# The regexes will not match if there is nothing preceding
|
||||
@@ -1,10 +1,9 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see Source::URL::Weibo
|
||||
module Sources
|
||||
module Strategies
|
||||
class Weibo < Base
|
||||
|
||||
module Source
|
||||
class Extractor
|
||||
class Weibo < Source::Extractor
|
||||
def match?
|
||||
Source::URL::Weibo === parsed_url
|
||||
end
|
||||
@@ -3,8 +3,8 @@
|
||||
# A Source::URL is a URL from a source site, such as Twitter, Pixiv, etc. Each site has a
|
||||
# subclass responsible for parsing and extracting information from URLs for that site.
|
||||
#
|
||||
# Sources::Strategies are the main user of Source::URLs. Each Source::URL subclass usually
|
||||
# has a corresponding strategy for extracting data from that site.
|
||||
# Source::Extractors are the main user of Source::URLs. Each Source::URL subclass usually
|
||||
# has a corresponding extractor for extracting data from that site.
|
||||
#
|
||||
# To add a new site, create a subclass of Source::URL and implement `#match?` to define
|
||||
# which URLs belong to the site, and `#parse` to parse and extract information from the URL.
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Sources
|
||||
class Error < StandardError
|
||||
end
|
||||
end
|
||||
@@ -1,35 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
module Sources
|
||||
module Strategies
|
||||
def self.all
|
||||
[
|
||||
Strategies::Pixiv,
|
||||
Strategies::Twitter,
|
||||
Strategies::Tumblr,
|
||||
Strategies::NicoSeiga,
|
||||
Strategies::DeviantArt,
|
||||
Strategies::Moebooru,
|
||||
Strategies::Nijie,
|
||||
Strategies::ArtStation,
|
||||
Strategies::HentaiFoundry,
|
||||
Strategies::Fanbox,
|
||||
Strategies::Mastodon,
|
||||
Strategies::PixivSketch,
|
||||
Strategies::Weibo,
|
||||
Strategies::Newgrounds,
|
||||
Strategies::Skeb,
|
||||
Strategies::Lofter,
|
||||
Strategies::Foundation,
|
||||
Strategies::Plurk,
|
||||
Strategies::Tinami,
|
||||
Strategies::Fantia,
|
||||
]
|
||||
end
|
||||
|
||||
def self.find(url, referer = nil, default: Strategies::Null)
|
||||
strategy = all.lazy.map { |s| s.new(url, referer) }.detect(&:match?)
|
||||
strategy || default&.new(url, referer)
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,259 +0,0 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# This is a collection of strategies for extracting information about a
|
||||
# resource. At a minimum it tries to extract the artist name and a canonical
|
||||
# URL to download the image from. But it can also be used to normalize a URL
|
||||
# for use with the artist finder.
|
||||
#
|
||||
# Design Principles
|
||||
#
|
||||
# In general you should minimize state. You can safely assume that <tt>url</tt>
|
||||
# and <tt>referer_url</tt> will not change over the lifetime of an instance,
|
||||
# so you can safely memoize methods and their results. A common pattern is
|
||||
# conditionally making an external API call and parsing its response. You should
|
||||
# make this call on demand and memoize the response.
|
||||
|
||||
module Sources
|
||||
module Strategies
|
||||
class Base
|
||||
extend Memoist
|
||||
|
||||
# The http timeout to download a file.
|
||||
DOWNLOAD_TIMEOUT = 60
|
||||
|
||||
attr_reader :url, :referer_url, :parsed_url, :parsed_referer
|
||||
delegate :site_name, to: :parsed_url
|
||||
|
||||
# Should return true if all prerequisites for using the strategy are met.
|
||||
# Return false if the strategy requires api keys that have not been configured.
|
||||
def self.enabled?
|
||||
true
|
||||
end
|
||||
|
||||
# Extract information from a target URL. The target URL may be either a
|
||||
# direct image URL, or the URL of a HTML page containing one or more
|
||||
# images.
|
||||
#
|
||||
# The referer URL is optionally provided when uploading direct image URLs
|
||||
# with the bookmarklet. This lets us find the page containing the image
|
||||
# for sites like Twitter, where the image URL by itself doesn't have
|
||||
# enough information to find the page containing the image.
|
||||
#
|
||||
# @param url [String] The target URL
|
||||
# @param referer_url [String] If the target URL is an image URL, this
|
||||
# should be the HTML page containing the image.
|
||||
def initialize(url, referer_url = nil)
|
||||
@url = url.to_s
|
||||
@referer_url = referer_url&.to_s
|
||||
|
||||
@parsed_url = Source::URL.parse(url)
|
||||
@parsed_referer = Source::URL.parse(referer_url) if referer_url.present?
|
||||
@parsed_referer = nil if parsed_url&.site_name != parsed_referer&.site_name
|
||||
end
|
||||
|
||||
# Should return true if this strategy should be used. By default, checks
|
||||
# if the main url belongs to any of the domains associated with this site.
|
||||
def match?
|
||||
false
|
||||
end
|
||||
|
||||
# Whatever <tt>url</tt> is, this method should return the direct links
|
||||
# to the canonical binary files. It should not be an HTML page. It should
|
||||
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
|
||||
# downloader will fetch and save to disk.
|
||||
def image_urls
|
||||
[]
|
||||
end
|
||||
|
||||
# The URL of the page containing the image, or nil if it can't be found.
|
||||
#
|
||||
# The source of the post will be set to the page URL if it's not possible
|
||||
# to convert the image URL to a page URL for this site.
|
||||
#
|
||||
# For example, for sites like Twitter and Tumblr, it's not possible to
|
||||
# convert image URLs to page URLs, so the page URL will be used as the
|
||||
# source for these sites. For sites like Pixiv and DeviantArt, it is
|
||||
# possible to convert image URLs to page URLs, so the image URL will be
|
||||
# used as the source for these sites. This is determined by whether
|
||||
# `Source::URL#page_url` returns a URL or nil.
|
||||
#
|
||||
# @return [String, nil]
|
||||
def page_url
|
||||
nil
|
||||
end
|
||||
|
||||
# A name to suggest as the artist's tag name when creating a new artist.
|
||||
# This should usually be the artist's account name.
|
||||
def tag_name
|
||||
artist_name
|
||||
end
|
||||
|
||||
# The artist's primary name. If an artist has both a display name and an
|
||||
# account name, this should be the display name.
|
||||
def artist_name
|
||||
nil
|
||||
end
|
||||
|
||||
# A list of all names associated with the artist. These names will be suggested
|
||||
# as other names when creating a new artist.
|
||||
def other_names
|
||||
[artist_name, tag_name].compact.uniq
|
||||
end
|
||||
|
||||
# A link to the artist's profile page on the site. This will be used for
|
||||
# artist finding purposes, so it needs to match the URL in the artist entry.
|
||||
def profile_url
|
||||
nil
|
||||
end
|
||||
|
||||
# A list of all profile urls associated with the artist. These urls will
|
||||
# be suggested when creating a new artist.
|
||||
def profile_urls
|
||||
[profile_url].compact
|
||||
end
|
||||
|
||||
def artist_commentary_title
|
||||
nil
|
||||
end
|
||||
|
||||
def artist_commentary_desc
|
||||
nil
|
||||
end
|
||||
|
||||
# Download the file at the given url. Raises Danbooru::Http::DownloadError if the download fails, or
|
||||
# Danbooru::Http::FileTooLargeError if the file is too large.
|
||||
#
|
||||
# @return [MediaFile] the downloaded file
|
||||
def download_file!(download_url)
|
||||
response, file = http_downloader.download_media(download_url)
|
||||
file
|
||||
end
|
||||
|
||||
# A http client for API requests.
|
||||
def http
|
||||
Danbooru::Http.new.proxy.public_only
|
||||
end
|
||||
memoize :http
|
||||
|
||||
# A http client for downloading files.
|
||||
def http_downloader
|
||||
http.timeout(DOWNLOAD_TIMEOUT).max_size(Danbooru.config.max_file_size).use(:spoof_referrer).use(:unpolish_cloudflare)
|
||||
end
|
||||
memoize :http_downloader
|
||||
|
||||
def artists
|
||||
ArtistFinder.find_artists(profile_url)
|
||||
end
|
||||
|
||||
# A new artist entry with suggested defaults for when the artist doesn't
|
||||
# exist. Used in Artist.new_with_defaults to prefill the new artist form.
|
||||
def new_artist
|
||||
Artist.new(
|
||||
name: tag_name,
|
||||
other_names: other_names,
|
||||
url_string: profile_urls.join("\n")
|
||||
)
|
||||
end
|
||||
|
||||
def tags
|
||||
(@tags || []).uniq
|
||||
end
|
||||
|
||||
def normalized_tags
|
||||
tags.map { |tag, _url| normalize_tag(tag) }.sort.uniq
|
||||
end
|
||||
|
||||
def normalize_tag(tag)
|
||||
WikiPage.normalize_other_name(tag).downcase
|
||||
end
|
||||
|
||||
def translated_tags
|
||||
translated_tags = normalized_tags.flat_map(&method(:translate_tag)).uniq.sort
|
||||
translated_tags.reject(&:artist?)
|
||||
end
|
||||
|
||||
# Given a tag from the source site, should return an array of corresponding Danbooru tags.
|
||||
def translate_tag(untranslated_tag)
|
||||
return [] if untranslated_tag.blank?
|
||||
|
||||
translated_tag_names = WikiPage.active.other_names_include(untranslated_tag).uniq.pluck(:title)
|
||||
translated_tag_names = TagAlias.to_aliased(translated_tag_names)
|
||||
translated_tags = Tag.where(name: translated_tag_names)
|
||||
|
||||
if translated_tags.empty?
|
||||
normalized_name = TagAlias.to_aliased([Tag.normalize_name(untranslated_tag)])
|
||||
translated_tags = Tag.nonempty.where(name: normalized_name)
|
||||
end
|
||||
|
||||
translated_tags
|
||||
end
|
||||
|
||||
def dtext_artist_commentary_title
|
||||
self.class.to_dtext(artist_commentary_title)
|
||||
end
|
||||
|
||||
# The artist commentary description converted with this class's `to_dtext`.
def dtext_artist_commentary_desc
  description = artist_commentary_desc
  self.class.to_dtext(description)
end
|
||||
|
||||
# The search query used to find posts previously uploaded from the same
# source. Matches may be duplicates, or other posts from the same gallery.
# The query is "source:" followed by `url`.
def related_posts_search_query
  source_url = url
  "source:#{source_url}"
end
|
||||
|
||||
# The first page of posts matching `related_posts_search_query`, holding at
# most `limit` posts (5 by default). Registered with `memoize` below.
def related_posts(limit = 5)
  matching_posts = Post.system_tag_match(related_posts_search_query)
  matching_posts.paginate(1, limit: limit)
end

memoize :related_posts
|
||||
|
||||
# A hash containing the results of any API calls made by the extractor. For
# debugging purposes only. This implementation has nothing to report and
# returns nil.
def api_response
  nil
end
|
||||
|
||||
# Serializes everything extracted from the source into a nested hash with
# symbol keys: artist details, matching artists (as JSON-ready data including
# their sorted URLs), image and page URLs, raw/normalized/translated tags,
# the artist commentary in raw and dtext form, and the API response
# converted with `to_h`.
def to_h
  {
    artist: {
      name: artist_name,
      tag_name: tag_name,
      other_names: other_names,
      profile_url: profile_url,
      profile_urls: profile_urls
    },
    artists: artists.as_json(include: :sorted_urls),
    image_urls: image_urls,
    page_url: page_url,
    tags: tags,
    normalized_tags: normalized_tags,
    translated_tags: translated_tags,
    artist_commentary: {
      title: artist_commentary_title,
      description: artist_commentary_desc,
      dtext_title: dtext_artist_commentary_title,
      dtext_description: dtext_artist_commentary_desc
    },
    api_response: api_response.to_h
  }
end
|
||||
|
||||
# Serializes `to_h` as JSON.
#
# Any arguments (for example the generator state that JSON.generate passes
# to nested objects, or an options hash) are forwarded to Hash#to_json
# rather than being discarded, so the caller's formatting is respected.
def to_json(*args)
  to_h.to_json(*args)
end
|
||||
|
||||
# Sends a HEAD request for `url` through the download client and reports
# whether the response status is a success.
def http_exists?(url)
  response = http_downloader.head(url)
  response.status.success?
end
|
||||
|
||||
# Converts commentary to dtext by stripping HTML tags. Sites can override
# this to customize how their markup is translated to dtext.
#
# The input is coerced to a string, run through the full sanitizer without
# re-encoding special characters, and then HTML entities are unescaped.
def self.to_dtext(text)
  stripped = Rails::Html::FullSanitizer.new.sanitize(text.to_s, encode_special_chars: false)
  CGI.unescapeHTML(stripped)
end
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -156,7 +156,7 @@ class Artist < ApplicationRecord
|
||||
end
|
||||
|
||||
if source.present?
|
||||
artist = Sources::Strategies.find(source).new_artist
|
||||
artist = Source::Extractor.find(source).new_artist
|
||||
artist.attributes = params
|
||||
else
|
||||
artist = Artist.new(params)
|
||||
@@ -252,7 +252,7 @@ class Artist < ApplicationRecord
|
||||
elsif query.include?("*")
|
||||
where(id: ArtistURL.where_like(:url, query).select(:artist_id))
|
||||
elsif query =~ %r{\Ahttps?://}i
|
||||
url = Sources::Strategies.find(query).profile_url || query
|
||||
url = Source::Extractor.find(query).profile_url || query
|
||||
ArtistFinder.find_artists(url)
|
||||
else
|
||||
where(id: ArtistURL.where_like(:url, "*#{query}*").select(:artist_id))
|
||||
|
||||
@@ -53,7 +53,7 @@ class ArtistURL < ApplicationRecord
|
||||
elsif url.include?("*")
|
||||
where_ilike(attr, url)
|
||||
else
|
||||
profile_url = Sources::Strategies.find(url).profile_url || url
|
||||
profile_url = Source::Extractor.find(url).profile_url || url
|
||||
where(attr => normalize_normalized_url(profile_url))
|
||||
end
|
||||
end
|
||||
|
||||
@@ -87,7 +87,7 @@ class Post < ApplicationRecord
|
||||
)
|
||||
|
||||
if add_artist_tag
|
||||
tag_string = "#{tag_string} #{upload_media_asset.source_strategy&.artists.to_a.map(&:tag).map(&:name).join(" ")}".strip
|
||||
tag_string = "#{tag_string} #{upload_media_asset.source_extractor&.artists.to_a.map(&:tag).map(&:name).join(" ")}".strip
|
||||
tag_string += " " if tag_string.present?
|
||||
end
|
||||
|
||||
@@ -1157,7 +1157,7 @@ class Post < ApplicationRecord
|
||||
self.pixiv_id = nil
|
||||
return unless web_source?
|
||||
|
||||
site = Sources::Strategies::Pixiv.new(source)
|
||||
site = Source::Extractor::Pixiv.new(source)
|
||||
if site.match?
|
||||
self.pixiv_id = site.illust_id
|
||||
end
|
||||
@@ -1265,7 +1265,7 @@ class Post < ApplicationRecord
|
||||
return if !web_source?
|
||||
return if has_tag?("artist_request") || has_tag?("official_art")
|
||||
return if tags.any?(&:artist?)
|
||||
return if Sources::Strategies.find(source).is_a?(Sources::Strategies::Null)
|
||||
return if Source::Extractor.find(source).is_a?(Source::Extractor::Null)
|
||||
|
||||
new_artist_path = Routes.new_artist_path(artist: { source: source })
|
||||
warnings.add(:base, "Artist tag is required. \"Create new artist tag\":[#{new_artist_path}]. Ask on the forum if you need naming help")
|
||||
|
||||
@@ -117,8 +117,8 @@ class Upload < ApplicationRecord
|
||||
UploadMediaAsset.new(file: file.tempfile, source_url: "file://#{file.original_filename}")
|
||||
end
|
||||
elsif source.present?
|
||||
page_url = source_strategy.page_url
|
||||
image_urls = source_strategy.image_urls
|
||||
page_url = source_extractor.page_url
|
||||
image_urls = source_extractor.image_urls
|
||||
|
||||
if image_urls.empty?
|
||||
raise Error, "#{source} doesn't contain any images"
|
||||
@@ -136,14 +136,14 @@ class Upload < ApplicationRecord
|
||||
update!(status: "error", error: e.message)
|
||||
end
|
||||
|
||||
def source_strategy
|
||||
def source_extractor
|
||||
return nil if source.blank?
|
||||
Sources::Strategies.find(source, referer_url)
|
||||
Source::Extractor.find(source, referer_url)
|
||||
end
|
||||
|
||||
def self.available_includes
|
||||
[:uploader, :upload_media_assets, :media_assets, :posts]
|
||||
end
|
||||
|
||||
memoize :source_strategy
|
||||
memoize :source_extractor
|
||||
end
|
||||
|
||||
@@ -79,9 +79,9 @@ class UploadMediaAsset < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
def source_strategy
|
||||
def source_extractor
|
||||
return nil if source_url.blank?
|
||||
Sources::Strategies.find(source_url, page_url)
|
||||
Source::Extractor.find(source_url, page_url)
|
||||
end
|
||||
|
||||
def async_process_upload!
|
||||
@@ -98,7 +98,7 @@ class UploadMediaAsset < ApplicationRecord
|
||||
if file.present?
|
||||
media_file = MediaFile.open(file)
|
||||
else
|
||||
media_file = source_strategy.download_file!(source_url)
|
||||
media_file = source_extractor.download_file!(source_url)
|
||||
end
|
||||
|
||||
MediaAsset.upload!(media_file) do |media_asset|
|
||||
@@ -120,5 +120,5 @@ class UploadMediaAsset < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
memoize :source_strategy
|
||||
memoize :source_extractor
|
||||
end
|
||||
|
||||
@@ -2,8 +2,8 @@
|
||||
<div id="a-index">
|
||||
<h1>Upload</h1>
|
||||
|
||||
<% if policy(@upload).show? && @upload.source_strategy.present? %>
|
||||
<%= render_source_data(@upload.source_strategy) %>
|
||||
<% if policy(@upload).show? && @upload.source_extractor.present? %>
|
||||
<%= render_source_data(@upload.source_extractor) %>
|
||||
<% end %>
|
||||
|
||||
<div class="border-b mb-4 flex flex-wrap gap-4">
|
||||
|
||||
@@ -34,10 +34,10 @@
|
||||
</p>
|
||||
</div>
|
||||
|
||||
<%= render "uploads/related_posts", source: upload_media_asset.source_strategy %>
|
||||
<%= render "uploads/related_posts", source: upload_media_asset.source_extractor %>
|
||||
|
||||
<% if upload_media_asset.source_strategy.present? %>
|
||||
<%= render_source_data(upload_media_asset.source_strategy) %>
|
||||
<% if upload_media_asset.source_extractor.present? %>
|
||||
<%= render_source_data(upload_media_asset.source_extractor) %>
|
||||
<% end %>
|
||||
|
||||
<% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %>
|
||||
|
||||
Reference in New Issue
Block a user