sources: remove canonical_url method.

Refactor source strategies to remove the `canonical_url` method.

`canonical_url` returned the URL that should be used as the source of
the post after upload. Now we simply use `Source::URL#page_url` to
determine the source after upload. If the image URL is convertible to a
page URL, then the image URL is used as the source. If the image URL is
not convertible to a page URL, then the page URL is used as the source
instead, falling back to the image URL when no page URL is known.

This simplifies source strategies so that all they have to care about is
implementing the `Source::URL#page_url` and `Sources::Strategies#page_url`
methods, and the preferred source will be chosen for posts automatically.
This commit is contained in:
evazion
2022-03-23 20:47:17 -05:00
parent eef6e8f55f
commit 4ef8178bd1
29 changed files with 88 additions and 130 deletions

View File

@@ -31,7 +31,7 @@
<th>Artist</th>
<td>
<%= external_link_to @source.profile_url, @source.artist_name %>
(<%= link_to "Create new artist", new_artist_path(artist: { source: @source.canonical_url }) %>)
(<%= link_to "Create new artist", new_artist_path(artist: { source: @source.page_url || @source.url }) %>)
</td>
</tr>
<% end %>

View File

@@ -9,7 +9,7 @@ class PostReplacementProcessor
end
def process!
media_file = get_file_for_upload(replacement.replacement_url, nil, replacement.replacement_file&.tempfile)
media_file, image_url = get_file_for_upload(replacement.replacement_url, nil, replacement.replacement_file&.tempfile)
if Post.where.not(id: post.id).exists?(md5: media_file.md5)
raise "Duplicate of post ##{Post.find_by_md5(media_file.md5).id}"
@@ -23,8 +23,10 @@ class PostReplacementProcessor
if replacement.replacement_file.present?
canonical_url = "file://#{replacement.replacement_file.original_filename}"
elsif Source::URL.page_url(image_url).present?
canonical_url = image_url
else
canonical_url = Sources::Strategies.find(replacement.replacement_url).canonical_url
canonical_url = replacement.replacement_url
end
replacement.replacement_url = canonical_url
@@ -70,6 +72,9 @@ class PostReplacementProcessor
image_urls = strategy.image_urls
raise "#{source_url} contains multiple images" if image_urls.size > 1
strategy.download_file!(image_urls.first)
image_url = image_urls.first
file = strategy.download_file!(image_url)
[file, image_url]
end
end

View File

@@ -88,6 +88,9 @@ module Source
# When viewing a post, the source will be shown as the page URL if it's
# possible to convert the source from an image URL to a page URL.
#
# When uploading a post, the source will be set to the image URL if the
# image URL is convertible to a page URL. Otherwise, it's set to the page URL.
#
# Examples:
#
# * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png

View File

@@ -103,7 +103,6 @@ class Source::URL::Moebooru < Source::URL
filename_prefix = "Konachan.com%20-%20"
end
# try to include the post_id so that it's saved for posterity in the canonical_url.
if post_id.present?
"https://#{file_host}/image/#{md5}/#{filename_prefix}#{post_id}.#{file_ext}"
else

View File

@@ -31,9 +31,5 @@ module Sources
strategy = all.lazy.map { |s| s.new(url, referer) }.detect(&:match?)
strategy || default&.new(url, referer)
end
def self.canonical(url, referer)
find(url, referer).canonical_url
end
end
end

View File

@@ -65,20 +65,23 @@ module Sources
[]
end
# Whatever <tt>url</tt> is, this method should return a link to the HTML
# page containing the resource. It should not be a binary file. It will
# eventually be assigned as the source for the post, but it does not
# represent what the downloader will fetch.
# The URL of the page containing the image, or nil if it can't be found.
#
# The source of the post will be set to the page URL if it's not possible
# to convert the image URL to a page URL for this site.
#
# For example, for sites like Twitter and Tumblr, it's not possible to
# convert image URLs to page URLs, so the page URL will be used as the
# source for these sites. For sites like Pixiv and DeviantArt, it is
# possible to convert image URLs to page URLs, so the image URL will be
# used as the source for these sites. This is determined by whether
# `Source::URL#page_url` returns a URL or nil.
#
# @return [String, nil]
def page_url
nil
end
# This will be the url stored in posts. Typically this is the page
# url, but on some sites it may be preferable to store the image url.
def canonical_url
page_url || image_urls.first
end
# A name to suggest as the artist's tag name when creating a new artist.
# This should usually be the artist's account name.
def tag_name
@@ -197,7 +200,7 @@ module Sources
# uploaded from the same source. These may be duplicates, or they may be
# other posts from the same gallery.
def related_posts_search_query
"source:#{canonical_url}"
"source:#{url}"
end
def related_posts(limit = 5)
@@ -222,7 +225,6 @@ module Sources
:artists => artists.as_json(include: :sorted_urls),
:image_urls => image_urls,
:page_url => page_url,
:canonical_url => canonical_url,
:tags => tags,
:normalized_tags => normalized_tags,
:translated_tags => translated_tags,

View File

@@ -47,10 +47,6 @@ module Sources
parsed_url.username || parsed_referer&.username
end
def canonical_url
image_urls.first
end
def profile_url
return nil if artist_name.blank?
"https://www.hentai-foundry.com/user/#{artist_name}"

View File

@@ -21,10 +21,6 @@ module Sources
"https://#{domain}/post/show/#{post_id}"
end
def canonical_url
image_urls.first
end
def tags
api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]

View File

@@ -75,15 +75,6 @@ module Sources
DText.from_html(artist_commentary_desc)
end
# The image url should be the post source, if we can generate the page url from the image url.
def canonical_url
if page_url.present?
url
else
page_url
end
end
def user_name
parsed_url.username || parsed_referer&.username
end

View File

@@ -11,10 +11,6 @@ module Sources
nil
end
def canonical_url
url
end
def artists
ArtistFinder.find_artists(url)
end

View File

@@ -57,10 +57,6 @@ module Sources
"https://www.pixiv.net/artworks/#{illust_id}"
end
def canonical_url
image_urls.first
end
def profile_url
if api_illust[:userId].present?
"https://www.pixiv.net/users/#{api_illust[:userId]}"
@@ -128,7 +124,7 @@ module Sources
end
def related_posts_search_query
illust_id.present? ? "pixiv:#{illust_id}" : "source:#{canonical_url}"
illust_id.present? ? "pixiv:#{illust_id}" : "source:#{url}"
end
def is_ugoira?

View File

@@ -65,6 +65,20 @@ class UploadMediaAsset < ApplicationRecord
source_url.starts_with?("file://")
end
# The source of the post after upload.
def canonical_url
return source_url if file_upload?
# If the image URL is convertible to a page URL, or the page URL couldn't
# be found, then use the image URL as the source of the post. Otherwise,
# use the page URL.
if Source::URL.page_url(source_url).present? || page_url.blank?
source_url
else
page_url
end
end
def source_strategy
return nil if source_url.blank?
Sources::Strategies.find(source_url, page_url)

View File

@@ -40,7 +40,7 @@
<%= render_source_data(upload_media_asset.source_strategy) %>
<% end %>
<% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.source_strategy.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %>
<% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %>
<%= edit_form_for(post, html: { id: "form" }) do |f| %>
<%= hidden_field_tag :media_asset_id, media_asset.id %> <%# used by iqdb javascript %>
<%= hidden_field_tag :upload_media_asset_id, upload_media_asset.id %>