sources: remove canonical_url method.

Refactor source strategies to remove the `canonical_url` method.

`canonical_url` returned the URL that should be used as the source of
the post after upload. Now we simply use `Source::URL#page_url` to
determine the source after upload. If the source is an image URL that is
convertible to a page URL, then the image URL is used as the source. If
the source is an image URL that is not convertible to a page URL, then
the page URL is used as the source, falling back to the image URL when
no page URL could be found.

This simplifies source strategies so that all they have to care about is
implementing the `Source::URL#page_url` and `Sources::Strategies#page_url`
methods, and the preferred source will be chosen for posts automatically.
This commit is contained in:
evazion
2022-03-23 20:47:17 -05:00
parent eef6e8f55f
commit 4ef8178bd1
29 changed files with 88 additions and 130 deletions

View File

@@ -31,7 +31,7 @@
<th>Artist</th>
<td>
<%= external_link_to @source.profile_url, @source.artist_name %>
(<%= link_to "Create new artist", new_artist_path(artist: { source: @source.canonical_url }) %>)
(<%= link_to "Create new artist", new_artist_path(artist: { source: @source.page_url || @source.url }) %>)
</td>
</tr>
<% end %>

View File

@@ -9,7 +9,7 @@ class PostReplacementProcessor
end
def process!
media_file = get_file_for_upload(replacement.replacement_url, nil, replacement.replacement_file&.tempfile)
media_file, image_url = get_file_for_upload(replacement.replacement_url, nil, replacement.replacement_file&.tempfile)
if Post.where.not(id: post.id).exists?(md5: media_file.md5)
raise "Duplicate of post ##{Post.find_by_md5(media_file.md5).id}"
@@ -23,8 +23,10 @@ class PostReplacementProcessor
if replacement.replacement_file.present?
canonical_url = "file://#{replacement.replacement_file.original_filename}"
elsif Source::URL.page_url(image_url).present?
canonical_url = image_url
else
canonical_url = Sources::Strategies.find(replacement.replacement_url).canonical_url
canonical_url = replacement.replacement_url
end
replacement.replacement_url = canonical_url
@@ -70,6 +72,9 @@ class PostReplacementProcessor
image_urls = strategy.image_urls
raise "#{source_url} contains multiple images" if image_urls.size > 1
strategy.download_file!(image_urls.first)
image_url = image_urls.first
file = strategy.download_file!(image_url)
[file, image_url]
end
end

View File

@@ -88,6 +88,9 @@ module Source
# When viewing a post, the source will be shown as the page URL if it's
# possible to convert the source from an image URL to a page URL.
#
# When uploading a post, the source will be set to the image URL if the
# image URL is convertible to a page URL. Otherwise, it's set to the page
# URL, or to the image URL if the page URL couldn't be found.
#
# Examples:
#
# * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png

View File

@@ -103,7 +103,6 @@ class Source::URL::Moebooru < Source::URL
filename_prefix = "Konachan.com%20-%20"
end
# try to include the post_id so that it's saved for posterity in the canonical_url.
if post_id.present?
"https://#{file_host}/image/#{md5}/#{filename_prefix}#{post_id}.#{file_ext}"
else

View File

@@ -31,9 +31,5 @@ module Sources
strategy = all.lazy.map { |s| s.new(url, referer) }.detect(&:match?)
strategy || default&.new(url, referer)
end
def self.canonical(url, referer)
find(url, referer).canonical_url
end
end
end

View File

@@ -65,20 +65,23 @@ module Sources
[]
end
# Whatever <tt>url</tt> is, this method should return a link to the HTML
# page containing the resource. It should not be a binary file. It will
# eventually be assigned as the source for the post, but it does not
# represent what the downloader will fetch.
# The URL of the page containing the image, or nil if it can't be found.
#
# The source of the post will be set to the page URL if it's not possible
# to convert the image URL to a page URL for this site.
#
# For example, for sites like Twitter and Tumblr, it's not possible to
# convert image URLs to page URLs, so the page URL will be used as the
# source for these sites. For sites like Pixiv and DeviantArt, it is
# possible to convert image URLs to page URLs, so the image URL will be
# used as the source for these sites. This is determined by whether
# `Source::URL#page_url` returns a URL or nil.
#
# @return [String, nil]
def page_url
nil
end
# This will be the url stored in posts. Typically this is the page
# url, but on some sites it may be preferable to store the image url.
def canonical_url
page_url || image_urls.first
end
# A name to suggest as the artist's tag name when creating a new artist.
# This should usually be the artist's account name.
def tag_name
@@ -197,7 +200,7 @@ module Sources
# uploaded from the same source. These may be duplicates, or they may be
# other posts from the same gallery.
def related_posts_search_query
"source:#{canonical_url}"
"source:#{url}"
end
def related_posts(limit = 5)
@@ -222,7 +225,6 @@ module Sources
:artists => artists.as_json(include: :sorted_urls),
:image_urls => image_urls,
:page_url => page_url,
:canonical_url => canonical_url,
:tags => tags,
:normalized_tags => normalized_tags,
:translated_tags => translated_tags,

View File

@@ -47,10 +47,6 @@ module Sources
parsed_url.username || parsed_referer&.username
end
def canonical_url
image_urls.first
end
def profile_url
return nil if artist_name.blank?
"https://www.hentai-foundry.com/user/#{artist_name}"

View File

@@ -21,10 +21,6 @@ module Sources
"https://#{domain}/post/show/#{post_id}"
end
def canonical_url
image_urls.first
end
def tags
api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]

View File

@@ -75,15 +75,6 @@ module Sources
DText.from_html(artist_commentary_desc)
end
# The image url should be the post source, if we can generate the page url from the image url.
def canonical_url
if page_url.present?
url
else
page_url
end
end
def user_name
parsed_url.username || parsed_referer&.username
end

View File

@@ -11,10 +11,6 @@ module Sources
nil
end
def canonical_url
url
end
def artists
ArtistFinder.find_artists(url)
end

View File

@@ -57,10 +57,6 @@ module Sources
"https://www.pixiv.net/artworks/#{illust_id}"
end
def canonical_url
image_urls.first
end
def profile_url
if api_illust[:userId].present?
"https://www.pixiv.net/users/#{api_illust[:userId]}"
@@ -128,7 +124,7 @@ module Sources
end
def related_posts_search_query
illust_id.present? ? "pixiv:#{illust_id}" : "source:#{canonical_url}"
illust_id.present? ? "pixiv:#{illust_id}" : "source:#{url}"
end
def is_ugoira?

View File

@@ -65,6 +65,20 @@ class UploadMediaAsset < ApplicationRecord
source_url.starts_with?("file://")
end
# Picks the URL that becomes the source of the post once the upload is done.
#
# File uploads keep their source URL as-is. For URL uploads, the image URL
# is kept when it can be converted back into a page URL, or when no page
# URL is known; in every other case the page URL is used instead.
def canonical_url
  return source_url if file_upload?

  keep_image_url = Source::URL.page_url(source_url).present? || page_url.blank?
  return page_url unless keep_image_url

  source_url
end
def source_strategy
return nil if source_url.blank?
Sources::Strategies.find(source_url, page_url)

View File

@@ -40,7 +40,7 @@
<%= render_source_data(upload_media_asset.source_strategy) %>
<% end %>
<% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.source_strategy.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %>
<% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %>
<%= edit_form_for(post, html: { id: "form" }) do |f| %>
<%= hidden_field_tag :media_asset_id, media_asset.id %> <%# used by iqdb javascript %>
<%= hidden_field_tag :upload_media_asset_id, upload_media_asset.id %>

View File

@@ -11,8 +11,8 @@ module Sources
assert_equal(["https://cdn.artstation.com/p/assets/images/images/000/705/368/4k/jey-rain-one1.jpg?1443931773"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.canonical_url)
should "get the page url" do
assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.page_url)
end
should "get the profile" do
@@ -43,8 +43,8 @@ module Sources
assert_equal([url], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.canonical_url)
should "get the page url" do
assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.page_url)
end
should "get the profile" do
@@ -94,7 +94,6 @@ module Sources
assert_equal(["https://cdn.artstation.com/p/assets/images/images/006/029/978/4k/amama-l-z.jpg"], site.image_urls)
assert_equal("https://amama.artstation.com/projects/4BWW2", site.page_url)
assert_equal("https://amama.artstation.com/projects/4BWW2", site.canonical_url)
assert_equal("https://www.artstation.com/amama", site.profile_url)
assert_equal("amama", site.artist_name)
assert_nothing_raised { site.to_h }
@@ -178,7 +177,6 @@ module Sources
assert_equal("fiship", site.artist_name)
assert_equal("https://www.artstation.com/fiship", site.profile_url)
assert_equal(url, site.page_url)
assert_equal(url, site.canonical_url)
assert_equal([], site.image_urls)
assert_nothing_raised { site.to_h }
end

View File

@@ -20,7 +20,6 @@ module Sources
assert_equal("aeror404", @site.artist_name)
assert_equal("https://www.deviantart.com/aeror404", @site.profile_url)
assert_equal("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484", @site.page_url)
assert_equal("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484", @site.canonical_url)
assert_equal("Holiday Elincia", @site.artist_commentary_title)
end
end
@@ -31,7 +30,6 @@ module Sources
@artist = create(:artist, name: "nickbeja", url_string: "https://nickbeja.deviantart.com")
assert_equal(["https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png"], @site.image_urls)
assert_equal(@site.page_url, @site.canonical_url)
assert_equal("nickbeja", @site.artist_name)
assert_equal("https://www.deviantart.com/nickbeja", @site.profile_url)
assert_equal("https://www.deviantart.com/nickbeja/art/Mindflayer-Girl01-708675884", @site.page_url)
@@ -53,7 +51,6 @@ module Sources
assert_equal("https://www.deviantart.com/noizave", @site.profile_url)
assert_equal("test, no download", @site.artist_commentary_title)
assert_equal("https://www.deviantart.com/noizave/art/test-no-download-697415967", @site.page_url)
assert_equal("https://www.deviantart.com/noizave/art/test-no-download-697415967", @site.canonical_url)
end
end
@@ -69,7 +66,6 @@ module Sources
assert_equal("https://www.deviantart.com/len1", @site.profile_url)
assert_equal("All that Glitters II", @site.artist_commentary_title)
assert_equal("https://www.deviantart.com/len1/art/All-that-Glitters-II-774592781", @site.page_url)
assert_equal("https://www.deviantart.com/len1/art/All-that-Glitters-II-774592781", @site.canonical_url)
end
end
@@ -84,7 +80,6 @@ module Sources
assert_equal("hideyoshi", @site.artist_name)
assert_equal("https://www.deviantart.com/hideyoshi", @site.profile_url)
assert_equal("https://www.deviantart.com/hideyoshi/art/Legend-of-Galactic-Heroes-635721022", @site.page_url)
assert_equal("https://www.deviantart.com/hideyoshi/art/Legend-of-Galactic-Heroes-635721022", @site.canonical_url)
assert_equal(%w[barbarossa bay brunhild flare hangar odin planet ship spaceship sun sunset brünhild legendsofgalacticheroes].sort, @site.tags.map(&:first).sort)
end
end
@@ -98,7 +93,6 @@ module Sources
assert_downloaded(3619, @site.image_urls.sole)
assert_equal("https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408", @site.page_url)
assert_equal("https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408", @site.canonical_url)
assert_equal("noizave", @site.artist_name)
assert_equal("https://www.deviantart.com/noizave", @site.profile_url)
@@ -156,7 +150,6 @@ module Sources
assert_equal("47ness", @site.artist_name)
assert_equal("https://www.deviantart.com/47ness", @site.profile_url)
assert_nil(@site.page_url)
assert_equal(@site.image_urls.sole, @site.canonical_url)
assert_equal([@artist], @site.artists)
assert_nothing_raised { @site.to_h }
end
@@ -173,7 +166,6 @@ module Sources
assert_equal("47ness", @site.artist_name)
assert_equal("https://www.deviantart.com/47ness", @site.profile_url)
assert_equal("https://www.deviantart.com/47ness/art/Cool-Like-Me-54339311", @site.page_url)
assert_equal(@site.page_url, @site.canonical_url)
assert_equal([@artist], @site.artists)
assert_nothing_raised { @site.to_h }
end
@@ -195,7 +187,6 @@ module Sources
assert_nil(@site.artist_name)
assert_nil(@site.profile_url)
assert_nil(@site.page_url)
assert_equal(@site.image_urls.sole, @site.canonical_url)
assert_equal([], @site.artists)
assert_nothing_raised { @site.to_h }
end
@@ -209,7 +200,6 @@ module Sources
assert_equal("edsfox", @site.artist_name)
assert_equal("https://www.deviantart.com/edsfox", @site.profile_url)
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", @site.page_url)
assert_equal(@site.page_url, @site.canonical_url)
assert_equal([@artist], @site.artists)
assert_nothing_raised { @site.to_h }
end
@@ -231,7 +221,6 @@ module Sources
assert_equal("edsfox", @site.artist_name)
assert_equal("https://www.deviantart.com/edsfox", @site.profile_url)
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", @site.page_url)
assert_equal(@site.page_url, @site.canonical_url)
assert_equal([@artist], @site.artists)
assert_nothing_raised { @site.to_h }
end
@@ -250,7 +239,6 @@ module Sources
@site = Sources::Strategies.find(@url, @ref)
assert_equal(@ref, @site.page_url)
assert_equal(@ref, @site.canonical_url)
assert_equal([@artist], @site.artists)
assert_nothing_raised { @site.to_h }
end

View File

@@ -120,7 +120,7 @@ module Sources
assert_nothing_raised { post.to_h }
assert_downloaded(750_484, post.image_urls.sole)
assert_equal("https://omu001.fanbox.cc", post.profile_url)
assert_equal(post.profile_url, post.canonical_url)
assert_equal(post.profile_url, post.page_url)
artist = FactoryBot.create(:artist, name: "omu", url_string: "https://omu001.fanbox.cc")
assert_equal([artist], post.artists)
end

View File

@@ -33,11 +33,6 @@ module Sources
assert_equal(["https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png"], @image_2.image_urls)
end
should "get the canonical url" do
assert_equal("https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png", @image_1.canonical_url)
assert_equal("https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png", @image_2.canonical_url)
end
should "download an image" do
assert_downloaded(1_349_887, @image_1.image_urls.sole)
assert_downloaded(1_349_887, @image_2.image_urls.sole)

View File

@@ -7,7 +7,6 @@ module Sources
assert_equal(site_name, site.site_name)
assert_equal([image_url], site.image_urls)
assert_equal(image_url, site.canonical_url)
assert_equal(page_url, site.page_url) if page_url.present?
assert_equal(tags.sort, site.tags.map(&:first).sort)
assert_equal(profile_url.to_s, site.profile_url.to_s)

View File

@@ -30,9 +30,9 @@ module Sources
assert_equal([@image_url], @image_2.image_urls)
end
should "get the canonical url" do
assert_equal(@url, @image_1.canonical_url)
assert_equal(@image_url, @image_2.canonical_url)
should "get the page url" do
assert_equal(@url, @image_1.page_url)
assert_equal(@url, @image_2.page_url)
end
should "download an image" do

View File

@@ -53,10 +53,10 @@ module Sources
9.times { |n| assert_match(expected[n], @site_3.image_urls[n]) }
end
should "get the canonical url" do
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", @site_1.canonical_url)
assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", @site_2.canonical_url)
assert_equal("https://seiga.nicovideo.jp/watch/mg470189", @site_3.canonical_url)
should "get the page url" do
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", @site_1.page_url)
assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", @site_2.page_url)
assert_equal("https://seiga.nicovideo.jp/watch/mg470189", @site_3.page_url)
end
should "get the tags" do
@@ -83,7 +83,7 @@ module Sources
site = Sources::Strategies.find("https://lohas.nicoseiga.jp/thumb/6844226i")
assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226!, site.image_urls.sole)
assert_match("https://seiga.nicovideo.jp/seiga/im6844226", site.canonical_url)
assert_match("https://seiga.nicovideo.jp/seiga/im6844226", site.page_url)
end
end
@@ -98,8 +98,8 @@ module Sources
assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/9146749!, @site.image_urls.sole)
end
should "set the correct source" do
assert_equal(@ref, @site.canonical_url)
should "get the page url" do
assert_equal(@ref, @site.page_url)
end
end

View File

@@ -51,8 +51,8 @@ module Sources
assert_downloaded(132_555, @site.image_urls.sole)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
should "get the page url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.page_url)
end
should "get the profile" do
@@ -120,8 +120,8 @@ module Sources
assert_equal(["https://pic.nijie.net/03/nijie_picture/728995_20170505014820_0.jpg"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
should "get the page url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.page_url)
end
should "get the profile" do
@@ -142,8 +142,8 @@ module Sources
assert_equal(["https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url)
should "get the page url" do
assert_equal("https://nijie.info/view.php?id=213043", @site.page_url)
end
should "get the profile" do
@@ -191,7 +191,6 @@ module Sources
assert_nil(site.page_url)
assert_equal([image_url], site.image_urls)
assert_equal(image_url, site.canonical_url)
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
assert_nothing_raised { site.to_h }
end
@@ -202,7 +201,6 @@ module Sources
site = Sources::Strategies.find("https://pic.nijie.net/03/nijie_picture/diff/main/218856_4_236014_20170620101333.png")
assert_equal("https://nijie.info/view.php?id=218856", site.page_url)
assert_equal("https://nijie.info/view.php?id=218856", site.canonical_url)
assert_equal("https://nijie.info/members.php?id=236014", site.profile_url)
assert_equal("名無しのチンポップ", site.artist_name)
assert_equal([site.url], site.image_urls)
@@ -214,7 +212,6 @@ module Sources
site = Sources::Strategies.find("https://pic.nijie.net/04/nijie_picture/diff/main/287736_161475_20181112032855_1.png")
assert_equal("https://nijie.info/view.php?id=287736", site.page_url)
assert_equal("https://nijie.info/view.php?id=287736", site.canonical_url)
assert_equal("https://nijie.info/members.php?id=161475", site.profile_url)
assert_equal("みな本", site.artist_name)
assert_equal([site.url], site.image_urls)

View File

@@ -13,7 +13,6 @@ module Sources
should "find the metadata" do
assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls)
assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url)
assert_nil(@site.artist_name)
assert_nil(@site.profile_url)
assert_nothing_raised { @site.to_h }

View File

@@ -9,7 +9,6 @@ module Sources
assert_equal("Pixiv Sketch", source.site_name)
assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9986983/8431631593768139653.jpg"], source.image_urls)
assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.page_url)
assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@user_ejkv8372", source.profile_url)
assert_equal(["https://sketch.pixiv.net/@user_ejkv8372", "https://www.pixiv.net/users/44772126"], source.profile_urls)
assert_equal("user_ejkv8372", source.artist_name)
@@ -25,7 +24,6 @@ module Sources
assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg"], source.image_urls)
assert_nil(source.page_url)
assert_equal(source.url, source.canonical_url)
assert_nil(source.profile_url)
assert_equal([], source.profile_urls)
assert_nil(source.artist_name)
@@ -39,7 +37,6 @@ module Sources
source = Sources::Strategies.find("https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg", "https://sketch.pixiv.net/items/8052785510155853613")
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url)
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url)
assert_equal(["https://sketch.pixiv.net/@op-one", "https://www.pixiv.net/users/5903369"], source.profile_urls)
assert_equal("op-one", source.artist_name)
@@ -54,7 +51,6 @@ module Sources
assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/884876/4909517173982299587.jpg"], source.image_urls)
assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.page_url)
assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@lithla", source.profile_url)
assert_equal(["https://sketch.pixiv.net/@lithla", "https://www.pixiv.net/users/4957"], source.profile_urls)
assert_equal("lithla", source.artist_name)
@@ -80,7 +76,6 @@ module Sources
https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg
], source.image_urls)
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url)
assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url)
assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url)
assert_equal("op-one", source.artist_name)
assert_equal(<<~EOS.normalize_whitespace, source.artist_commentary_desc)

View File

@@ -73,7 +73,6 @@ module Sources
@site = Sources::Strategies.find("https://www.pixiv.net/en/artworks/64476642")
assert_equal(["https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg"], @site.image_urls)
assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url)
assert_equal("https://www.pixiv.net/artworks/64476642", @site.page_url)
@site = Sources::Strategies.find("https://www.pixiv.net/artworks/64476642")
@@ -152,8 +151,8 @@ module Sources
assert_equal(["https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg"], @site.image_urls)
end
should "get the full size image url for the canonical url" do
assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url)
should "get the page url" do
assert_equal("https://www.pixiv.net/artworks/64476642", @site.page_url)
end
end

View File

@@ -19,8 +19,8 @@ module Sources
assert_equal(["https://skeb.imgix.net/uploads/origins/307941e9-dbe0-4e4b-93d4-94accdaff9a0?bg=%23fff&auto=format&w=800&s=e0ddfb1fa0d9f23797b338598aae78fa"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://skeb.jp/@kokuzou593/works/45", @site.canonical_url)
should "get the page url" do
assert_equal("https://skeb.jp/@kokuzou593/works/45", @site.page_url)
end
should "find the correct artist" do

View File

@@ -18,7 +18,6 @@ module Sources
assert_equal("This is a test.", @site.artist_commentary_desc)
assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url)
assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url)
assert_match("https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/83d3eb4d-13e5-4aea-a08f-8d4331d033c4/dcmga0s-a345a815-2436-4ab5-8941-492011e1bff6.png", @site.image_urls.sole)
end
end
@@ -35,7 +34,6 @@ module Sources
assert_equal("This is a test.", @site.artist_commentary_desc)
assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url)
assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url)
assert_match("https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/83d3eb4d-13e5-4aea-a08f-8d4331d033c4/dcmga0s-a345a815-2436-4ab5-8941-492011e1bff6.png", @site.image_urls.sole)
end
end

View File

@@ -70,8 +70,8 @@ module Sources
assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url)
should "get the page url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.page_url)
end
should "get the artist" do
@@ -92,8 +92,8 @@ module Sources
assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url)
should "get the page url" do
assert_equal("https://noizave.tumblr.com/post/162206271767", @site.page_url)
end
should "get the tags" do
@@ -116,7 +116,7 @@ module Sources
assert_equal("noizave", site.artist_name)
assert_equal("https://noizave.tumblr.com", site.profile_url)
assert_equal(["tag1", "tag2"], site.tags.map(&:first))
assert_equal(@ref, site.canonical_url)
assert_equal(@ref, site.page_url)
assert_equal(["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"], site.image_urls)
end
end
@@ -128,7 +128,7 @@ module Sources
assert_equal("noizave", site.artist_name)
assert_equal("https://noizave.tumblr.com", site.profile_url)
assert_equal(["tag1", "tag2"], site.tags.map(&:first))
assert_equal(@ref, site.canonical_url)
assert_equal(@ref, site.page_url)
assert_equal(["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"], site.image_urls)
end
end
@@ -165,7 +165,7 @@ module Sources
]
assert_equal(urls, site.image_urls)
assert_equal(url, site.canonical_url)
assert_equal(url, site.page_url)
end
end
@@ -184,8 +184,8 @@ module Sources
assert_match("test answer", @site.artist_commentary_desc)
end
should "get the canonical url" do
assert_equal("https://noizave.tumblr.com/post/171237880542", @site.canonical_url)
should "get the page url" do
assert_equal("https://noizave.tumblr.com/post/171237880542", @site.page_url)
end
end
@@ -196,7 +196,7 @@ module Sources
strategy = Sources::Strategies.find(image_url, page_url)
assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, image_url)
assert_equal(page_url, strategy.canonical_url)
assert_equal(page_url, strategy.page_url)
assert_downloaded(7_428_704, strategy.image_urls.sole)
end
end
@@ -209,7 +209,6 @@ module Sources
assert_equal("shimetsukage", site.artist_name)
assert_equal("https://shimetsukage.tumblr.com", site.profile_url)
assert_equal("https://shimetsukage.tumblr.com/post/176805588268", site.page_url)
assert_equal("https://shimetsukage.tumblr.com/post/176805588268", site.canonical_url)
assert_equal([], site.image_urls)
assert_equal([], site.tags)
end

View File

@@ -30,7 +30,7 @@ module Sources
should "get the correct urls" do
@site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129")
assert_equal(["https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4"], @site.image_urls)
assert_equal("https://twitter.com/CincinnatiZoo/status/859073537713328129", @site.canonical_url)
assert_equal("https://twitter.com/CincinnatiZoo/status/859073537713328129", @site.page_url)
end
should "work when given a video thumbnail" do
@@ -71,8 +71,8 @@ module Sources
assert_equal(["https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url)
should "get the page url" do
assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.page_url)
end
end
@@ -84,7 +84,6 @@ module Sources
should "get the urls" do
assert_equal(["https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:orig"], @site.image_urls)
assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.page_url)
assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.canonical_url)
end
end
@@ -119,8 +118,8 @@ module Sources
assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls)
end
should "get the canonical url" do
assert_equal("https://twitter.com/nounproject/status/540944400767922176", @site.canonical_url)
should "get the page url" do
assert_equal("https://twitter.com/nounproject/status/540944400767922176", @site.page_url)
end
should "get the tags" do
@@ -174,7 +173,6 @@ module Sources
should "work" do
assert_equal(["https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig"], @site.image_urls)
assert_equal("https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig", @site.canonical_url)
end
should "work for filenames containing dashes" do
@@ -192,8 +190,8 @@ module Sources
assert_equal("https://twitter.com/motty08111213", @site.profile_url)
end
should "get the canonical url" do
assert_equal("https://twitter.com/motty08111213/status/943446161586733056", @site.canonical_url)
should "get the page url" do
assert_equal("https://twitter.com/motty08111213/status/943446161586733056", @site.page_url)
end
end

View File

@@ -29,8 +29,8 @@ module Sources
assert_equal("https://www.weibo.com/u/5501756072", @site.profile_url)
end
should "set the right source" do
assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.canonical_url)
should "get the page url" do
assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.page_url)
end
should "download an image" do
@@ -79,12 +79,12 @@ module Sources
end
context "A multi-page upload" do
should "set the right source" do
should "get the page url" do
url = "https://wx1.sinaimg.cn/large/7eb64558gy1fnbryriihwj20dw104wtu.jpg"
ref = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t"
site = Sources::Strategies.find(url, ref)
assert_equal("https://www.weibo.com/2125874520/FDKGo4Lk0", site.canonical_url)
assert_equal("https://www.weibo.com/2125874520/FDKGo4Lk0", site.page_url)
end
end
@@ -106,7 +106,6 @@ module Sources
], @site.image_urls)
assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.page_url)
assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.canonical_url)
assert_equal("https://www.weibo.com/u/5501756072", @site.profile_url)
assert_equal(%w[fgo Alter组], @site.tags.map(&:first))
assert_equal("阿尔托莉雅厨", @site.artist_name)