From 4ef8178bd13fe07e60da6c3e699082166e8a5979 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 23 Mar 2022 20:47:17 -0500 Subject: [PATCH] sources: remove `canonical_url` method. Refactor source strategies to remove the `canonical_url` method. `canonical_url` returned the URL that should be used as the source of the post after upload. Now we simply use `Source::URL#page_url` to determine the source after upload. If the source is an image URL that is convertible to a page URL, then the image URL is used as the source. If the source is an image URL that is not convertible to a page URL, then the page URL is used as the source. This simplifies source strategies so that all they have to care about is implementing the `Source::URL#page_url` and `Sources::Strategies#page_url` methods, and the preferred source will be chosen for posts automatically. --- .../source_data_component.html.erb | 2 +- app/logical/post_replacement_processor.rb | 11 +++++--- app/logical/source/url.rb | 3 +++ app/logical/source/url/moebooru.rb | 1 - app/logical/sources/strategies.rb | 4 --- app/logical/sources/strategies/base.rb | 26 ++++++++++--------- .../sources/strategies/hentai_foundry.rb | 4 --- app/logical/sources/strategies/moebooru.rb | 4 --- app/logical/sources/strategies/newgrounds.rb | 9 ------- app/logical/sources/strategies/null.rb | 4 --- app/logical/sources/strategies/pixiv.rb | 6 +---- app/models/upload_media_asset.rb | 14 ++++++++++ .../uploads/_single_asset_upload.html.erb | 2 +- test/unit/sources/art_station_test.rb | 10 +++---- test/unit/sources/deviant_art_test.rb | 12 --------- test/unit/sources/fanbox_test.rb | 2 +- test/unit/sources/hentai_foundry_test.rb | 5 ---- test/unit/sources/moebooru_test.rb | 1 - test/unit/sources/newgrounds_test.rb | 6 ++--- test/unit/sources/nico_seiga_test.rb | 14 +++++----- test/unit/sources/nijie_test.rb | 15 +++++------ test/unit/sources/null_test.rb | 1 - test/unit/sources/pixiv_sketch_test.rb | 5 ---- test/unit/sources/pixiv_test.rb | 5 ++-- test/unit/sources/skeb_test.rb | 4 +-- test/unit/sources/stash_test.rb | 2 -- test/unit/sources/tumblr_test.rb | 21 +++++++-------- test/unit/sources/twitter_test.rb | 16 +++++------- test/unit/sources/weibo_test.rb | 9 +++---- 29 files changed, 88 insertions(+), 130 deletions(-) diff --git a/app/components/source_data_component/source_data_component.html.erb b/app/components/source_data_component/source_data_component.html.erb index b6de6acbf..91d8da430 100644 --- a/app/components/source_data_component/source_data_component.html.erb +++ b/app/components/source_data_component/source_data_component.html.erb @@ -31,7 +31,7 @@ Artist <%= external_link_to @source.profile_url, @source.artist_name %> - (<%= link_to "Create new artist", new_artist_path(artist: { source: @source.canonical_url }) %>) + (<%= link_to "Create new artist", new_artist_path(artist: { source: @source.page_url || @source.url }) %>) <% end %> diff --git a/app/logical/post_replacement_processor.rb b/app/logical/post_replacement_processor.rb index efba651f9..47da83460 100644 --- a/app/logical/post_replacement_processor.rb +++ b/app/logical/post_replacement_processor.rb @@ -9,7 +9,7 @@ class PostReplacementProcessor end def process! - media_file = get_file_for_upload(replacement.replacement_url, nil, replacement.replacement_file&.tempfile) + media_file, image_url = get_file_for_upload(replacement.replacement_url, nil, replacement.replacement_file&.tempfile) if Post.where.not(id: post.id).exists?(md5: media_file.md5) raise "Duplicate of post ##{Post.find_by_md5(media_file.md5).id}" @@ -23,8 +23,10 @@ class PostReplacementProcessor if replacement.replacement_file.present? canonical_url = "file://#{replacement.replacement_file.original_filename}" + elsif Source::URL.page_url(image_url).present? + canonical_url = image_url else - canonical_url = Sources::Strategies.find(replacement.replacement_url).canonical_url + canonical_url = replacement.replacement_url end replacement.replacement_url = canonical_url @@ -70,6 +72,9 @@ class PostReplacementProcessor image_urls = strategy.image_urls raise "#{source_url} contains multiple images" if image_urls.size > 1 - strategy.download_file!(image_urls.first) + image_url = image_urls.first + file = strategy.download_file!(image_url) + + [file, image_url] end end diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index 66deca667..930d051a4 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -88,6 +88,9 @@ module Source # When viewing a post, the source will be shown as the page URL if it's # possible to convert the source from an image URL to a page URL. # + # When uploading a post, the source will be set to the image URL if the + # image URL is convertible to a page URL. Otherwise, it's set to the page URL. + # # Examples: # # * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png diff --git a/app/logical/source/url/moebooru.rb b/app/logical/source/url/moebooru.rb index 086f70b64..6d09d598f 100644 --- a/app/logical/source/url/moebooru.rb +++ b/app/logical/source/url/moebooru.rb @@ -103,7 +103,6 @@ class Source::URL::Moebooru < Source::URL filename_prefix = "Konachan.com%20-%20" end - # try to include the post_id so that it's saved for posterity in the canonical_url. if post_id.present? "https://#{file_host}/image/#{md5}/#{filename_prefix}#{post_id}.#{file_ext}" else diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 4062b8d33..c59169286 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -31,9 +31,5 @@ module Sources strategy = all.lazy.map { |s| s.new(url, referer) }.detect(&:match?) strategy || default&.new(url, referer) end - - def self.canonical(url, referer) - find(url, referer).canonical_url - end end end diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index a03c578e3..3f87e5a2c 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -65,20 +65,23 @@ module Sources [] end - # Whatever url is, this method should return a link to the HTML - # page containing the resource. It should not be a binary file. It will - # eventually be assigned as the source for the post, but it does not - # represent what the downloader will fetch. + # The URL of the page containing the image, or nil if it can't be found. + # + # The source of the post will be set to the page URL if it's not possible + # to convert the image URL to a page URL for this site. + # + # For example, for sites like Twitter and Tumblr, it's not possible to + # convert image URLs to page URLs, so the page URL will be used as the + # source for these sites. For sites like Pixiv and DeviantArt, it is + # possible to convert image URLs to page URLs, so the image URL will be + # used as the source for these sites. This is determined by whether + # `Source::URL#page_url` returns a URL or nil. + # + # @return [String, nil] def page_url nil end - # This will be the url stored in posts. Typically this is the page - # url, but on some sites it may be preferable to store the image url. - def canonical_url - page_url || image_urls.first - end - # A name to suggest as the artist's tag name when creating a new artist. # This should usually be the artist's account name. def tag_name @@ -197,7 +200,7 @@ module Sources # uploaded from the same source. These may be duplicates, or they may be # other posts from the same gallery. def related_posts_search_query - "source:#{canonical_url}" + "source:#{url}" end def related_posts(limit = 5) @@ -222,7 +225,6 @@ module Sources :artists => artists.as_json(include: :sorted_urls), :image_urls => image_urls, :page_url => page_url, - :canonical_url => canonical_url, :tags => tags, :normalized_tags => normalized_tags, :translated_tags => translated_tags, diff --git a/app/logical/sources/strategies/hentai_foundry.rb b/app/logical/sources/strategies/hentai_foundry.rb index 52d89e893..07fd68d6b 100644 --- a/app/logical/sources/strategies/hentai_foundry.rb +++ b/app/logical/sources/strategies/hentai_foundry.rb @@ -47,10 +47,6 @@ module Sources parsed_url.username || parsed_referer&.username end - def canonical_url - image_urls.first - end - def profile_url return nil if artist_name.blank? "https://www.hentai-foundry.com/user/#{artist_name}" diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index 9e5c249dd..277eeb0d5 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -21,10 +21,6 @@ module Sources "https://#{domain}/post/show/#{post_id}" end - def canonical_url - image_urls.first - end - def tags api_response[:tags].to_s.split.map do |tag| [tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"] diff --git a/app/logical/sources/strategies/newgrounds.rb b/app/logical/sources/strategies/newgrounds.rb index 778aa64a0..6b022424b 100644 --- a/app/logical/sources/strategies/newgrounds.rb +++ b/app/logical/sources/strategies/newgrounds.rb @@ -75,15 +75,6 @@ module Sources DText.from_html(artist_commentary_desc) end - # The image url should be the post source, if we can generate the page url from the image url. - def canonical_url - if page_url.present? - url - else - page_url - end - end - def user_name parsed_url.username || parsed_referer&.username end diff --git a/app/logical/sources/strategies/null.rb b/app/logical/sources/strategies/null.rb index a6a1cb01e..c7df33599 100644 --- a/app/logical/sources/strategies/null.rb +++ b/app/logical/sources/strategies/null.rb @@ -11,10 +11,6 @@ module Sources nil end - def canonical_url - url - end - def artists ArtistFinder.find_artists(url) end diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index d19d20742..d180b1f35 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -57,10 +57,6 @@ module Sources "https://www.pixiv.net/artworks/#{illust_id}" end - def canonical_url - image_urls.first - end - def profile_url if api_illust[:userId].present? "https://www.pixiv.net/users/#{api_illust[:userId]}" @@ -128,7 +124,7 @@ module Sources end def related_posts_search_query - illust_id.present? ? "pixiv:#{illust_id}" : "source:#{canonical_url}" + illust_id.present? ? "pixiv:#{illust_id}" : "source:#{url}" end def is_ugoira? diff --git a/app/models/upload_media_asset.rb b/app/models/upload_media_asset.rb index ac8514a8c..841963ead 100644 --- a/app/models/upload_media_asset.rb +++ b/app/models/upload_media_asset.rb @@ -65,6 +65,20 @@ class UploadMediaAsset < ApplicationRecord source_url.starts_with?("file://") end + # The source of the post after upload. + def canonical_url + return source_url if file_upload? + + # If the image URL is convertible to a page URL, or the page URL couldn't + # be found, then use the image URL as the source of the post. Otherwise, + # use the page URL. + if Source::URL.page_url(source_url).present? || page_url.blank? + source_url + else + page_url + end + end + def source_strategy return nil if source_url.blank? Sources::Strategies.find(source_url, page_url) diff --git a/app/views/uploads/_single_asset_upload.html.erb b/app/views/uploads/_single_asset_upload.html.erb index 6f4e01741..f68acf8bf 100644 --- a/app/views/uploads/_single_asset_upload.html.erb +++ b/app/views/uploads/_single_asset_upload.html.erb @@ -40,7 +40,7 @@ <%= render_source_data(upload_media_asset.source_strategy) %> <% end %> - <% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.source_strategy.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %> + <% post = Post.new_from_upload(upload_media_asset, add_artist_tag: true, source: upload_media_asset.canonical_url, **permitted_attributes(Post).to_h.symbolize_keys) %> <%= edit_form_for(post, html: { id: "form" }) do |f| %> <%= hidden_field_tag :media_asset_id, media_asset.id %> <%# used by iqdb javascript %> <%= hidden_field_tag :upload_media_asset_id, upload_media_asset.id %> diff --git a/test/unit/sources/art_station_test.rb b/test/unit/sources/art_station_test.rb index 5ce62bd72..07e35da91 100644 --- a/test/unit/sources/art_station_test.rb +++ b/test/unit/sources/art_station_test.rb @@ -11,8 +11,8 @@ module Sources assert_equal(["https://cdn.artstation.com/p/assets/images/images/000/705/368/4k/jey-rain-one1.jpg?1443931773"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.canonical_url) + should "get the page url" do + assert_equal("https://jeyrain.artstation.com/projects/04XA4", @site.page_url) end should "get the profile" do @@ -43,8 +43,8 @@ module Sources assert_equal([url], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.canonical_url) + should "get the page url" do + assert_equal("https://dantewontdie.artstation.com/projects/YZK5q", @site.page_url) end should "get the profile" do @@ -94,7 +94,6 @@ module Sources assert_equal(["https://cdn.artstation.com/p/assets/images/images/006/029/978/4k/amama-l-z.jpg"], site.image_urls) assert_equal("https://amama.artstation.com/projects/4BWW2", site.page_url) - assert_equal("https://amama.artstation.com/projects/4BWW2", site.canonical_url) assert_equal("https://www.artstation.com/amama", site.profile_url) assert_equal("amama", site.artist_name) assert_nothing_raised { site.to_h } @@ -178,7 +177,6 @@ module Sources assert_equal("fiship", site.artist_name) assert_equal("https://www.artstation.com/fiship", site.profile_url) assert_equal(url, site.page_url) - assert_equal(url, site.canonical_url) assert_equal([], site.image_urls) assert_nothing_raised { site.to_h } end diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index 38320fdd2..f82ed4e6a 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -20,7 +20,6 @@ module Sources assert_equal("aeror404", @site.artist_name) assert_equal("https://www.deviantart.com/aeror404", @site.profile_url) assert_equal("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484", @site.page_url) - assert_equal("https://www.deviantart.com/aeror404/art/Holiday-Elincia-424551484", @site.canonical_url) assert_equal("Holiday Elincia", @site.artist_commentary_title) end end @@ -31,7 +30,6 @@ module Sources @artist = create(:artist, name: "nickbeja", url_string: "https://nickbeja.deviantart.com") assert_equal(["https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png"], @site.image_urls) - assert_equal(@site.page_url, @site.canonical_url) assert_equal("nickbeja", @site.artist_name) assert_equal("https://www.deviantart.com/nickbeja", @site.profile_url) assert_equal("https://www.deviantart.com/nickbeja/art/Mindflayer-Girl01-708675884", @site.page_url) @@ -53,7 +51,6 @@ module Sources assert_equal("https://www.deviantart.com/noizave", @site.profile_url) assert_equal("test, no download", @site.artist_commentary_title) assert_equal("https://www.deviantart.com/noizave/art/test-no-download-697415967", @site.page_url) - assert_equal("https://www.deviantart.com/noizave/art/test-no-download-697415967", @site.canonical_url) end end @@ -69,7 +66,6 @@ module Sources assert_equal("https://www.deviantart.com/len1", @site.profile_url) assert_equal("All that Glitters II", @site.artist_commentary_title) assert_equal("https://www.deviantart.com/len1/art/All-that-Glitters-II-774592781", @site.page_url) - assert_equal("https://www.deviantart.com/len1/art/All-that-Glitters-II-774592781", @site.canonical_url) end end @@ -84,7 +80,6 @@ module Sources assert_equal("hideyoshi", @site.artist_name) assert_equal("https://www.deviantart.com/hideyoshi", @site.profile_url) assert_equal("https://www.deviantart.com/hideyoshi/art/Legend-of-Galactic-Heroes-635721022", @site.page_url) - assert_equal("https://www.deviantart.com/hideyoshi/art/Legend-of-Galactic-Heroes-635721022", @site.canonical_url) assert_equal(%w[barbarossa bay brunhild flare hangar odin planet ship spaceship sun sunset brünhild legendsofgalacticheroes].sort, @site.tags.map(&:first).sort) end end @@ -98,7 +93,6 @@ module Sources assert_downloaded(3619, @site.image_urls.sole) assert_equal("https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408", @site.page_url) - assert_equal("https://www.deviantart.com/noizave/art/test-post-please-ignore-685436408", @site.canonical_url) assert_equal("noizave", @site.artist_name) assert_equal("https://www.deviantart.com/noizave", @site.profile_url) @@ -156,7 +150,6 @@ module Sources assert_equal("47ness", @site.artist_name) assert_equal("https://www.deviantart.com/47ness", @site.profile_url) assert_nil(@site.page_url) - assert_equal(@site.image_urls.sole, @site.canonical_url) assert_equal([@artist], @site.artists) assert_nothing_raised { @site.to_h } end @@ -173,7 +166,6 @@ module Sources assert_equal("47ness", @site.artist_name) assert_equal("https://www.deviantart.com/47ness", @site.profile_url) assert_equal("https://www.deviantart.com/47ness/art/Cool-Like-Me-54339311", @site.page_url) - assert_equal(@site.page_url, @site.canonical_url) assert_equal([@artist], @site.artists) assert_nothing_raised { @site.to_h } end @@ -195,7 +187,6 @@ module Sources assert_nil(@site.artist_name) assert_nil(@site.profile_url) assert_nil(@site.page_url) - assert_equal(@site.image_urls.sole, @site.canonical_url) assert_equal([], @site.artists) assert_nothing_raised { @site.to_h } end @@ -209,7 +200,6 @@ module Sources assert_equal("edsfox", @site.artist_name) assert_equal("https://www.deviantart.com/edsfox", @site.profile_url) assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", @site.page_url) - assert_equal(@site.page_url, @site.canonical_url) assert_equal([@artist], @site.artists) assert_nothing_raised { @site.to_h } end @@ -231,7 +221,6 @@ module Sources assert_equal("edsfox", @site.artist_name) assert_equal("https://www.deviantart.com/edsfox", @site.profile_url) assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", @site.page_url) - assert_equal(@site.page_url, @site.canonical_url) assert_equal([@artist], @site.artists) assert_nothing_raised { @site.to_h } end @@ -250,7 +239,6 @@ module Sources @site = Sources::Strategies.find(@url, @ref) assert_equal(@ref, @site.page_url) - assert_equal(@ref, @site.canonical_url) assert_equal([@artist], @site.artists) assert_nothing_raised { @site.to_h } end diff --git a/test/unit/sources/fanbox_test.rb b/test/unit/sources/fanbox_test.rb index 54c9ab4d6..6f1f16fec 100644 --- a/test/unit/sources/fanbox_test.rb +++ b/test/unit/sources/fanbox_test.rb @@ -120,7 +120,7 @@ module Sources assert_nothing_raised { post.to_h } assert_downloaded(750_484, post.image_urls.sole) assert_equal("https://omu001.fanbox.cc", post.profile_url) - assert_equal(post.profile_url, post.canonical_url) + assert_equal(post.profile_url, post.page_url) artist = FactoryBot.create(:artist, name: "omu", url_string: "https://omu001.fanbox.cc") assert_equal([artist], post.artists) end diff --git a/test/unit/sources/hentai_foundry_test.rb b/test/unit/sources/hentai_foundry_test.rb index 9304904ac..b97abb02e 100644 --- a/test/unit/sources/hentai_foundry_test.rb +++ b/test/unit/sources/hentai_foundry_test.rb @@ -33,11 +33,6 @@ module Sources assert_equal(["https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png"], @image_2.image_urls) end - should "get the canonical url" do - assert_equal("https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png", @image_1.canonical_url) - assert_equal("https://pictures.hentai-foundry.com/a/Afrobull/795025/Afrobull-795025-kuroeda.png", @image_2.canonical_url) - end - should "download an image" do assert_downloaded(1_349_887, @image_1.image_urls.sole) assert_downloaded(1_349_887, @image_2.image_urls.sole) diff --git a/test/unit/sources/moebooru_test.rb b/test/unit/sources/moebooru_test.rb index 8c9351b4c..a8a9d68b8 100644 --- a/test/unit/sources/moebooru_test.rb +++ b/test/unit/sources/moebooru_test.rb @@ -7,7 +7,6 @@ module Sources assert_equal(site_name, site.site_name) assert_equal([image_url], site.image_urls) - assert_equal(image_url, site.canonical_url) assert_equal(page_url, site.page_url) if page_url.present? assert_equal(tags.sort, site.tags.map(&:first).sort) assert_equal(profile_url.to_s, site.profile_url.to_s) diff --git a/test/unit/sources/newgrounds_test.rb b/test/unit/sources/newgrounds_test.rb index bdc7dbc5e..280f66284 100644 --- a/test/unit/sources/newgrounds_test.rb +++ b/test/unit/sources/newgrounds_test.rb @@ -30,9 +30,9 @@ module Sources assert_equal([@image_url], @image_2.image_urls) end - should "get the canonical url" do - assert_equal(@url, @image_1.canonical_url) - assert_equal(@image_url, @image_2.canonical_url) + should "get the page url" do + assert_equal(@url, @image_1.page_url) + assert_equal(@url, @image_2.page_url) end should "download an image" do diff --git a/test/unit/sources/nico_seiga_test.rb b/test/unit/sources/nico_seiga_test.rb index 16557dd6b..b675644ed 100644 --- a/test/unit/sources/nico_seiga_test.rb +++ b/test/unit/sources/nico_seiga_test.rb @@ -53,10 +53,10 @@ module Sources 9.times { |n| assert_match(expected[n], @site_3.image_urls[n]) } end - should "get the canonical url" do - assert_equal("https://seiga.nicovideo.jp/image/source/4937663", @site_1.canonical_url) - assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", @site_2.canonical_url) - assert_equal("https://seiga.nicovideo.jp/watch/mg470189", @site_3.canonical_url) + should "get the page url" do + assert_equal("https://seiga.nicovideo.jp/image/source/4937663", @site_1.page_url) + assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", @site_2.page_url) + assert_equal("https://seiga.nicovideo.jp/watch/mg470189", @site_3.page_url) end should "get the tags" do @@ -83,7 +83,7 @@ module Sources site = Sources::Strategies.find("https://lohas.nicoseiga.jp/thumb/6844226i") assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/6844226!, site.image_urls.sole) - assert_match("https://seiga.nicovideo.jp/seiga/im6844226", site.canonical_url) + assert_match("https://seiga.nicovideo.jp/seiga/im6844226", site.page_url) end end @@ -98,8 +98,8 @@ module Sources assert_match(%r!https?://lohas.nicoseiga.jp/priv/[a-f0-9]{40}/[0-9]+/9146749!, @site.image_urls.sole) end - should "set the correct source" do - assert_equal(@ref, @site.canonical_url) + should "get the page url" do + assert_equal(@ref, @site.page_url) end end diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index 7e948c829..6287591a3 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -51,8 +51,8 @@ module Sources assert_downloaded(132_555, @site.image_urls.sole) end - should "get the canonical url" do - assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url) + should "get the page url" do + assert_equal("https://nijie.info/view.php?id=213043", @site.page_url) end should "get the profile" do @@ -120,8 +120,8 @@ module Sources assert_equal(["https://pic.nijie.net/03/nijie_picture/728995_20170505014820_0.jpg"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url) + should "get the page url" do + assert_equal("https://nijie.info/view.php?id=213043", @site.page_url) end should "get the profile" do @@ -142,8 +142,8 @@ module Sources assert_equal(["https://pic.nijie.net/07/nijie/17/95/728995/illust/0_0_403fdd541191110c_c25585.jpg"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://nijie.info/view.php?id=213043", @site.canonical_url) + should "get the page url" do + assert_equal("https://nijie.info/view.php?id=213043", @site.page_url) end should "get the profile" do @@ -191,7 +191,6 @@ module Sources assert_nil(site.page_url) assert_equal([image_url], site.image_urls) - assert_equal(image_url, site.canonical_url) assert_equal("https://nijie.info/members.php?id=236014", site.profile_url) assert_nothing_raised { site.to_h } end @@ -202,7 +201,6 @@ module Sources site = Sources::Strategies.find("https://pic.nijie.net/03/nijie_picture/diff/main/218856_4_236014_20170620101333.png") assert_equal("https://nijie.info/view.php?id=218856", site.page_url) - assert_equal("https://nijie.info/view.php?id=218856", site.canonical_url) assert_equal("https://nijie.info/members.php?id=236014", site.profile_url) assert_equal("名無しのチンポップ", site.artist_name) assert_equal([site.url], site.image_urls) @@ -214,7 +212,6 @@ module Sources site = Sources::Strategies.find("https://pic.nijie.net/04/nijie_picture/diff/main/287736_161475_20181112032855_1.png") assert_equal("https://nijie.info/view.php?id=287736", site.page_url) - assert_equal("https://nijie.info/view.php?id=287736", site.canonical_url) assert_equal("https://nijie.info/members.php?id=161475", site.profile_url) assert_equal("みな本", site.artist_name) assert_equal([site.url], site.image_urls) diff --git a/test/unit/sources/null_test.rb b/test/unit/sources/null_test.rb index 94063d82b..3525bcb6c 100644 --- a/test/unit/sources/null_test.rb +++ b/test/unit/sources/null_test.rb @@ -13,7 +13,6 @@ module Sources should "find the metadata" do assert_equal(["http://oremuhax.x0.com/yoro1603.jpg"], @site.image_urls) - assert_equal("http://oremuhax.x0.com/yoro1603.jpg", @site.canonical_url) assert_nil(@site.artist_name) assert_nil(@site.profile_url) assert_nothing_raised { @site.to_h } diff --git a/test/unit/sources/pixiv_sketch_test.rb b/test/unit/sources/pixiv_sketch_test.rb index 8da37f8ff..058944fbc 100644 --- a/test/unit/sources/pixiv_sketch_test.rb +++ b/test/unit/sources/pixiv_sketch_test.rb @@ -9,7 +9,6 @@ module Sources assert_equal("Pixiv Sketch", source.site_name) assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9986983/8431631593768139653.jpg"], source.image_urls) assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.page_url) - assert_equal("https://sketch.pixiv.net/items/5835314698645024323", source.canonical_url) assert_equal("https://sketch.pixiv.net/@user_ejkv8372", source.profile_url) assert_equal(["https://sketch.pixiv.net/@user_ejkv8372", "https://www.pixiv.net/users/44772126"], source.profile_urls) assert_equal("user_ejkv8372", source.artist_name) @@ -25,7 +24,6 @@ module Sources assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg"], source.image_urls) assert_nil(source.page_url) - assert_equal(source.url, source.canonical_url) assert_nil(source.profile_url) assert_equal([], source.profile_urls) assert_nil(source.artist_name) @@ -39,7 +37,6 @@ module Sources source = Sources::Strategies.find("https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg", "https://sketch.pixiv.net/items/8052785510155853613") assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url) - assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url) assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url) assert_equal(["https://sketch.pixiv.net/@op-one", "https://www.pixiv.net/users/5903369"], source.profile_urls) assert_equal("op-one", source.artist_name) @@ -54,7 +51,6 @@ module Sources assert_equal(["https://img-sketch.pixiv.net/uploads/medium/file/884876/4909517173982299587.jpg"], source.image_urls) assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.page_url) - assert_equal("https://sketch.pixiv.net/items/193462611994864256", source.canonical_url) assert_equal("https://sketch.pixiv.net/@lithla", source.profile_url) assert_equal(["https://sketch.pixiv.net/@lithla", "https://www.pixiv.net/users/4957"], source.profile_urls) assert_equal("lithla", source.artist_name) @@ -80,7 +76,6 @@ module Sources https://img-sketch.pixiv.net/uploads/medium/file/9988973/7216948861306830496.jpg ], source.image_urls) assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.page_url) - assert_equal("https://sketch.pixiv.net/items/8052785510155853613", source.canonical_url) assert_equal("https://sketch.pixiv.net/@op-one", source.profile_url) assert_equal("op-one", source.artist_name) assert_equal(<<~EOS.normalize_whitespace, source.artist_commentary_desc) diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb index 76fd5c2c5..b40c31289 100644 --- a/test/unit/sources/pixiv_test.rb +++ b/test/unit/sources/pixiv_test.rb @@ -73,7 +73,6 @@ module Sources @site = Sources::Strategies.find("https://www.pixiv.net/en/artworks/64476642") assert_equal(["https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg"], @site.image_urls) - assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url) assert_equal("https://www.pixiv.net/artworks/64476642", @site.page_url) @site = Sources::Strategies.find("https://www.pixiv.net/artworks/64476642") @@ -152,8 +151,8 @@ module Sources assert_equal(["https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg"], @site.image_urls) end - should "get the full size image url for the canonical url" do - assert_equal("https://i.pximg.net/img-original/img/2017/08/18/00/09/21/64476642_p0.jpg", @site.canonical_url) + should "get the page url" do + assert_equal("https://www.pixiv.net/artworks/64476642", @site.page_url) end end diff --git a/test/unit/sources/skeb_test.rb b/test/unit/sources/skeb_test.rb index c1b98c44e..45b1414ed 100644 --- a/test/unit/sources/skeb_test.rb +++ b/test/unit/sources/skeb_test.rb @@ -19,8 +19,8 @@ module Sources assert_equal(["https://skeb.imgix.net/uploads/origins/307941e9-dbe0-4e4b-93d4-94accdaff9a0?bg=%23fff&auto=format&w=800&s=e0ddfb1fa0d9f23797b338598aae78fa"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://skeb.jp/@kokuzou593/works/45", @site.canonical_url) + should "get the page url" do + assert_equal("https://skeb.jp/@kokuzou593/works/45", @site.page_url) end should "find the correct artist" do diff --git a/test/unit/sources/stash_test.rb b/test/unit/sources/stash_test.rb index 37ff8912a..d3b39f561 100644 --- a/test/unit/sources/stash_test.rb +++ b/test/unit/sources/stash_test.rb @@ -18,7 +18,6 @@ module Sources assert_equal("This is a test.", @site.artist_commentary_desc) assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) - assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) assert_match("https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/83d3eb4d-13e5-4aea-a08f-8d4331d033c4/dcmga0s-a345a815-2436-4ab5-8941-492011e1bff6.png", @site.image_urls.sole) end end @@ -35,7 +34,6 @@ module Sources assert_equal("This is a test.", @site.artist_commentary_desc) assert_equal("https://sta.sh/0wxs31o7nn2", @site.page_url) - assert_equal("https://sta.sh/0wxs31o7nn2", @site.canonical_url) assert_match("https://wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/83d3eb4d-13e5-4aea-a08f-8d4331d033c4/dcmga0s-a345a815-2436-4ab5-8941-492011e1bff6.png", @site.image_urls.sole) end end diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index 27b05d1e9..a9fd4ab4b 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -70,8 +70,8 @@ module Sources assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url) + should "get the page url" do + assert_equal("https://noizave.tumblr.com/post/162206271767", @site.page_url) end should "get the artist" do @@ -92,8 +92,8 @@ module Sources assert_equal(["https://media.tumblr.com/3bbfcbf075ddf969c996641b264086fd/tumblr_os2buiIOt51wsfqepo1_1280.png"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://noizave.tumblr.com/post/162206271767", @site.canonical_url) + should "get the page url" do + assert_equal("https://noizave.tumblr.com/post/162206271767", @site.page_url) end should "get the tags" do @@ -116,7 +116,7 @@ module Sources assert_equal("noizave", site.artist_name) assert_equal("https://noizave.tumblr.com", site.profile_url) assert_equal(["tag1", "tag2"], site.tags.map(&:first)) - assert_equal(@ref, site.canonical_url) + assert_equal(@ref, site.page_url) assert_equal(["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"], site.image_urls) end end @@ -128,7 +128,7 @@ module Sources assert_equal("noizave", site.artist_name) assert_equal("https://noizave.tumblr.com", site.profile_url) assert_equal(["tag1", "tag2"], site.tags.map(&:first)) - assert_equal(@ref, site.canonical_url) + assert_equal(@ref, site.page_url) assert_equal(["https://media.tumblr.com/7c4d2c6843466f92c3dd0516e749ec35/tumblr_orwwptNBCE1wsfqepo2_1280.jpg"], site.image_urls) end end @@ -165,7 +165,7 @@ module Sources ] assert_equal(urls, site.image_urls) - assert_equal(url, site.canonical_url) + assert_equal(url, site.page_url) end end @@ -184,8 +184,8 @@ module Sources assert_match("test answer", @site.artist_commentary_desc) end - should "get the canonical url" do - assert_equal("https://noizave.tumblr.com/post/171237880542", @site.canonical_url) + should "get the page url" do + assert_equal("https://noizave.tumblr.com/post/171237880542", @site.page_url) end end @@ -196,7 +196,7 @@ module Sources strategy = Sources::Strategies.find(image_url, page_url) assert_match(%r{/3dfdab77d913ad1ea59f22407d6ac6f3/b1764aa0f9c378d0-23/s\d+x\d+/}i, image_url) - assert_equal(page_url, strategy.canonical_url) + assert_equal(page_url, strategy.page_url) assert_downloaded(7_428_704, strategy.image_urls.sole) end end @@ -209,7 +209,6 @@ module Sources assert_equal("shimetsukage", site.artist_name) assert_equal("https://shimetsukage.tumblr.com", site.profile_url) assert_equal("https://shimetsukage.tumblr.com/post/176805588268", site.page_url) - assert_equal("https://shimetsukage.tumblr.com/post/176805588268", site.canonical_url) assert_equal([], site.image_urls) assert_equal([], site.tags) end diff --git a/test/unit/sources/twitter_test.rb b/test/unit/sources/twitter_test.rb index 4ea10aca2..51c927709 100644 --- a/test/unit/sources/twitter_test.rb +++ b/test/unit/sources/twitter_test.rb @@ -30,7 +30,7 @@ module Sources should "get the correct urls" do @site = Sources::Strategies.find("https://twitter.com/CincinnatiZoo/status/859073537713328129") assert_equal(["https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4"], @site.image_urls) - assert_equal("https://twitter.com/CincinnatiZoo/status/859073537713328129", @site.canonical_url) + assert_equal("https://twitter.com/CincinnatiZoo/status/859073537713328129", @site.page_url) end should "work when given a video thumbnail" do @@ -71,8 +71,8 @@ module Sources assert_equal(["https://pbs.twimg.com/media/C1kt72yVEAEGpOv.jpg:orig"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.canonical_url) + should "get the page url" do + assert_equal("https://twitter.com/aranobu/status/817736083567820800", @site.page_url) end end @@ -84,7 +84,6 @@ module Sources should "get the urls" do assert_equal(["https://pbs.twimg.com/media/B7jfc1JCcAEyeJh.png:orig"], @site.image_urls) assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.page_url) - assert_equal("https://twitter.com/Strangestone/status/556440271961858051", @site.canonical_url) end end @@ -119,8 +118,8 @@ module Sources assert_equal(["https://pbs.twimg.com/media/B4HSEP5CUAA4xyu.png:orig"], @site.image_urls) end - should "get the canonical url" do - assert_equal("https://twitter.com/nounproject/status/540944400767922176", @site.canonical_url) + should "get the page url" do + assert_equal("https://twitter.com/nounproject/status/540944400767922176", @site.page_url) end should "get the tags" do @@ -174,7 +173,6 @@ module Sources should "work" do assert_equal(["https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig"], @site.image_urls) - assert_equal("https://pbs.twimg.com/media/EBGp2YdUYAA19Uj.jpg:orig", @site.canonical_url) end should "work for filenames containing dashes" do @@ -192,8 +190,8 @@ module Sources assert_equal("https://twitter.com/motty08111213", @site.profile_url) end - should "get the canonical url" do - assert_equal("https://twitter.com/motty08111213/status/943446161586733056", @site.canonical_url) + should "get the page url" do + assert_equal("https://twitter.com/motty08111213/status/943446161586733056", @site.page_url) end end diff --git a/test/unit/sources/weibo_test.rb b/test/unit/sources/weibo_test.rb index 08d443b39..e6c860b79 100644 --- a/test/unit/sources/weibo_test.rb +++ b/test/unit/sources/weibo_test.rb @@ -29,8 +29,8 @@ module Sources assert_equal("https://www.weibo.com/u/5501756072", @site.profile_url) end - should "set the right source" do - assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.canonical_url) + should "get the page url" do + assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.page_url) end should "download an image" do @@ -79,12 +79,12 @@ module Sources end context "A multi-page upload" do - should "set the right source" do + should "get the page url" do url = "https://wx1.sinaimg.cn/large/7eb64558gy1fnbryriihwj20dw104wtu.jpg" ref = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t" site = Sources::Strategies.find(url, ref) - assert_equal("https://www.weibo.com/2125874520/FDKGo4Lk0", site.canonical_url) + assert_equal("https://www.weibo.com/2125874520/FDKGo4Lk0", site.page_url) end end @@ -106,7 +106,6 @@ module Sources ], @site.image_urls) assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.page_url) - assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.canonical_url) assert_equal("https://www.weibo.com/u/5501756072", @site.profile_url) assert_equal(%w[fgo Alter组], @site.tags.map(&:first)) assert_equal("阿尔托莉雅厨", @site.artist_name)