From 2d9bba4abb5e0e5cbf3239ce01abf290a42f1a8d Mon Sep 17 00:00:00 2001 From: evazion Date: Sun, 1 May 2022 19:29:39 -0500 Subject: [PATCH] posts: automatically add the bad_link and bad_source tags. Automatically add the bad_link tag when the source is an image url from a known site, but it can't be converted to a page url (for example, a Twitter or Tumblr direct image link). Automatically add the bad_source tag when the source is from a known site, but it's not an image or page url (for example, a Twitter or Pixiv profile url) --- app/logical/source/url.rb | 15 ++++++++++ app/logical/source/url/null.rb | 9 +++++- app/models/post.rb | 17 +++++++++++ test/unit/post_test.rb | 53 +++++++++++++++++++++++++++++++++- 4 files changed, 92 insertions(+), 2 deletions(-) diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index d512e5afa..6423080ba 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -9,6 +9,14 @@ # To add a new site, create a subclass of Source::URL and implement `#match?` to define # which URLs belong to the site, and `#parse` to parse and extract information from the URL. # +# The following methods should be implemented by subclasses: +# +# * match? +# * parse +# * image_url? +# * page_url +# * profile_url +# # Source::URL is a subclass of Danbooru::URL, so it inherits some common utility methods # from there. # @@ -87,6 +95,13 @@ module Source self.class.name.demodulize.titleize end + # True if the URL is from a recognized site. False if the URL is from an unrecognized site. + # + # @return [Boolean] + def recognized? + true # overridden in Source::URL::Null to return false for unknown sites + end + # True if the URL is a direct image URL. # # Examples: diff --git a/app/logical/source/url/null.rb b/app/logical/source/url/null.rb index 4af80a5af..92ddd4ec4 100644 --- a/app/logical/source/url/null.rb +++ b/app/logical/source/url/null.rb @@ -38,6 +38,8 @@ class Source::URL::Null < Source::URL end def parse + @recognized = true + case [subdomain, domain, *path_segments] # http://about.me/rig22 @@ -240,7 +242,12 @@ class Source::URL::Null < Source::URL @page_url = "https://www.zerochan.net/#{@work_id}#full" else - nil + @recognized = false + end end + + def recognized? + @recognized + end end diff --git a/app/models/post.rb b/app/models/post.rb index 7cf9ce042..718a59f15 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -430,6 +430,23 @@ class Post < ApplicationRecord tags << "non-web_source" end + source_url = parsed_source + if source_url.present? && source_url.recognized? + # A bad_link is an image URL from a recognized site that can't be converted to a page URL. + if source_url.image_url? && source_url.page_url.nil? + tags << "bad_link" + else + tags -= ["bad_link"] + end + + # A bad_source is a source from a recognized site that isn't an image url or a page url. + if !source_url.image_url? && !source_url.page_url? + tags << "bad_source" + else + tags -= ["bad_source"] + end + end + # Allow only Flash files to be manually tagged as `animated`; GIFs, PNGs, videos, and ugoiras are automatically tagged. tags -= ["animated"] unless is_flash? tags << "animated" if media_asset.is_animated? diff --git a/test/unit/post_test.rb b/test/unit/post_test.rb index e50175dd0..cc5bb85fd 100644 --- a/test/unit/post_test.rb +++ b/test/unit/post_test.rb @@ -1115,11 +1115,62 @@ class PostTest < ActiveSupport::TestCase context "a post with a non-web source" do should "automatically add the non-web_source tag" do @post.update!(source: "this was once revealed to me in a dream") - @post.save! assert_equal("non-web_source tag1 tag2", @post.tag_string) end end + context "a post with a bad_link source" do + should "add the bad_link tag for known bad sources" do + @post.update!(source: "https://pbs.twimg.com/media/FQjQA1mVgAMcHLv.jpg:orig") + assert_equal("bad_link tag1 tag2", @post.tag_string) + + @post.update!(source: "https://media.tumblr.com/570edf684c7eb195d391115f8b18ca55/tumblr_pen2zwt3bK1uh1m9xo1_1280.png") + assert_equal("bad_link tag1 tag2", @post.tag_string) + end + + should "remove the bad_link tag for known good sources" do + @post.update!(tag_string: "bad_link tag1 tag2") + @post.update!(source: "https://i.pximg.net/img-original/img/2022/04/25/08/03/14/97867015_p0.png") + assert_equal("tag1 tag2", @post.tag_string) + end + + should "not add the bad_link tag for unknown sources" do + @post.update!(source: "https://www.example.com/image.jpg") + assert_equal("tag1 tag2", @post.tag_string) + end + + should "not remove the bad_link tag for unknown sources" do + @post.update!(tag_string: "bad_link tag1 tag2", source: "https://www.example.com/image.jpg") + assert_equal("bad_link tag1 tag2", @post.tag_string) + end + end + + context "a post with a bad source" do + should "add the bad_source tag for known bad sources" do + @post.update!(source: "https://twitter.com/danboorubot/") + assert_equal("bad_source tag1 tag2", @post.tag_string) + + @post.update!(source: "https://www.pixiv.net/en/users/6210796") + assert_equal("bad_source tag1 tag2", @post.tag_string) + end + + should "remove the bad_source tag for known good sources" do + @post.update!(tag_string: "bad_source tag1 tag2") + @post.update!(source: "https://twitter.com/kafun/status/1520766650907521024") + assert_equal("tag1 tag2", @post.tag_string) + end + + should "not add the bad_source tag for unknown sources" do + @post.update!(source: "https://www.example.com/image.html") + assert_equal("tag1 tag2", @post.tag_string) + end + + should "not remove the bad_source tag for unknown sources" do + @post.update!(tag_string: "bad_source tag1 tag2", source: "https://www.example.com/image.html") + assert_equal("bad_source tag1 tag2", @post.tag_string) + end + end + context "a post with a blank source" do should "remove the non-web_source tag" do @post.update!(source: "", tag_string: "non-web_source")