From dd0d9dff4a0257add173e08caad1118333be240e Mon Sep 17 00:00:00 2001 From: evazion Date: Thu, 23 Apr 2020 01:51:30 -0500 Subject: [PATCH] search: move misc search parsing helpers to PostQueryBuilder. * Move various search parser helper methods (`has_metatag?`, `is_single_tag?` et al) from PostSets and the Tag model to PostQueryBuilder. * Fix various minor bugs stemming from trying to check if a search query contains certain metatags using regexes or other adhoc techniques. --- app/helpers/posts_helper.rb | 2 +- app/logical/post_query_builder.rb | 53 +++++++++++++++++-- app/logical/post_sets/post.rb | 49 +++++------------ app/logical/related_tag_query.rb | 2 +- app/models/post.rb | 4 +- app/models/tag.rb | 35 ------------ app/models/upload.rb | 2 +- app/presenters/post_presenter.rb | 2 +- app/presenters/post_set_presenters/post.rb | 14 ++--- app/views/posts/index.html.erb | 2 +- .../posts/partials/index/_related.html.erb | 2 +- .../partials/index/_seo_meta_tags.html.erb | 2 +- test/unit/post_query_builder_test.rb | 35 ++++++++++++ test/unit/post_sets/post_test.rb | 14 ----- test/unit/tag_test.rb | 43 --------------- 15 files changed, 114 insertions(+), 147 deletions(-) diff --git a/app/helpers/posts_helper.rb b/app/helpers/posts_helper.rb index f9f2a543d..932e8a184 100644 --- a/app/helpers/posts_helper.rb +++ b/app/helpers/posts_helper.rb @@ -15,7 +15,7 @@ module PostsHelper def missed_post_search_count_js return unless post_search_counts_enabled? - return unless params[:ms] == "1" && @post_set.post_count == 0 && @post_set.is_single_tag? + return unless params[:ms] == "1" && @post_set.post_count == 0 && @post_set.query.is_single_term? sig = generate_reportbooru_signature(params[:tags]) render "posts/partials/index/missed_search_count", sig: sig diff --git a/app/logical/post_query_builder.rb b/app/logical/post_query_builder.rb index ba0d6e027..cef2edea1 100644 --- a/app/logical/post_query_builder.rb +++ b/app/logical/post_query_builder.rb @@ -761,7 +761,7 @@ class PostQueryBuilder terms << OpenStruct.new({ type: :metatag, name: metatag.downcase, value: value }) elsif scanner.scan(/[^ ]+/) - terms << OpenStruct.new({ type: :tag, value: scanner.matched.downcase }) + terms << OpenStruct.new({ type: :tag, name: scanner.matched.downcase }) end end @@ -775,7 +775,7 @@ class PostQueryBuilder elsif term.type == :metatag "#{term.name}:#{term.value}" elsif term.type == :tag - term.value + term.name end end end @@ -1097,7 +1097,7 @@ class PostQueryBuilder end else - parse_tag(term.value, q[:tags]) + parse_tag(term.name, q[:tags]) end end @@ -1254,5 +1254,52 @@ class PostQueryBuilder end end + concerning :UtilityMethods do + def tags + scan_query.select { |term| term.type == :tag } + end + + def metatags + scan_query.select { |term| term.type == :metatag } + end + + def find_metatag(metatag) + metatags.find { |term| term.name == metatag.to_s.downcase }.try(:value) + end + + def has_metatag?(*metatag_names) + metatags.any? { |term| term.name.in?(metatag_names.map(&:to_s).map(&:downcase)) } + end + + def is_metatag?(name, value = nil) + if value.nil? + is_single_term? && has_metatag?(name) + else + is_single_term? && find_metatag(name) == value.to_s + end + end + + def is_empty_search? + scan_query.size == 0 + end + + def is_single_term? + scan_query.size == 1 + end + + def is_single_tag? + is_single_term? && tags.size == 1 + end + + def is_simple_tag? + tag = tags.first&.name + is_single_tag? && !tag.starts_with?("-") && !tag.starts_with?("~") && !tag.include?("*") + end + + def is_wildcard_search? + is_single_tag? && tags.first.name.include?("*") + end + end + memoize :scan_query, :split_query, :parse_query end diff --git a/app/logical/post_sets/post.rb b/app/logical/post_sets/post.rb index fc8610852..fe36c4867 100644 --- a/app/logical/post_sets/post.rb +++ b/app/logical/post_sets/post.rb @@ -1,10 +1,11 @@ module PostSets class Post MAX_PER_PAGE = 200 - attr_reader :tag_array, :page, :raw, :random, :post_count, :format + attr_reader :page, :raw, :random, :post_count, :format, :tag_string, :query def initialize(tags, page = 1, per_page = nil, raw: false, random: false, format: "html") - @tag_array = PostQueryBuilder.new(tags).split_query + @query = PostQueryBuilder.new(tags) + @tag_string = tags @page = page @per_page = per_page @raw = raw.to_s.truthy? @@ -12,12 +13,8 @@ module PostSets @format = format.to_s end - def tag_string - @tag_string ||= tag_array.uniq.join(" ") - end - def humanized_tag_string - tag_array.map { |tag| tag.tr("_", " ").titleize }.to_sentence + query.split_query.map { |tag| tag.tr("_", " ").titleize }.to_sentence end def has_blank_wiki? @@ -31,8 +28,8 @@ module PostSets end def tag - return nil if !is_single_tag? - @tag ||= Tag.find_by(name: Tag.normalize_name(tag_string)) + return nil unless query.is_simple_tag? + @tag ||= Tag.find_by(name: query.tags.first.name) end def artist @@ -42,15 +39,15 @@ module PostSets end def pool - name = Tag.has_metatag?(tag_array, :ordpool, :pool) - return nil unless is_single_tag? && name.present? + return nil unless query.is_metatag?(:pool) || query.is_metatag?(:ordpool) + name = query.find_metatag(:pool) || query.find_metatag(:ordpool) @pool ||= Pool.find_by_name(name) end def favgroup - name = Tag.has_metatag?(tag_array, :favgroup) - return nil unless is_single_tag? && name.present? + return nil unless query.is_metatag?(:favgroup) + name = query.find_metatag(:favgroup) @favgroup ||= FavoriteGroup.visible(CurrentUser.user).find_by_name_or_id(name, CurrentUser.user) end @@ -76,11 +73,11 @@ module PostSets end def per_page - (@per_page || Tag.has_metatag?(tag_array, :limit) || CurrentUser.user.per_page).to_i.clamp(0, MAX_PER_PAGE) + (@per_page || query.find_metatag(:limit) || CurrentUser.user.per_page).to_i.clamp(0, MAX_PER_PAGE) end def is_random? - random || Tag.has_metatag?(tag_array, :order) == "random" + random || query.find_metatag(:order) == "random" end def get_post_count @@ -118,34 +115,14 @@ module PostSets def hide_from_crawler? return true if current_page > 1 - return false if is_empty_tag? || is_simple_tag? || tag_string == "order:rank" + return false if query.is_empty_search? || query.is_simple_tag? || query.is_metatag?(:order, :rank) true end - def is_single_tag? - tag_array.size == 1 - end - - def is_simple_tag? - Tag.is_simple_tag?(tag_string) - end - - def is_empty_tag? - tag_array.empty? - end - - def is_pattern_search? - is_single_tag? && tag_string =~ /\*/ && tag_array.none? {|x| x =~ /^-?source:.+/} - end - def current_page [page.to_i, 1].max end - def is_saved_search? - tag_string =~ /search:/ - end - def presenter @presenter ||= ::PostSetPresenters::Post.new(self) end diff --git a/app/logical/related_tag_query.rb b/app/logical/related_tag_query.rb index d1113821f..b50dce768 100644 --- a/app/logical/related_tag_query.rb +++ b/app/logical/related_tag_query.rb @@ -56,7 +56,7 @@ class RelatedTagQuery versions = PostVersion.where(updater_id: user.id).where("updated_at > ?", since).order(id: :desc).limit(max_edits) tags = versions.flat_map(&:added_tags) - tags = tags.reject { |tag| Tag.is_metatag?(tag) } + tags = tags.select { |tag| PostQueryBuilder.new(tag).is_simple_tag? } tags = tags.group_by(&:itself).transform_values(&:size).sort_by { |tag, count| [-count, tag] }.map(&:first) tags.take(max_tags) end diff --git a/app/models/post.rb b/app/models/post.rb index 772170288..1913b6fec 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1069,7 +1069,7 @@ class Post < ApplicationRecord def fast_count(tags = "", timeout: 1_000, raise_on_timeout: false, skip_cache: false) tags = tags.to_s tags += " rating:s" if CurrentUser.safe_mode? - tags += " -status:deleted" if CurrentUser.hide_deleted_posts? && !Tag.has_metatag?(tags, "status", "-status") + tags += " -status:deleted" if CurrentUser.hide_deleted_posts? && !PostQueryBuilder.new(tags).has_metatag?("status", "-status") tags = PostQueryBuilder.new(tags).normalize_query # Optimize some cases. these are just estimates but at these @@ -1135,7 +1135,7 @@ class Post < ApplicationRecord end def get_count_from_cache(tags) - if Tag.is_simple_tag?(tags) + if PostQueryBuilder.new(tags).is_simple_tag? count = Tag.find_by(name: tags).try(:post_count) else # this will only have a value for multi-tag searches or single metatag searches diff --git a/app/models/tag.rb b/app/models/tag.rb index d341e84b7..d925123b3 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -227,40 +227,6 @@ class Tag < ApplicationRecord end end - module ParseMethods - # true if query is a single "simple" tag (not a metatag, negated tag, or wildcard tag). - def is_simple_tag?(query) - is_single_tag?(query) && !is_metatag?(query) && !is_negated_tag?(query) && !is_optional_tag?(query) && !is_wildcard_tag?(query) - end - - def is_single_tag?(query) - PostQueryBuilder.new(query).split_query.size == 1 - end - - def is_metatag?(tag) - has_metatag?(tag, *PostQueryBuilder::METATAGS) - end - - def is_negated_tag?(tag) - tag.starts_with?("-") - end - - def is_optional_tag?(tag) - tag.starts_with?("~") - end - - def is_wildcard_tag?(tag) - tag.include?("*") - end - - def has_metatag?(tags, *metatags) - return nil if tags.blank? - - tags = PostQueryBuilder.new(tags.to_str).split_query if tags.respond_to?(:to_str) - tags.grep(/\A(?:#{metatags.map(&:to_s).join("|")}):(.+)\z/i) { $1 }.first - end - end - module SearchMethods # ref: https://www.postgresql.org/docs/current/static/pgtrgm.html#idm46428634524336 def order_similarity(name) @@ -393,6 +359,5 @@ class Tag < ApplicationRecord include ApiMethods include CountMethods include CategoryMethods - extend ParseMethods extend SearchMethods end diff --git a/app/models/upload.rb b/app/models/upload.rb index a9d4e5fd4..036bbdb95 100644 --- a/app/models/upload.rb +++ b/app/models/upload.rb @@ -237,7 +237,7 @@ class Upload < ApplicationRecord include SourceMethods def assign_rating_from_tags - if rating = Tag.has_metatag?(tag_string, :rating) + if rating = PostQueryBuilder.new(tag_string).find_metatag(:rating) self.rating = rating.downcase.first end end diff --git a/app/presenters/post_presenter.rb b/app/presenters/post_presenter.rb index 5489bd7d5..6981e7b1d 100644 --- a/app/presenters/post_presenter.rb +++ b/app/presenters/post_presenter.rb @@ -163,7 +163,7 @@ class PostPresenter end def has_sequential_navigation?(params) - return false if Tag.has_metatag?(params[:q], :order, :ordfav, :ordpool) + return false if PostQueryBuilder.new(params[:q]).has_metatag?(:order, :ordfav, :ordpool) return false if params[:pool_id].present? || params[:favgroup_id].present? return CurrentUser.user.enable_sequential_post_navigation end diff --git a/app/presenters/post_set_presenters/post.rb b/app/presenters/post_set_presenters/post.rb index bd67b567e..a60b64ecf 100644 --- a/app/presenters/post_set_presenters/post.rb +++ b/app/presenters/post_set_presenters/post.rb @@ -18,13 +18,13 @@ module PostSetPresenters end def related_tags - if post_set.is_pattern_search? - pattern_tags - elsif post_set.is_saved_search? + if post_set.query.is_wildcard_search? + wildcard_tags + elsif post_set.query.is_metatag?(:search) saved_search_tags - elsif post_set.is_empty_tag? || post_set.tag_string == "order:rank" + elsif post_set.query.is_empty_search? || post_set.query.is_metatag?(:order, :rank) popular_tags - elsif post_set.is_single_tag? + elsif post_set.query.is_single_term? similar_tags else frequent_tags @@ -47,7 +47,7 @@ module PostSetPresenters RelatedTagCalculator.frequent_tags_for_post_array(post_set.posts).take(MAX_TAGS) end - def pattern_tags + def wildcard_tags Tag.wildcard_matches(post_set.tag_string) end @@ -56,7 +56,7 @@ module PostSetPresenters end def tag_list_html(**options) - tag_set_presenter.tag_list_html(name_only: post_set.is_saved_search?, **options) + tag_set_presenter.tag_list_html(name_only: post_set.query.is_metatag?(:search), **options) end end end diff --git a/app/views/posts/index.html.erb b/app/views/posts/index.html.erb index 5eb8b27c7..eba30fb80 100644 --- a/app/views/posts/index.html.erb +++ b/app/views/posts/index.html.erb @@ -19,7 +19,7 @@ <%= render "posts/partials/index/options" %> - <%= render "posts/partials/index/related" %> + <%= render "posts/partials/index/related", post_set: @post_set %> <% end %> <% content_for(:content) do %> diff --git a/app/views/posts/partials/index/_related.html.erb b/app/views/posts/partials/index/_related.html.erb index 479318e7b..d23a094fc 100644 --- a/app/views/posts/partials/index/_related.html.erb +++ b/app/views/posts/partials/index/_related.html.erb @@ -15,7 +15,7 @@
  • <%= link_to "Deleted", posts_path(tags: "#{params[:tags]} status:deleted") %>
  • <%= link_to "Random", random_posts_path(tags: params[:tags]), id: "random-post", "data-shortcut": "r" %>
  • - <% if Tag.is_simple_tag?(params[:tags]) %> + <% if post_set.query.is_simple_tag? %>
  • <%= link_to "History", post_versions_path(search: { changed_tags: params[:tags] }) %>
  • <% end %>
  • <%= link_to "Count", posts_counts_path(:tags => params[:tags]) %>
  • diff --git a/app/views/posts/partials/index/_seo_meta_tags.html.erb b/app/views/posts/partials/index/_seo_meta_tags.html.erb index da236fbcf..4431ae472 100644 --- a/app/views/posts/partials/index/_seo_meta_tags.html.erb +++ b/app/views/posts/partials/index/_seo_meta_tags.html.erb @@ -1,4 +1,4 @@ -<% if @post_set.is_empty_tag? %> +<% if @post_set.query.is_empty_search? %> <% page_title("#{Danbooru.config.app_name}: Anime Image Board", suffix: nil) %> <% meta_description("#{Danbooru.config.canonical_app_name} is the original anime image 'booru. Find over 3.75 million anime pictures categorized by over 100 million tags.") %> diff --git a/test/unit/post_query_builder_test.rb b/test/unit/post_query_builder_test.rb index cdfc69dbe..4288d93bd 100644 --- a/test/unit/post_query_builder_test.rb +++ b/test/unit/post_query_builder_test.rb @@ -850,6 +850,41 @@ class PostQueryBuilderTest < ActiveSupport::TestCase end end + context "Parsing:" do + should "split a query" do + assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query) + assert_equal(%w(~aaa -bbb* -bbb*), PostQueryBuilder.new("~AAa -BBB* -bbb*").split_query) + end + + should "not strip out valid characters when scanning" do + assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query) + assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), PostQueryBuilder.new("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%").split_query) + end + + should "parse single tags correctly" do + assert_equal(true, PostQueryBuilder.new("foo").is_single_tag?) + assert_equal(true, PostQueryBuilder.new("-foo").is_single_tag?) + assert_equal(true, PostQueryBuilder.new("~foo").is_single_tag?) + assert_equal(true, PostQueryBuilder.new("foo*").is_single_tag?) + assert_equal(false, PostQueryBuilder.new("fav:1234").is_single_tag?) + assert_equal(false, PostQueryBuilder.new("pool:1234").is_single_tag?) + assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_single_tag?) + assert_equal(false, PostQueryBuilder.new("foo bar").is_single_tag?) + end + + should "parse simple tags correctly" do + assert_equal(true, PostQueryBuilder.new("foo").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("-foo").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("~foo").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("foo*").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("fav:1234").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("FAV:1234").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("pool:1234").is_simple_tag?) + assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_simple_tag?) + assert_equal(false, PostQueryBuilder.new("foo bar").is_simple_tag?) + end + end + context "The normalize_query method" do should "work" do create(:tag_alias, antecedent_name: "gray", consequent_name: "grey") diff --git a/test/unit/post_sets/post_test.rb b/test/unit/post_sets/post_test.rb index 1a6e3dc39..95a4458e3 100644 --- a/test/unit/post_sets/post_test.rb +++ b/test/unit/post_sets/post_test.rb @@ -65,16 +65,6 @@ module PostSets end end - context "a set for the 'a b' tag query" do - setup do - @set = PostSets::Post.new("a b") - end - - should "know it isn't a single tag" do - assert(!@set.is_single_tag?) - end - end - context "a set going to the 1,001st page" do setup do @set = PostSets::Post.new("a", 1_001) @@ -118,10 +108,6 @@ module PostSets @set = PostSets::Post.new("a") end - should "know it is a single tag" do - assert(@set.is_single_tag?) - end - should "normalize its tag query" do assert_equal("a", @set.tag_string) end diff --git a/test/unit/tag_test.rb b/test/unit/tag_test.rb index 537e27868..ede77bb90 100644 --- a/test/unit/tag_test.rb +++ b/test/unit/tag_test.rb @@ -91,49 +91,6 @@ class TagTest < ActiveSupport::TestCase end end - context "A tag parser" do - should "scan a query" do - assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query) - assert_equal(%w(~aaa -bbb* -bbb*), PostQueryBuilder.new("~AAa -BBB* -bbb*").split_query) - end - - should "not strip out valid characters when scanning" do - assert_equal(%w(aaa bbb), PostQueryBuilder.new("aaa bbb").split_query) - assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), PostQueryBuilder.new("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%").split_query) - end - - should "cast values" do - assert_equal(2048, PostQueryBuilder.parse_cast("2kb", :filesize)) - assert_equal(2097152, PostQueryBuilder.parse_cast("2m", :filesize)) - assert_nothing_raised {PostQueryBuilder.parse_cast("2009-01-01", :date)} - assert_nothing_raised {PostQueryBuilder.parse_cast("1234", :integer)} - assert_nothing_raised {PostQueryBuilder.parse_cast("1234.56", :float)} - end - - should "parse single tags correctly" do - assert_equal(true, Tag.is_single_tag?("foo")) - assert_equal(true, Tag.is_single_tag?("-foo")) - assert_equal(true, Tag.is_single_tag?("~foo")) - assert_equal(true, Tag.is_single_tag?("foo*")) - assert_equal(true, Tag.is_single_tag?("fav:1234")) - assert_equal(true, Tag.is_single_tag?("pool:1234")) - assert_equal(true, Tag.is_single_tag?('source:"foo bar baz"')) - assert_equal(false, Tag.is_single_tag?("foo bar")) - end - - should "parse simple tags correctly" do - assert_equal(true, Tag.is_simple_tag?("foo")) - assert_equal(false, Tag.is_simple_tag?("-foo")) - assert_equal(false, Tag.is_simple_tag?("~foo")) - assert_equal(false, Tag.is_simple_tag?("foo*")) - assert_equal(false, Tag.is_simple_tag?("fav:1234")) - assert_equal(false, Tag.is_simple_tag?("FAV:1234")) - assert_equal(false, Tag.is_simple_tag?("pool:1234")) - assert_equal(false, Tag.is_simple_tag?('source:"foo bar baz"')) - assert_equal(false, Tag.is_simple_tag?("foo bar")) - end - end - context "A tag" do should "be found when one exists" do tag = FactoryBot.create(:tag)