From d884cb6642ed050f16d6de3ca0b949984c6bcae8 Mon Sep 17 00:00:00 2001
From: evazion
Date: Wed, 12 Aug 2020 17:57:22 -0500
Subject: [PATCH] search: fix parsing of quoted metatag values.

* Fix #4552: Multiple quoted search terms not parsed correctly.
* Allow quotes to be escaped in quoted metatags.
* Allow spaces to be escaped in unquoted metatags.
* Allow the empty string to be used in metatags.

Examples:

* `source:""` and `source:''` (same as `source:none`)
* `source:foo\ bar\ baz` (same as `source:"foo bar baz"`)
* `source:"don't say \"lazy\""` (use \" to write a literal ")
* `source:'don\'t say "lazy"'` (use \' to write a literal ')
* `source:"C:\\Windows"` (use \\ to write a literal \)
---
 app/logical/post_query_builder.rb    | 60 +++++++++++++++++-----------
 test/unit/post_query_builder_test.rb | 38 ++++++++++++++++++
 2 files changed, 74 insertions(+), 24 deletions(-)

diff --git a/app/logical/post_query_builder.rb b/app/logical/post_query_builder.rb
index 2d2835bcc..e31a939f1 100644
--- a/app/logical/post_query_builder.rb
+++ b/app/logical/post_query_builder.rb
@@ -332,8 +332,9 @@ class PostQueryBuilder
   end
 
   def source_matches(source, quoted = false)
-    case source.downcase
-    in "none" unless quoted
+    if source.empty?
+      Post.where_like(:source, "")
+    elsif source.downcase == "none" && !quoted
       Post.where_like(:source, "")
     else
       Post.where_ilike(:source, source + "*")
@@ -644,14 +645,7 @@ class PostQueryBuilder
       if scanner.scan(/(-)?(#{METATAGS.join("|")}):/io)
         operator = scanner.captures.first
         metatag = scanner.captures.second.downcase
-
-        if scanner.scan(/"(.+)"/) || scanner.scan(/'(.+)'/)
-          value = scanner.captures.first
-          quoted = true
-        else
-          value = scanner.scan(/[^ ]*/)
-          quoted = false
-        end
+        value, quoted = scan_string(scanner)
 
         if metatag.in?(COUNT_METATAG_SYNONYMS)
           metatag = metatag.singularize + "_count"
@@ -675,23 +669,41 @@ class PostQueryBuilder
     terms
   end
 
+  def scan_string(scanner)
+    if scanner.scan(/"((?:\\"|[^"])*)"/)
+      value = scanner.captures.first.gsub(/\\(.)/) { $1 }
+      quoted = true
+    elsif scanner.scan(/'((?:\\'|[^'])*)'/)
+      value = scanner.captures.first.gsub(/\\(.)/) { $1 }
+      quoted = true
+    else
+      value = scanner.scan(/(\\ |[^ ])*/)
+      value = value.gsub(/\\ /) { " " }
+      quoted = false
+    end
+
+    [value, quoted]
+  end
+
   def split_query
     terms.map do |term|
-      if term.type == :metatag && !term.negated && !term.quoted
-        "#{term.name}:#{term.value}"
-      elsif term.type == :metatag && !term.negated && term.quoted
-        "#{term.name}:\"#{term.value}\""
-      elsif term.type == :metatag && term.negated && !term.quoted
-        "-#{term.name}:#{term.value}"
-      elsif term.type == :metatag && term.negated && term.quoted
-        "-#{term.name}:\"#{term.value}\""
-      elsif term.type == :tag && term.negated
-        "-#{term.name}"
-      elsif term.type == :tag && term.optional
-        "~#{term.name}"
-      elsif term.type == :tag
-        term.name
+      type, name, value = term.type, term.name, term.value
+
+      str = ""
+      str += "-" if term.negated
+      str += "~" if term.optional
+
+      if type == :tag
+        str += name
+      elsif type == :metatag && (term.quoted || value.include?(" "))
+        value = value.gsub(/\\/) { '\\\\' }
+        value = value.gsub(/"/) { '\\"' }
+        str += "#{name}:\"#{value}\""
+      elsif type == :metatag
+        str += "#{name}:#{value}"
       end
+
+      str
     end
   end
 
diff --git a/test/unit/post_query_builder_test.rb b/test/unit/post_query_builder_test.rb
index 5810a9b92..d51d3c856 100644
--- a/test/unit/post_query_builder_test.rb
+++ b/test/unit/post_query_builder_test.rb
@@ -9,6 +9,13 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
     assert_equal(count, PostQueryBuilder.new(query, **query_options).normalized_query.fast_count(**fast_count_options))
   end
 
+  def assert_parse_equals(expected, query)
+    assert_equal(expected, PostQueryBuilder.new(query).split_query)
+
+    # parsing, serializing, then parsing again should produce the same result.
+    assert_equal(PostQueryBuilder.new(query).to_s, PostQueryBuilder.new(PostQueryBuilder.new(query).to_s).to_s)
+  end
+
   setup do
     CurrentUser.user = create(:user)
     CurrentUser.ip_addr = "127.0.0.1"
@@ -718,7 +725,11 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
 
       assert_tag_match([post3], "source:none")
       assert_tag_match([post3], "source:NONE")
+      assert_tag_match([post3], 'source:""')
+      assert_tag_match([post3], "source:''")
       assert_tag_match([post2, post1], "-source:none")
+      assert_tag_match([post2, post1], "-source:''")
+      assert_tag_match([post2, post1], '-source:""')
 
       assert_tag_match([], "source:'none'")
       assert_tag_match([], "source:none source:abcde")
@@ -1066,6 +1077,33 @@ class PostQueryBuilderTest < ActiveSupport::TestCase
       assert_equal(false, PostQueryBuilder.new('source:"foo bar baz"').is_simple_tag?)
       assert_equal(false, PostQueryBuilder.new("foo bar").is_simple_tag?)
     end
+
+    should "parse quoted metatags correctly" do
+      assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:'https'))
+      assert_parse_equals(%w[source:"https" status:"active"], %q(source:'https' status:'active'))
+      assert_parse_equals(%w[status:"active" source:"https"], %q(status:"active" source:'https'))
+      assert_parse_equals(%w[status:"active" source:"https"], %q(status:'active' source:"https"))
+      assert_parse_equals(%w[status:"active" source:https], %q(status:'active' source:https))
+      assert_parse_equals(%w[status:active source:"https"], %q(status:active source:'https'))
+
+      assert_parse_equals(%w[limit:"5" status:"active" source:"x"], %q(limit:"5" status:"active" source:"x"))
+      assert_parse_equals(%w[source:"" limit:"1" status:"deleted"], %q(source:"" limit:'1' status:'deleted'))
+
+      assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
+      assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:"bar baz" don't_say_"lazy"))
+      assert_parse_equals(['source:"bar baz"', 'don\'t_say_"lazy"'], %q(source:'bar baz' don't_say_"lazy"))
+
+      assert_parse_equals([%q(source:"foo")], %q(source:"\f\o\o"))
+      assert_parse_equals([%q(source:"foo")], %q(source:'\f\o\o'))
+      assert_parse_equals([%q(source:foo\bar)], %q(source:foo\bar))
+      assert_parse_equals([%q(source:"foo)], %q(source:"foo))
+      assert_parse_equals([%q(source:'foo)], %q(source:'foo))
+      assert_parse_equals([%q(source:"foo bar")], %q(source:foo\ bar))
+      assert_parse_equals([%q(source:"\"foo bar\\\\")], %q(source:"foo\ bar\\))
+
+      assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:"don't_say_\"lazy\"" don't_say_"lazy"))
+      assert_parse_equals(['source:"don\'t_say_\\"lazy\\""', 'don\'t_say_"lazy"'], %q(source:'don\'t_say_"lazy"' don't_say_"lazy"))
+    end
   end
 
   context "The normalized_query method" do