From 853ddc7cdac7218b58e348e7c0f54762985e68df Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 7 Feb 2017 23:03:35 -0600 Subject: [PATCH 1/6] tag.rb: validate tag names. Makes these tag names invalid when tagging posts: * Blank names (e.g. ___). * Non-ASCII characters (Japanese text). * Non-printable characters (e.g. control characters < 0x20). * Leading or trailing underscores, or consecutive underscores. Reason: `_foo__bar_` and `foo_bar` both render as `foo bar` in the tag list. * Leading `-` or `~`, or '*' (-foo, ~foo, foo*bar). Previously these were silently stripped, but that meant tagging a post with e.g. `*-foo` tagged the post with the invalid tag `-foo`. * Tag type prefixes (e.g. `character:character:hatsune_miku` no longer creates the literal tag `character:hatsune_miku`). * Metatags (order:score, user:evazion, etc). --- app/logical/tag_name_validator.rb | 30 ++++++++++++++++++++++++++++++ app/models/tag.rb | 4 +++- 2 files changed, 33 insertions(+), 1 deletion(-) create mode 100644 app/logical/tag_name_validator.rb diff --git a/app/logical/tag_name_validator.rb b/app/logical/tag_name_validator.rb new file mode 100644 index 000000000..872b9861d --- /dev/null +++ b/app/logical/tag_name_validator.rb @@ -0,0 +1,30 @@ +class TagNameValidator < ActiveModel::EachValidator + def validate_each(record, attribute, value) + case Tag.normalize_name(value) + when /\A_*\z/ + record.errors[attribute] << "'#{value}' cannot be blank" + when /\*/ + record.errors[attribute] << "'#{value}' cannot contain asterisks ('*')" + when /,/ + record.errors[attribute] << "'#{value}' cannot contain commas (',')" + when /\A~/ + record.errors[attribute] << "'#{value}' cannot begin with a tilde ('~')" + when /\A-/ + record.errors[attribute] << "'#{value}' cannot begin with a dash ('-')" + when /\A_/ + record.errors[attribute] << "'#{value}' cannot begin with an underscore" + when /_\z/ + record.errors[attribute] << "'#{value}' cannot end with an underscore" + when /__/ + record.errors[attribute] << "'#{value}' cannot contain consecutive underscores" + when /[^[[:graph:]]]/ + record.errors[attribute] << "'#{value}' cannot contain non-printable characters" + when /[^[[:ascii:]]]/ + record.errors[attribute] << "'#{value}' must consist of only ASCII characters" + when /\A(#{Tag::METATAGS}|#{Tag::SUBQUERY_METATAGS}):(.+)\z/i + record.errors[attribute] << "'#{value}' cannot begin with '#{$1}:'" + when /\A(#{Tag.categories.regexp}):(.+)\z/i + record.errors[attribute] << "'#{value}' cannot begin with '#{$1}:'" + end + end +end diff --git a/app/models/tag.rb b/app/models/tag.rb index 445c001f2..301b89f1b 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -6,6 +6,8 @@ class Tag < ActiveRecord::Base attr_accessible :is_locked, :as => [:moderator, :admin] has_one :wiki_page, :foreign_key => "title", :primary_key => "name" + validates :name, uniqueness: true, tag_name: true, on: :create + module ApiMethods def to_legacy_json return { @@ -179,7 +181,7 @@ class Tag < ActiveRecord::Base module NameMethods def normalize_name(name) - name.mb_chars.downcase.tr(" ", "_").gsub(/\A[-~]+/, "").gsub(/\*/, "").to_s + name.to_s.mb_chars.downcase.strip.tr(" ", "_").to_s end def find_or_create_by_name(name, options = {}) From 8e3296f3ea94f9afa65498a96e918498fa34a347 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 8 Feb 2017 22:53:37 -0600 Subject: [PATCH 2/6] tag.rb: don't strip '%' or ',' from tags; split on unicode spaces. Allows '%' and ',' in tag names. There's no technical need to forbid these characters. Fixes an issue with these characters being stripped away inside metatags (e.g. tagging a post with `pool:ichigo_100%` strips away the '%'). --- app/models/tag.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app/models/tag.rb b/app/models/tag.rb index 301b89f1b..bb2e5dc93 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -229,13 +229,13 @@ class Tag < ActiveRecord::Base def scan_query(query) tagstr = normalize(query) list = tagstr.scan(/-?source:".*?"/) || [] - list + tagstr.gsub(/-?source:".*?"/, "").scan(/\S+/).uniq + list + tagstr.gsub(/-?source:".*?"/, "").scan(/[^[:space:]]+/).uniq end def scan_tags(tags, options = {}) tagstr = normalize(tags) list = tagstr.scan(/source:".*?"/) || [] - list += tagstr.gsub(/source:".*?"/, "").gsub(/[%,]/, "").scan(/\S+/).uniq + list += tagstr.gsub(/source:".*?"/, "").scan(/[^[:space:]]+/).uniq if options[:strip_metatags] list = list.map {|x| x.sub(/^[-~]/, "")} end From 347f11b9355ff37c3530e32f4aa48eda7b89e1e5 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 8 Feb 2017 22:46:43 -0600 Subject: [PATCH 3/6] post.rb: group validation methods together. --- app/models/post.rb | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/app/models/post.rb b/app/models/post.rb index 343a1926c..1e464e1d8 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1307,13 +1307,6 @@ class Post < ActiveRecord::Base end end - def post_is_not_its_own_parent - if !new_record? && id == parent_id - errors[:base] << "Post cannot have itself as a parent" - false - end - end - def parent_exists? Post.exists?(parent_id) end @@ -1689,6 +1682,24 @@ class Post < ActiveRecord::Base end end end + + module ValidationMethods + def post_is_not_its_own_parent + if !new_record? && id == parent_id + errors[:base] << "Post cannot have itself as a parent" + false + end + end + + def updater_can_change_rating + if rating_changed? && is_rating_locked? + # Don't forbid changes if the rating lock was just now set in the same update. + if !is_rating_locked_changed? + errors.add(:rating, "is locked and cannot be changed. Unlock the post first.") + end + end + end + end include FileMethods include ImageMethods @@ -1709,6 +1720,7 @@ class Post < ActiveRecord::Base extend SearchMethods include PixivMethods include IqdbMethods + include ValidationMethods include Danbooru::HasBitFlags BOOLEAN_ATTRIBUTES = %w( @@ -1765,15 +1777,6 @@ class Post < ActiveRecord::Base save end - def updater_can_change_rating - if rating_changed? && is_rating_locked? - # Don't forbid changes if the rating lock was just now set in the same update. - if !is_rating_locked_changed? - errors.add(:rating, "is locked and cannot be changed. Unlock the post first.") - end - end - end - def update_column(name, value) ret = super(name, value) notify_pubsub From facf819620bb16447af9ae13c54fc6fdaa3a1731 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 8 Feb 2017 22:48:48 -0600 Subject: [PATCH 4/6] post.rb: validate newly added tags. Existing tags with invalid names are still permitted. This is to allow for a gradual transition to good tag names. --- app/models/post.rb | 16 ++++++++++++++++ app/models/tag.rb | 2 +- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/app/models/post.rb b/app/models/post.rb index 1e464e1d8..6b5f202e6 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -16,6 +16,7 @@ class Post < ActiveRecord::Base before_validation :remove_parent_loops validates_uniqueness_of :md5, :on => :create validates_inclusion_of :rating, in: %w(s q e), message: "rating must be s, q, or e" + validate :tag_names_are_valid validate :post_is_not_its_own_parent validate :updater_can_change_rating before_save :update_tag_post_counts @@ -1699,6 +1700,21 @@ class Post < ActiveRecord::Base end end end + + def tag_names_are_valid + # only validate new tags; allow invalid names for tags that already exist. + added_tags = tag_array - tag_array_was + new_tags = added_tags - Tag.where(name: added_tags).pluck(:name) + + new_tags.each do |name| + tag = Tag.new(name: name) + tag.valid? + + tag.errors.messages.each do |attribute, messages| + errors[:tag_string] << "tag #{attribute} #{messages.join(';')}" + end + end + end end include FileMethods diff --git a/app/models/tag.rb b/app/models/tag.rb index bb2e5dc93..db72c837f 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -2,7 +2,7 @@ class Tag < ActiveRecord::Base COSINE_SIMILARITY_RELATED_TAG_THRESHOLD = 1000 METATAGS = "-user|user|-approver|approver|commenter|comm|noter|noteupdater|artcomm|-pool|pool|ordpool|-favgroup|favgroup|-fav|fav|ordfav|sub|md5|-rating|rating|-locked|locked|width|height|mpixels|ratio|score|favcount|filesize|source|-source|id|-id|date|age|order|limit|-status|status|tagcount|gentags|arttags|chartags|copytags|parent|-parent|child|pixiv_id|pixiv|search|upvote|downvote|filetype|-filetype" SUBQUERY_METATAGS = "commenter|comm|noter|noteupdater|artcomm" - attr_accessible :category, :as => [:moderator, :janitor, :gold, :platinum, :member, :anonymous, :default, :builder, :admin] + attr_accessible :name, :category attr_accessible :is_locked, :as => [:moderator, :admin] has_one :wiki_page, :foreign_key => "title", :primary_key => "name" From f971f927fd4348342215a5683f056439653c7319 Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 7 Feb 2017 23:03:14 -0600 Subject: [PATCH 5/6] post_test.rb: add tag name validation tests. --- test/unit/post_test.rb | 73 ++++++++++++++++++++++++++++++++++++++++++ test/unit/tag_test.rb | 30 +++++++++++++++-- 2 files changed, 101 insertions(+), 2 deletions(-) diff --git a/test/unit/post_test.rb b/test/unit/post_test.rb index 117bd1ab6..0ac525442 100644 --- a/test/unit/post_test.rb +++ b/test/unit/post_test.rb @@ -558,6 +558,72 @@ class PostTest < ActiveSupport::TestCase end end + context "tagged with a valid tag" do + subject { @post } + + should allow_value("touhou 100%").for(:tag_string) + should allow_value("touhou FOO").for(:tag_string) + should allow_value("touhou -foo").for(:tag_string) + should allow_value("touhou pool:foo").for(:tag_string) + should allow_value("touhou -pool:foo").for(:tag_string) + should allow_value("touhou newpool:foo").for(:tag_string) + should allow_value("touhou fav:self").for(:tag_string) + should allow_value("touhou -fav:self").for(:tag_string) + should allow_value("touhou upvote:self").for(:tag_string) + should allow_value("touhou downvote:self").for(:tag_string) + should allow_value("touhou parent:1").for(:tag_string) + should allow_value("touhou child:1").for(:tag_string) + should allow_value("touhou source:foo").for(:tag_string) + should allow_value("touhou rating:z").for(:tag_string) + should allow_value("touhou locked:rating").for(:tag_string) + should allow_value("touhou -locked:rating").for(:tag_string) + + # \u3000 = ideographic space, \u00A0 = no-break space + should allow_value("touhou\u3000foo").for(:tag_string) + should allow_value("touhou\u00A0foo").for(:tag_string) + end + + context "tagged with an invalid tag" do + subject { @post } + + context "that doesn't already exist" do + should_not allow_value("touhou user:evazion").for(:tag_string) + should_not allow_value("touhou *~foo").for(:tag_string) + should_not allow_value("touhou *-foo").for(:tag_string) + should_not allow_value("touhou ,-foo").for(:tag_string) + + should_not allow_value("touhou ___").for(:tag_string) + should_not allow_value("touhou ~foo").for(:tag_string) + should_not allow_value("touhou _foo").for(:tag_string) + should_not allow_value("touhou foo_").for(:tag_string) + should_not allow_value("touhou foo__bar").for(:tag_string) + should_not allow_value("touhou foo*bar").for(:tag_string) + should_not allow_value("touhou foo,bar").for(:tag_string) + should_not allow_value("touhou foo\abar").for(:tag_string) + should_not allow_value("touhou café").for(:tag_string) + should_not allow_value("touhou 東方").for(:tag_string) + end + + context "that already exists" do + setup do + %W(___ ~foo _foo foo_ foo__bar foo*bar foo,bar foo\abar café 東方).each do |tag| + FactoryGirl.build(:tag, name: tag).save(validate: false) + end + end + + should allow_value("touhou ___").for(:tag_string) + should allow_value("touhou ~foo").for(:tag_string) + should allow_value("touhou _foo").for(:tag_string) + should allow_value("touhou foo_").for(:tag_string) + should allow_value("touhou foo__bar").for(:tag_string) + should allow_value("touhou foo*bar").for(:tag_string) + should allow_value("touhou foo,bar").for(:tag_string) + should allow_value("touhou foo\abar").for(:tag_string) + should allow_value("touhou café").for(:tag_string) + should allow_value("touhou 東方").for(:tag_string) + end + end + context "tagged with a metatag" do context "for a parent" do setup do @@ -674,6 +740,13 @@ class PostTest < ActiveSupport::TestCase assert_equal("pool:#{@pool.id} pool:series", @post.pool_string) end end + + context "with special characters" do + should "not strip '%' from the name" do + @post.update(tag_string: "aaa newpool:ichigo_100%") + assert(Pool.exists?(name: "ichigo_100%")) + end + end end end diff --git a/test/unit/tag_test.rb b/test/unit/tag_test.rb index a1e253d13..3ba6086d1 100644 --- a/test/unit/tag_test.rb +++ b/test/unit/tag_test.rb @@ -137,9 +137,9 @@ class TagTest < ActiveSupport::TestCase assert_equal(%w(~AAa -BBB* -bbb*), Tag.scan_query("~AAa -BBB* -bbb*")) end - should "strip out invalid characters when scanning" do + should "not strip out valid characters when scanning" do assert_equal(%w(aaa bbb), Tag.scan_tags("aaa bbb")) - assert_equal(%w(-BB;B* -b_b_b_), Tag.scan_tags("-B,B;B* -b_b_b_")) + assert_equal(%w(favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%), Tag.scan_tags("favgroup:yondemasu_yo,_azazel-san. pool:ichigo_100%")) end should "cast values" do @@ -207,6 +207,32 @@ class TagTest < ActiveSupport::TestCase assert_equal(Tag.categories.artist, tag.category) end end + + context "during name validation" do + # tags with spaces or uppercase are allowed because they are normalized + # to lowercase with underscores. + should allow_value(" foo ").for(:name).on(:create) + should allow_value("foo bar").for(:name).on(:create) + should allow_value("FOO").for(:name).on(:create) + + should_not allow_value("").for(:name).on(:create) + should_not allow_value("___").for(:name).on(:create) + should_not allow_value("~foo").for(:name).on(:create) + should_not allow_value("-foo").for(:name).on(:create) + should_not allow_value("_foo").for(:name).on(:create) + should_not allow_value("foo_").for(:name).on(:create) + should_not allow_value("foo__bar").for(:name).on(:create) + should_not allow_value("foo*bar").for(:name).on(:create) + should_not allow_value("foo,bar").for(:name).on(:create) + should_not allow_value("foo\abar").for(:name).on(:create) + should_not allow_value("café").for(:name).on(:create) + should_not allow_value("東方").for(:name).on(:create) + + metatags = Tag::METATAGS.split("|") + Tag::SUBQUERY_METATAGS.split("|") + Danbooru.config.tag_category_mapping.keys + metatags.split("|").each do |metatag| + should_not allow_value("#{metatag}:foo").for(:name).on(:create) + end + end end context "A tag with a negative post count" do From c926f75918fb1ec21673c18b4bbf4657de3982de Mon Sep 17 00:00:00 2001 From: evazion Date: Thu, 9 Feb 2017 02:14:48 -0600 Subject: [PATCH 6/6] fixup! post.rb: validate newly added tags. --- app/models/post.rb | 3 ++- app/models/tag.rb | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/app/models/post.rb b/app/models/post.rb index 6b5f202e6..059527a73 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -1707,7 +1707,8 @@ class Post < ActiveRecord::Base new_tags = added_tags - Tag.where(name: added_tags).pluck(:name) new_tags.each do |name| - tag = Tag.new(name: name) + tag = Tag.new + tag.name = name tag.valid? tag.errors.messages.each do |attribute, messages| diff --git a/app/models/tag.rb b/app/models/tag.rb index db72c837f..bb2e5dc93 100644 --- a/app/models/tag.rb +++ b/app/models/tag.rb @@ -2,7 +2,7 @@ class Tag < ActiveRecord::Base COSINE_SIMILARITY_RELATED_TAG_THRESHOLD = 1000 METATAGS = "-user|user|-approver|approver|commenter|comm|noter|noteupdater|artcomm|-pool|pool|ordpool|-favgroup|favgroup|-fav|fav|ordfav|sub|md5|-rating|rating|-locked|locked|width|height|mpixels|ratio|score|favcount|filesize|source|-source|id|-id|date|age|order|limit|-status|status|tagcount|gentags|arttags|chartags|copytags|parent|-parent|child|pixiv_id|pixiv|search|upvote|downvote|filetype|-filetype" SUBQUERY_METATAGS = "commenter|comm|noter|noteupdater|artcomm" - attr_accessible :name, :category + attr_accessible :category, :as => [:moderator, :janitor, :gold, :platinum, :member, :anonymous, :default, :builder, :admin] attr_accessible :is_locked, :as => [:moderator, :admin] has_one :wiki_page, :foreign_key => "title", :primary_key => "name"