tags: populate words column.
Add code for parsing tags into words and for populating the `words` column in the tags table.
This commit is contained in:
@@ -171,6 +171,11 @@ class Tag < ApplicationRecord
|
||||
end
|
||||
|
||||
concerning :NameMethods do
|
||||
# Assign the tag's name and refresh the derived `words` attribute from it.
#
# A nil name is given an empty word list rather than being passed on to
# Tag.parse_words, which calls `match?` on its argument and would therefore
# raise NoMethodError for nil.
# NOTE(review): assumes the `words` attribute accepts an empty array; confirm
# against the column definition.
#
# @param name [String, nil] the new tag name
def name=(name)
  super(name)
  self.words = name.nil? ? [] : Tag.parse_words(name)
end
|
||||
|
||||
# The tag name with every underscore replaced by a space.
#
# @return [String]
def pretty_name
  name.tr("_", " ")
end
|
||||
@@ -236,6 +241,47 @@ class Tag < ApplicationRecord
|
||||
end
|
||||
end
|
||||
|
||||
concerning :WordMethods do
|
||||
# Characters that delimit words in tags. Frozen so the shared constant can't be
# mutated in place.
WORD_DELIMITERS = " _+:;!.\/()-".freeze

# Matches a run of one or more delimiter characters. The run is captured so that
# String#split keeps the delimiters in its result. The delimiters are escaped
# before being placed in the character class so that none of them (notably "-")
# can take on a special meaning, whatever their order in WORD_DELIMITERS.
WORD_DELIMITER_REGEX = /([#{Regexp.escape(WORD_DELIMITERS)}]+)/
|
||||
|
||||
class_methods do
|
||||
# Break a tag name apart at word boundaries, keeping each run of delimiter
# characters as its own element. Blank pieces are dropped, and a name that
# can't be parsed into words is returned whole as a one-element array.
#
#   Tag.split_words("jeanne_d'arc_alter_(fate)") => ["jeanne", "_", "d'arc", "_", "alter", "_(", "fate", ")"]
#   Tag.split_words("k-on!") => ["k", "-", "on", "!"]
#   Tag.split_words("<o>_<o>") => ["<o>_<o>"]
def split_words(name)
  if parsable_into_words?(name)
    pieces = name.split(WORD_DELIMITER_REGEX)
    pieces.compact_blank
  else
    [name]
  end
end
|
||||
|
||||
# Reduce a tag name to its plain words: the name is split at word boundaries,
# leading and trailing non-alphanumeric characters are stripped from every
# piece, and pieces left blank by that (i.e. the delimiters) are discarded.
# A name that can't be parsed into words is returned untouched as a
# one-element array.
#
#   Tag.parse_words("jeanne_d'arc_alter_(fate)") => ["jeanne", "d'arc", "alter", "fate"]
#   Tag.parse_words("k-on!") => ["k", "on"]
#   Tag.parse_words("<o>_<o>") => ["<o>_<o>"]
def parse_words(name)
  # Checked here as well as in split_words so that an unparsable name doesn't
  # get its punctuation stripped below.
  return [name] unless parsable_into_words?(name)

  stripped = split_words(name).map do |piece|
    piece.remove(/\A[^a-zA-Z0-9]+|[^a-zA-Z0-9]+\z/)
  end

  stripped.compact_blank
end
|
||||
|
||||
# Whether the tag name can be parsed into words, which requires at least two
# consecutive ASCII letters or digits somewhere in the name.
#
#   Tag.parsable_into_words?("k-on!") => true
#   Tag.parsable_into_words?("<o>_<o>") => false
#   Tag.parsable_into_words?("m.u.g.e.n") => false
def parsable_into_words?(name)
  two_alphanumerics = /[a-zA-Z0-9]{2}/
  name.match?(two_alphanumerics)
end
|
||||
end
|
||||
end
|
||||
|
||||
module SearchMethods
|
||||
def autocorrect_matches(name)
|
||||
fuzzy_name_matches(name).order_similarity(name)
|
||||
|
||||
8
script/fixes/113_add_tag_words.rb
Executable file
8
script/fixes/113_add_tag_words.rb
Executable file
@@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env ruby
|
||||
|
||||
require_relative "base"
|
||||
|
||||
# Backfill the `words` column of every tag from its name, printing each tag
# after it has been updated.
Tag.find_each do |tag|
  words = Tag.parse_words(tag.name)
  tag.update_columns(words: words)
  p tag
end
|
||||
@@ -140,6 +140,40 @@ class TagTest < ActiveSupport::TestCase
|
||||
end
|
||||
end
|
||||
|
||||
should "parse tag names into words" do
  # Each entry is [expected words, tag name].
  examples = [
    # Names that are split into plain words.
    [%w[very long hair], "very_long_hair"],
    [%w[k on], "k-on!"],
    [%w[hack], ".hack//"],
    [%w[re zero], "re:zero"],
    [%w[compass], "#compass"],
    [%w[me me me], "me!me!me!"],
    [%w[d gray man], "d.gray-man"],
    [%w[steins gate], "steins;gate"],
    [%w[ssss gridman], "ssss.gridman"],
    [%w[yu gi oh 5d's], "yu-gi-oh!_5d's"],
    [%w[jack o lantern], "jack-o'-lantern"],
    [%w[d va overwatch], "d.va_(overwatch)"],
    [%w[rosario vampire], "rosario+vampire"],
    [%w[girls frontline], "girls'_frontline"],
    [%w[fate grand order], "fate/grand_order"],
    [%w[yorha no 2 type b], "yorha_no._2_type_b"],
    [%w[love live sunshine], "love_live!_sunshine!!"],
    [%w[jeanne d'arc alter ver shinjuku 1999 fate], "jeanne_d'arc_alter_(ver._shinjuku_1999)_(fate)"],

    # Names that are kept whole as a single word.
    [%w[:o], ":o"],
    [%w[o_o], "o_o"],
    [%w[^_^], "^_^"],
    [%w[^^^], "^^^"],
    [%w[c.c.], "c.c."],
    [%w[\||/], '\||/'],
    [%w[\(^o^)/], '\(^o^)/'],
    [%w[<o>_<o>], "<o>_<o>"],
    [%w[<|>_<|>], "<|>_<|>"],
    [%w[k-----s], "k-----s"],
    [%w[m.u.g.e.n], "m.u.g.e.n"],
  ]

  examples.each do |expected_words, tag_name|
    assert_equal(expected_words, Tag.new(name: tag_name).words)
  end
end
|
||||
|
||||
context "during name validation" do
|
||||
# tags with spaces or uppercase are allowed because they are normalized
|
||||
# to lowercase with underscores.
|
||||
|
||||
Reference in New Issue
Block a user