aliases: fix broken wiki links when aliasing tags.

When aliasing A to B, update any wikis linking to [[A]] to link to [[B]]
instead.

This is a best-effort process based on rough heuristics. There are a few
known problems:

* We don't always know how to capitalize the new tag. We try to mimic
  the capitalization of the old tag, such that if the old tag was
  capitalized (because it was at the beginning of a sentence), or if
  every word in the old link was capitalized (because it's a proper
  noun), then the new link will be capitalized in the same way. This can
  handle simple general tags and character tags, but will fail for
  copyright tags with mixed capitalization. For example, we don't know
  that [[jojo_no_kimyou_na_bouken]] should be capitalized as [[JoJo no
  Kimyou na Bouken]]. If we don't know how to capitalize the new tag, we
  leave the old tag as-is so it can be fixed manually.

* Some aliases might require changing how a tag is pluralized. If we
  changed [[rat]] to [[mouse]], then we should change `[[rat]]s` to
  [[mice]]. We don't try to deal with this.

* In general, some changes might require entire sentences to be
  rewritten to keep the grammar correct. Changing something like
  [[skirt lift]] to [[lifting skirt]] could break the grammar of the
  sentence. We don't try to deal with this.
This commit is contained in:
evazion
2020-08-26 18:04:44 -05:00
parent f4f25cf0c8
commit bbf2b53d83
5 changed files with 101 additions and 0 deletions

View File

@@ -157,6 +157,52 @@ class DText
Set.new(parse_external_links(a)) != Set.new(parse_external_links(b))
end
# Rewrite wiki links to [[old_name]] with [[new_name]]. We attempt to match
# the capitalization of the old tag when rewriting it to the new tag, but if
# we can't determine how the new tag should be capitalized based on some
# simple heuristics, then we skip rewriting the tag.
#
# Both tag names may be given with underscores or spaces and in any case;
# they are normalized to lowercase, space-separated form before matching.
#
# @param dtext [String] the DText markup to scan for wiki links
# @param old_name [String] the tag name whose links should be replaced
# @param new_name [String] the tag name the links should point to instead
# @return [String] a copy of `dtext` with the matching links rewritten;
#   links to other tags, and links whose capitalization can't be mimicked,
#   are left untouched
def self.rewrite_wiki_links(dtext, old_name, new_name)
  old_name = old_name.downcase.squeeze("_").tr("_", " ").strip
  new_name = new_name.downcase.squeeze("_").tr("_", " ").strip

  # One or more trailing ` (qualifier)` groups, e.g. ` (midsummer march) (azur lane)`.
  qualifier = /( \(.*\))+\z/

  # Match `[[name]]` or `[[name|title]]`
  dtext.gsub(/\[\[(.*?)(?:\|(.*?))?\]\]/) do |match|
    name = Regexp.last_match(1)
    title = Regexp.last_match(2)

    # The link target as written, with underscores and repeated/outer
    # whitespace normalized but the author's capitalization preserved.
    display_name = name.tr("_", " ").squeeze(" ").strip

    # Skip this link if it isn't the tag we're trying to replace.
    next match if display_name.downcase != old_name

    # Strip qualifiers, e.g. `atago (midsummer march) (azur lane)` => `atago`.
    # The qualifier is detected on the normalized name so that
    # `[[atago_(azur_lane)]]` is treated the same as `[[atago (azur lane)]]`.
    unqualified_name = display_name.sub(qualifier, "")
    has_qualifier = display_name.match?(qualifier)

    # If old tag was lowercase, e.g. [[ink tank (Splatoon)]], then keep new tag in lowercase.
    if unqualified_name == unqualified_name.downcase
      final_name = new_name
    # If old tag was capitalized, e.g. [[Colored pencil (medium)]], then capitalize new tag.
    elsif unqualified_name == unqualified_name.downcase.capitalize
      final_name = new_name.capitalize
    # If old tag was in titlecase, e.g. [[Hatsune Miku (cosplay)]], then titlecase new tag.
    elsif unqualified_name == unqualified_name.split.map(&:capitalize).join(" ")
      final_name = new_name.split.map(&:capitalize).join(" ")
    # If we can't determine how to capitalize the new tag, then keep the old tag.
    # e.g. [[Suzumiya Haruhi no Yuuutsu]] -> [[The Melancholy of Haruhi Suzumiya]]
    else
      next match
    end

    if title.present?
      # An explicit link title is kept verbatim.
      "[[#{final_name}|#{title}]]"
    elsif has_qualifier
      # The old link had a qualifier, so emit the `[[name|]]` form.
      "[[#{final_name}|]]"
    else
      "[[#{final_name}]]"
    end
  end
end
def self.strip_blocks(string, tag)
n = 0
stripped = ""

View File

@@ -14,6 +14,7 @@ class TagMover
move_wiki!
move_saved_searches!
move_blacklists!
rewrite_wiki_links!
move_posts!
end
end
@@ -63,6 +64,10 @@ class TagMover
User.rewrite_blacklists!(old_tag.name, new_tag.name)
end
# Hand the old and new tag names to WikiPage so that wiki links to the old
# tag are rewritten to the new tag.
def rewrite_wiki_links!
  from_name = old_tag.name
  to_name = new_tag.name
  WikiPage.rewrite_wiki_links!(from_name, to_name)
end
def merge_artists!
old_artist.lock!
new_artist.lock!

View File

@@ -222,6 +222,16 @@ class WikiPage < ApplicationRecord
TagAlias.to_aliased(titles & tags)
end
# For every wiki page returned by `WikiPage.linked_to(old_name)`: lock the
# record, run its body through `DText.rewrite_wiki_links`, and save it.
#
# @param old_name [String] the tag name currently linked to
# @param new_name [String] the tag name the links should point to
def self.rewrite_wiki_links!(old_name, new_name)
  WikiPage.linked_to(old_name).each do |page|
    page.lock!
    rewritten_body = DText.rewrite_wiki_links(page.body, old_name, new_name)
    page.body = rewritten_body
    page.save!
  end
end
def to_param
if title =~ /\A\d+\z/
"~#{title}"

View File

@@ -5,6 +5,10 @@ class DTextTest < ActiveSupport::TestCase
assert_equal(expected, DText.strip_dtext(dtext))
end
# Assert that rewriting links from `old` to `new` in `dtext` yields `expected`.
def assert_rewrite_wiki_links(expected, dtext, old, new)
  rewritten = DText.rewrite_wiki_links(dtext, old, new)
  assert_equal(expected, rewritten)
end
context "DText" do
context "#strip_dtext" do
should "strip dtext markup from the input" do
@@ -125,6 +129,31 @@ class DTextTest < ActiveSupport::TestCase
end
end
context "#rewrite_wiki_links" do
  should "work" do
    # Each row is [expected output, input dtext, old tag, new tag].
    [
      # Plain links, links with titles, and links to unrelated tags.
      ["[[rabbit]]", "[[bunny]]", "bunny", "rabbit"],
      ["[[rabbit|bun]]", "[[bunny|bun]]", "bunny", "rabbit"],
      ["[[cat]] [[rabbit]]", "[[cat]] [[rabbit]]", "bunny", "rabbit"],
      ["I like [[cat]]s and [[bunny]]s", "I like [[cat]]s and [[rabbit]]s", "rabbit", "bunny"],

      # Qualified tags, in lowercase, capitalized and titlecase forms.
      ["[[miku hatsune (cosplay)|]]", "[[hatsune miku (cosplay)]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)"],
      ["[[Miku hatsune (cosplay)|]]", "[[Hatsune miku (cosplay)]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)"],
      ["[[Miku Hatsune (cosplay)|]]", "[[Hatsune Miku (cosplay)]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)"],
      ["[[miku hatsune (cosplay)|miku]]", "[[hatsune miku (cosplay)|miku]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)"],

      # Links written with spaces.
      ["[[the legend of zelda]]", "[[zelda no densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],
      ["[[The legend of zelda]]", "[[Zelda no densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],
      ["[[The Legend Of Zelda]]", "[[Zelda No Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],

      # Links written with underscores.
      ["[[the legend of zelda]]", "[[zelda_no_densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],
      ["[[The legend of zelda]]", "[[Zelda_no_densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],
      ["[[The Legend Of Zelda]]", "[[Zelda_No_Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],

      # Mixed capitalization can't be mimicked, so the link is left unchanged.
      ["[[Zelda no Densetsu]]", "[[Zelda no Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],
      ["[[Zelda_no_Densetsu]]", "[[Zelda_no_Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda"],
    ].each do |expected, dtext, old, new|
      assert_rewrite_wiki_links(expected, dtext, old, new)
    end
  end
end
context "#from_html" do
should "convert basic html to dtext" do
assert_equal("[b]abc[/b] [i]def[/i] [u]ghi[/u]", DText.from_html("<b>abc</b> <i>def</i> <u>ghi</u>"))

View File

@@ -197,6 +197,17 @@ class TagAliasTest < ActiveSupport::TestCase
assert_equal(%w[111 333], @wiki2.other_names)
assert_equal("second", @wiki2.body)
end
# Approving an alias from `aaa` to `bbb` should rewrite a `[[aaa]]` link in
# another wiki page's body to `[[bbb]]`.
should "rewrite links in other wikis to use the new tag" do
# A wiki page whose body links to the tag that is about to be aliased.
@wiki = create(:wiki_page, body: "foo [[aaa]] bar")
@ta = create(:tag_alias, antecedent_name: "aaa", consequent_name: "bbb")
@ta.approve!(approver: @admin)
# Run queued background jobs — presumably the alias is processed in a job
# after approval; verify against the alias approval code.
perform_enqueued_jobs
# Reload both records so the assertions see the persisted state.
assert_equal("active", @ta.reload.status)
assert_equal("foo [[bbb]] bar", @wiki.reload.body)
end
end
context "when the tags have artist entries" do