From bbf2b53d831c0a22095938c110d2d0dfa6cf5084 Mon Sep 17 00:00:00 2001 From: evazion Date: Wed, 26 Aug 2020 18:04:44 -0500 Subject: [PATCH] aliases: fix broken wiki links when aliasing tags. When aliasing A to B, update any wikis linking to [[A]] to link to [[B]] instead. This is a best-effort process based on rough heuristics. There are a few known problems: * We don't always know how to capitalize the new tag. We try to mimic the capitalization of the old tag, such that if the old tag was capitalized (because it was at the beginning of a sentence), or if every word in the old link was capitalized (because it's a proper noun), then the new link will be capitalized in the same way. This can handle simple general tags and character tags, but will fail for copyright tags with mixed capitalization. For example, we don't know that [[jojo_no_kimyou_na_bouken]] should be capitalized as [[JoJo no Kimyou na Bouken]]. If we don't know how to capitalize the new tag, we leave the old tag as-is so it can manually be fixed. * Some aliases might require changing how a tag is pluralized. If we changed [[rat]] to [[mouse]], then we should change `[[rat]]s` to [[mice]]. We don't try to deal with this. * In general, some changes might require entire sentences to be rewritten to keep the grammar correct. Changing something like [[skirt lift]] to [[lifting skirt]] could break the grammar of the sentence. We don't try to deal with this. 
--- app/logical/d_text.rb | 46 +++++++++++++++++++++++++++++++++++++ app/logical/tag_mover.rb | 5 ++++ app/models/wiki_page.rb | 10 ++++++++ test/unit/d_text_test.rb | 29 +++++++++++++++++++++++ test/unit/tag_alias_test.rb | 11 +++++++++ 5 files changed, 101 insertions(+) diff --git a/app/logical/d_text.rb b/app/logical/d_text.rb index 26f451a47..e0e5ff10e 100644 --- a/app/logical/d_text.rb +++ b/app/logical/d_text.rb @@ -157,6 +157,52 @@ class DText Set.new(parse_external_links(a)) != Set.new(parse_external_links(b)) end + # Rewrite wiki links to [[old_name]] with [[new_name]]. We attempt to match + # the capitalization of the old tag when rewriting it to the new tag, but if + # we can't determine how the new tag should be capitalized based on some + # simple heuristics, then we skip rewriting the tag. + def self.rewrite_wiki_links(dtext, old_name, new_name) + old_name = old_name.downcase.squeeze("_").tr("_", " ").strip + new_name = new_name.downcase.squeeze("_").tr("_", " ").strip + + # Match `[[name]]` or `[[name|title]]` + dtext.gsub(/\[\[(.*?)(?:\|(.*?))?\]\]/) do |match| + name = $1 + title = $2 + + # Skip this link if it isn't the tag we're trying to replace. + normalized_name = name.downcase.tr("_", " ").squeeze(" ").strip + next match if normalized_name != old_name + + # Strip qualifiers, e.g. `atago (midsummer march) (azur lane)` => `atago` + unqualified_name = name.tr("_", " ").squeeze(" ").strip.gsub(/( \(.*\))+\z/, "") + has_qualifier = name.match?(/( \(.*\))+\z/) + + # If old tag was lowercase, e.g. [[ink tank (Splatoon)]], then keep new tag in lowercase. + if unqualified_name == unqualified_name.downcase + final_name = new_name + # If old tag was capitalized, e.g. [[Colored pencil (medium)]], then capitalize new tag. + elsif unqualified_name == unqualified_name.downcase.capitalize + final_name = new_name.capitalize + # If old tag was in titlecase, e.g. [[Hatsune Miku (cosplay)]], then titlecase new tag. 
+ elsif unqualified_name == unqualified_name.split.map(&:capitalize).join(" ") + final_name = new_name.split.map(&:capitalize).join(" ") + # If we can't determine how to capitalize the new tag, then keep the old tag. + # e.g. [[Suzumiya Haruhi no Yuuutsu]] -> [[The Melancholy of Haruhi Suzumiya]] + else + next match + end + + if title.present? + "[[#{final_name}|#{title}]]" + elsif has_qualifier + "[[#{final_name}|]]" + else + "[[#{final_name}]]" + end + end + end + def self.strip_blocks(string, tag) n = 0 stripped = "" diff --git a/app/logical/tag_mover.rb b/app/logical/tag_mover.rb index 768ad2e1d..6bbd2bd86 100644 --- a/app/logical/tag_mover.rb +++ b/app/logical/tag_mover.rb @@ -14,6 +14,7 @@ class TagMover move_wiki! move_saved_searches! move_blacklists! + rewrite_wiki_links! move_posts! end end @@ -63,6 +64,10 @@ class TagMover User.rewrite_blacklists!(old_tag.name, new_tag.name) end + def rewrite_wiki_links! + WikiPage.rewrite_wiki_links!(old_tag.name, new_tag.name) + end + def merge_artists! old_artist.lock! new_artist.lock! diff --git a/app/models/wiki_page.rb b/app/models/wiki_page.rb index f3c9b5611..e10c1312f 100644 --- a/app/models/wiki_page.rb +++ b/app/models/wiki_page.rb @@ -222,6 +222,16 @@ class WikiPage < ApplicationRecord TagAlias.to_aliased(titles & tags) end + def self.rewrite_wiki_links!(old_name, new_name) + broken_wikis = WikiPage.linked_to(old_name) + + broken_wikis.each do |wiki| + wiki.lock! + wiki.body = DText.rewrite_wiki_links(wiki.body, old_name, new_name) + wiki.save! 
+ end + end + def to_param if title =~ /\A\d+\z/ "~#{title}" diff --git a/test/unit/d_text_test.rb b/test/unit/d_text_test.rb index 0fc68062d..98bb9b415 100644 --- a/test/unit/d_text_test.rb +++ b/test/unit/d_text_test.rb @@ -5,6 +5,10 @@ class DTextTest < ActiveSupport::TestCase assert_equal(expected, DText.strip_dtext(dtext)) end + def assert_rewrite_wiki_links(expected, dtext, old, new) + assert_equal(expected, DText.rewrite_wiki_links(dtext, old, new)) + end + context "DText" do context "#strip_dtext" do should "strip dtext markup from the input" do @@ -125,6 +129,31 @@ class DTextTest < ActiveSupport::TestCase end end + context "#rewrite_wiki_links" do + should "work" do + assert_rewrite_wiki_links("[[rabbit]]", "[[bunny]]", "bunny", "rabbit") + assert_rewrite_wiki_links("[[rabbit|bun]]", "[[bunny|bun]]", "bunny", "rabbit") + + assert_rewrite_wiki_links("[[cat]] [[rabbit]]", "[[cat]] [[rabbit]]", "bunny", "rabbit") + assert_rewrite_wiki_links("I like [[cat]]s and [[bunny]]s", "I like [[cat]]s and [[rabbit]]s", "rabbit", "bunny") + + assert_rewrite_wiki_links("[[miku hatsune (cosplay)|]]", "[[hatsune miku (cosplay)]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)") + assert_rewrite_wiki_links("[[Miku hatsune (cosplay)|]]", "[[Hatsune miku (cosplay)]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)") + assert_rewrite_wiki_links("[[Miku Hatsune (cosplay)|]]", "[[Hatsune Miku (cosplay)]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)") + assert_rewrite_wiki_links("[[miku hatsune (cosplay)|miku]]", "[[hatsune miku (cosplay)|miku]]", "hatsune_miku_(cosplay)", "miku_hatsune_(cosplay)") + + assert_rewrite_wiki_links("[[the legend of zelda]]", "[[zelda no densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + assert_rewrite_wiki_links("[[The legend of zelda]]", "[[Zelda no densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + assert_rewrite_wiki_links("[[The Legend Of Zelda]]", "[[Zelda No Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") 
+ assert_rewrite_wiki_links("[[the legend of zelda]]", "[[zelda_no_densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + assert_rewrite_wiki_links("[[The legend of zelda]]", "[[Zelda_no_densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + assert_rewrite_wiki_links("[[The Legend Of Zelda]]", "[[Zelda_No_Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + + assert_rewrite_wiki_links("[[Zelda no Densetsu]]", "[[Zelda no Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + assert_rewrite_wiki_links("[[Zelda_no_Densetsu]]", "[[Zelda_no_Densetsu]]", "zelda_no_densetsu", "the_legend_of_zelda") + end + end + context "#from_html" do should "convert basic html to dtext" do assert_equal("[b]abc[/b] [i]def[/i] [u]ghi[/u]", DText.from_html("abc def ghi")) diff --git a/test/unit/tag_alias_test.rb b/test/unit/tag_alias_test.rb index bf6ae00bc..bd5af29d1 100644 --- a/test/unit/tag_alias_test.rb +++ b/test/unit/tag_alias_test.rb @@ -197,6 +197,17 @@ class TagAliasTest < ActiveSupport::TestCase assert_equal(%w[111 333], @wiki2.other_names) assert_equal("second", @wiki2.body) end + + should "rewrite links in other wikis to use the new tag" do + @wiki = create(:wiki_page, body: "foo [[aaa]] bar") + @ta = create(:tag_alias, antecedent_name: "aaa", consequent_name: "bbb") + + @ta.approve!(approver: @admin) + perform_enqueued_jobs + assert_equal("active", @ta.reload.status) + + assert_equal("foo [[bbb]] bar", @wiki.reload.body) + end end context "when the tags have artist entries" do