posts: normalize Unicode to NFC form in post sources.

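Fix strings like "pokémon" in NFD form ("poke\u0301mon", i.e. "e" followed by
the combining acute accent U+0301) and "pokémon" in NFC form ("pok\u00e9mon",
i.e. the single precomposed character U+00E9) being considered different
strings in sources, even though they render identically.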

Also add a fix script to fix existing sources. There were only 15 posts
with unnormalized sources.
This commit is contained in:
evazion
2022-01-31 10:56:27 -06:00
parent 0132c5f0a5
commit 61c043c6b1
4 changed files with 26 additions and 33 deletions

View File

@@ -1340,6 +1340,15 @@ class PostTest < ActiveSupport::TestCase
end
context "with a source" do
# Sources that differ only in Unicode normalization form should be stored
# in a single canonical (NFC) representation.
context "that contains unicode characters" do
  should "normalize the source to NFC form" do
    # Decomposed input: plain "e" followed by the combining acute accent U+0301.
    decomposed = "poke\u0301mon"
    # Expected stored value: the precomposed character U+00E9.
    precomposed = "pok\u00e9mon"

    @post.update!(source: decomposed)

    assert_equal(precomposed, @post.source)
  end
end
context "that is not from pixiv" do
should "clear the pixiv id" do
@post.pixiv_id = 1234

View File

@@ -294,32 +294,4 @@ class UploadServiceTest < ActiveSupport::TestCase
end
end
end
# Exercises UploadService#start! with a current user and IP address set up
# for the duration of each test.
context "#start!" do
  subject { UploadService }

  setup do
    @source = "https://cdn.donmai.us/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg"

    # The acting user is created inside a travel_to(1.month.ago) block.
    CurrentUser.user = travel_to(1.month.ago) { FactoryBot.create(:user) }
    CurrentUser.ip_addr = "127.0.0.1"
  end

  # Reset the per-request globals so they do not leak into other tests.
  teardown do
    CurrentUser.user = nil
    CurrentUser.ip_addr = nil
  end

  context "with a source containing unicode characters" do
    should "normalize unicode characters in the source field" do
      # Decomposed (NFD) input: "e" + combining acute accent U+0301.
      decomposed = "poke\u0301mon"
      # Expected (NFC) result: precomposed U+00E9.
      precomposed = "pok\u00e9mon"

      uploader = subject.new(source: decomposed, rating: "s", file: upload_file("test/files/test.jpg"))

      assert_nothing_raised { @upload = uploader.start! }
      assert_equal(precomposed, @upload.source)
    end
  end
end
end