posts: normalize Unicode to NFC form in post sources.
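Fix strings like "pokémon" in NFD form ("e" followed by the combining acute accent U+0301) and "pokémon" in NFC form (the single precomposed character U+00E9) being considered different strings in post sources, even though they render identically. Also add a fix script (script/fixes/097_normalize_post_sources.rb) to normalize existing sources. There were only 15 posts with unnormalized sources.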
This commit is contained in:
@@ -1,6 +1,4 @@
|
|||||||
# frozen_string_literal: true
|
# frozen_string_literal: true
|
||||||
# normalize unicode in non-web sources
|
|
||||||
# normalize percent-encode unicode in source urls
|
|
||||||
|
|
||||||
class Post < ApplicationRecord
|
class Post < ApplicationRecord
|
||||||
class RevertError < StandardError; end
|
class RevertError < StandardError; end
|
||||||
@@ -14,9 +12,9 @@ class Post < ApplicationRecord
|
|||||||
|
|
||||||
deletable
|
deletable
|
||||||
|
|
||||||
|
normalize :source, :normalize_source
|
||||||
before_validation :merge_old_changes
|
before_validation :merge_old_changes
|
||||||
before_validation :normalize_tags
|
before_validation :normalize_tags
|
||||||
before_validation :strip_source
|
|
||||||
before_validation :parse_pixiv_id
|
before_validation :parse_pixiv_id
|
||||||
before_validation :blank_out_nonexistent_parents
|
before_validation :blank_out_nonexistent_parents
|
||||||
before_validation :remove_parent_loops
|
before_validation :remove_parent_loops
|
||||||
@@ -1334,8 +1332,8 @@ class Post < ApplicationRecord
|
|||||||
self
|
self
|
||||||
end
|
end
|
||||||
|
|
||||||
def strip_source
|
def self.normalize_source(source)
|
||||||
self.source = source.try(:strip)
|
source.to_s.strip.unicode_normalize(:nfc)
|
||||||
end
|
end
|
||||||
|
|
||||||
def mark_as_translated(params)
|
def mark_as_translated(params)
|
||||||
|
|||||||
14
script/fixes/097_normalize_post_sources.rb
Executable file
14
script/fixes/097_normalize_post_sources.rb
Executable file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
require_relative "base"
|
||||||
|
|
||||||
|
with_confirmation do
|
||||||
|
CurrentUser.scoped(User.system, "127.0.0.1") do
|
||||||
|
Post.where("source ~ '[^[:ascii:]]'").find_each do |post|
|
||||||
|
next if post.source.unicode_normalize(:nfc) == post.source
|
||||||
|
|
||||||
|
post.update!(source: post.source)
|
||||||
|
puts({ id: post.id, old_source: post.source_before_last_save, new_source: post.source })
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
@@ -1340,6 +1340,15 @@ class PostTest < ActiveSupport::TestCase
|
|||||||
end
|
end
|
||||||
|
|
||||||
context "with a source" do
|
context "with a source" do
|
||||||
|
context "that contains unicode characters" do
|
||||||
|
should "normalize the source to NFC form" do
|
||||||
|
source1 = "poke\u0301mon" # pokémon (nfd form)
|
||||||
|
source2 = "pok\u00e9mon" # pokémon (nfc form)
|
||||||
|
@post.update!(source: source1)
|
||||||
|
assert_equal(source2, @post.source)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
context "that is not from pixiv" do
|
context "that is not from pixiv" do
|
||||||
should "clear the pixiv id" do
|
should "clear the pixiv id" do
|
||||||
@post.pixiv_id = 1234
|
@post.pixiv_id = 1234
|
||||||
|
|||||||
@@ -294,32 +294,4 @@ class UploadServiceTest < ActiveSupport::TestCase
|
|||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
context "#start!" do
|
|
||||||
subject { UploadService }
|
|
||||||
|
|
||||||
setup do
|
|
||||||
@source = "https://cdn.donmai.us/original/d3/4e/d34e4cf0a437a5d65f8e82b7bcd02606.jpg"
|
|
||||||
CurrentUser.user = travel_to(1.month.ago) do
|
|
||||||
FactoryBot.create(:user)
|
|
||||||
end
|
|
||||||
CurrentUser.ip_addr = "127.0.0.1"
|
|
||||||
end
|
|
||||||
|
|
||||||
teardown do
|
|
||||||
CurrentUser.user = nil
|
|
||||||
CurrentUser.ip_addr = nil
|
|
||||||
end
|
|
||||||
|
|
||||||
context "with a source containing unicode characters" do
|
|
||||||
should "normalize unicode characters in the source field" do
|
|
||||||
source1 = "poke\u0301mon" # pokémon (nfd form)
|
|
||||||
source2 = "pok\u00e9mon" # pokémon (nfc form)
|
|
||||||
service = subject.new(source: source1, rating: "s", file: upload_file("test/files/test.jpg"))
|
|
||||||
|
|
||||||
assert_nothing_raised { @upload = service.start! }
|
|
||||||
assert_equal(source2, @upload.source)
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user