emails: add fix script to renormalize email addresses.

Whenever the email address normalization procedure changes, the
`normalized_address` column of the email address table must be updated.
This normally happens when the list of canonical domain mappings changes.

Renormalizing addresses may also require deleting duplicates.
This commit is contained in:
evazion
2022-10-03 02:48:21 -05:00
parent 86e69e3401
commit 0cfd0ff436
2 changed files with 44 additions and 1 deletions

View File

@@ -25,15 +25,18 @@ module EmailValidator
CANONICAL_DOMAINS = {
"googlemail.com" => "gmail.com",
"hotmail.com.ar" => "outlook.com",
"hotmail.com.au" => "outlook.com",
"hotmail.com.br" => "outlook.com",
"hotmail.com.hk" => "outlook.com",
"hotmail.com.tw" => "outlook.com",
"hotmail.co.uk" => "outlook.com",
"hotmail.co.jp" => "outlook.com",
"hotmail.co.nz" => "outlook.com",
"hotmail.co.th" => "outlook.com",
"hotmail.co.uk" => "outlook.com",
"hotmail.com" => "outlook.com",
"hotmail.be" => "outlook.com",
"hotmail.ca" => "outlook.com",
"hotmail.cl" => "outlook.com",
"hotmail.de" => "outlook.com",
"hotmail.dk" => "outlook.com",
"hotmail.es" => "outlook.com",
@@ -44,6 +47,8 @@ module EmailValidator
"hotmail.my" => "outlook.com",
"hotmail.nl" => "outlook.com",
"hotmail.no" => "outlook.com",
"hotmail.ru" => "outlook.com",
"hotmail.sg" => "outlook.com",
"hotmail.se" => "outlook.com",
"live.com.au" => "outlook.com",
"live.com.ar" => "outlook.com",
@@ -52,15 +57,20 @@ module EmailValidator
"live.co.uk" => "outlook.com",
"live.com" => "outlook.com",
"live.at" => "outlook.com",
"live.be" => "outlook.com",
"live.ca" => "outlook.com",
"live.cl" => "outlook.com",
"live.cn" => "outlook.com",
"live.de" => "outlook.com",
"live.dk" => "outlook.com",
"live.fr" => "outlook.com",
"live.hk" => "outlook.com",
"live.ie" => "outlook.com",
"live.it" => "outlook.com",
"live.jp" => "outlook.com",
"live.nl" => "outlook.com",
"live.no" => "outlook.com",
"live.ru" => "outlook.com",
"live.se" => "outlook.com",
"msn.com" => "outlook.com",
"outlook.com.ar" => "outlook.com",
@@ -78,6 +88,7 @@ module EmailValidator
"outlook.cn" => "outlook.com",
"outlook.de" => "outlook.com",
"outlook.dk" => "outlook.com",
"outlook.es" => "outlook.com",
"outlook.fr" => "outlook.com",
"outlook.ie" => "outlook.com",
"outlook.it" => "outlook.com",
@@ -94,6 +105,7 @@ module EmailValidator
"yahoo.com.cn" => "yahoo.com",
"yahoo.com.hk" => "yahoo.com",
"yahoo.com.mx" => "yahoo.com",
"yahoo.com.my" => "yahoo.com",
"yahoo.com.ph" => "yahoo.com",
"yahoo.com.sg" => "yahoo.com",
"yahoo.com.tw" => "yahoo.com",
@@ -103,13 +115,19 @@ module EmailValidator
"yahoo.co.jp" => "yahoo.com",
"yahoo.co.nz" => "yahoo.com",
"yahoo.co.uk" => "yahoo.com",
"yahoo.co.th" => "yahoo.com",
"yahoo.ne.jp" => "yahoo.com",
"yahoo.ca" => "yahoo.com",
"yahoo.cn" => "yahoo.com",
"yahoo.de" => "yahoo.com",
"yahoo.dk" => "yahoo.com",
"yahoo.es" => "yahoo.com",
"yahoo.fr" => "yahoo.com",
"yahoo.ie" => "yahoo.com",
"yahoo.in" => "yahoo.com",
"yahoo.it" => "yahoo.com",
"yahoo.no" => "yahoo.com",
"yahoo.se" => "yahoo.com",
"ymail.com" => "yahoo.com",
"126.com" => "163.com",
"aim.com" => "aol.com",

View File

@@ -0,0 +1,25 @@
#!/usr/bin/env ruby
require_relative "base"
# Recompute `normalized_address` for every EmailAddress and print each record
# whose stored value no longer matches what EmailValidator.normalize returns.
#
# By default this is a dry run that only prints. Set the FIX environment
# variable to a truthy value to actually write the changes.
#
# For each out-of-date record there are two cases:
#
# * Another address (belonging to a user) already has the new normalized value.
#   The records are then duplicates of each other: the one whose user logged in
#   most recently (ties broken by highest user id) is kept and the others are
#   destroyed.
# * Otherwise the record's `normalized_address` is simply updated.
#
# NOTE(review): `with_confirmation` comes from the required "base" file and
# `String#truthy?` is not core Ruby; both are assumed to be project helpers.
with_confirmation do
  # Read the flag once so both branches interpret it the same way.
  fix = ENV.fetch("FIX", "false").truthy?

  # Formats an email as "address (username#user_id)" for the report lines.
  label = ->(record) { "#{record.address} (#{record.user.name}##{record.user.id})" }

  EmailAddress.find_each do |email|
    normalized_address = EmailValidator.normalize(email.address)
    next if email.normalized_address == normalized_address

    # `email` itself can't be in this result because its stored value differs.
    dupe_emails = EmailAddress.where(normalized_address: normalized_address).joins(:user).to_a

    if dupe_emails.present?
      candidates = dupe_emails + [email]
      # Most recently logged-in user first; the highest user id wins ties.
      candidates.sort_by! { |candidate| [-candidate.user.last_logged_in_at.to_i, -candidate.user.id] }
      keep, *dupes = candidates

      puts "#{label.call(keep).ljust(60, " ")} DELETE #{dupes.map(&label).join(" ")}"

      if fix
        dupes.each(&:destroy)

        # If the record being renormalized is the one we kept, its stored value
        # is still the old one. Update it after the dupes are gone so the
        # column doesn't stay stale until the next run.
        keep.update!(normalized_address: normalized_address) if keep.normalized_address != normalized_address
      end
    else
      puts "#{email.normalized_address.ljust(60, " ")} #{normalized_address}"
      email.update!(normalized_address: normalized_address) if fix
    end
  end
end