From 0cfd0ff436b30d7eaa0c05c72d088c97a8b47113 Mon Sep 17 00:00:00 2001
From: evazion
Date: Mon, 3 Oct 2022 02:48:21 -0500
Subject: [PATCH] emails: add fix script to renormalize email addresses.

Whenever the email address normalization procedure changes, the
`normalized_address` column of the email address table must be updated.
This is normally when the list of canonical domain mappings changes.
Renormalizing addresses may also require deleting duplicates.
---
 app/logical/email_validator.rb         | 20 ++++++++++-
 script/fixes/119_renormalize_emails.rb | 47 ++++++++++++++++++++++++++
 2 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100755 script/fixes/119_renormalize_emails.rb

diff --git a/app/logical/email_validator.rb b/app/logical/email_validator.rb
index 241f2314f..2585f2424 100644
--- a/app/logical/email_validator.rb
+++ b/app/logical/email_validator.rb
@@ -25,15 +25,18 @@ module EmailValidator
   CANONICAL_DOMAINS = {
     "googlemail.com" => "gmail.com",
     "hotmail.com.ar" => "outlook.com",
+    "hotmail.com.au" => "outlook.com",
     "hotmail.com.br" => "outlook.com",
     "hotmail.com.hk" => "outlook.com",
     "hotmail.com.tw" => "outlook.com",
-    "hotmail.co.uk" => "outlook.com",
     "hotmail.co.jp" => "outlook.com",
+    "hotmail.co.nz" => "outlook.com",
     "hotmail.co.th" => "outlook.com",
+    "hotmail.co.uk" => "outlook.com",
     "hotmail.com" => "outlook.com",
     "hotmail.be" => "outlook.com",
     "hotmail.ca" => "outlook.com",
+    "hotmail.cl" => "outlook.com",
     "hotmail.de" => "outlook.com",
     "hotmail.dk" => "outlook.com",
     "hotmail.es" => "outlook.com",
@@ -44,6 +47,8 @@ module EmailValidator
     "hotmail.my" => "outlook.com",
     "hotmail.nl" => "outlook.com",
     "hotmail.no" => "outlook.com",
+    "hotmail.ru" => "outlook.com",
+    "hotmail.sg" => "outlook.com",
     "hotmail.se" => "outlook.com",
     "live.com.au" => "outlook.com",
     "live.com.ar" => "outlook.com",
@@ -52,15 +57,20 @@ module EmailValidator
     "live.co.uk" => "outlook.com",
     "live.com" => "outlook.com",
     "live.at" => "outlook.com",
+    "live.be" => "outlook.com",
     "live.ca" => "outlook.com",
     "live.cl" => "outlook.com",
     "live.cn" => "outlook.com",
     "live.de" => "outlook.com",
     "live.dk" => "outlook.com",
     "live.fr" => "outlook.com",
+    "live.hk" => "outlook.com",
+    "live.ie" => "outlook.com",
     "live.it" => "outlook.com",
     "live.jp" => "outlook.com",
     "live.nl" => "outlook.com",
+    "live.no" => "outlook.com",
+    "live.ru" => "outlook.com",
     "live.se" => "outlook.com",
     "msn.com" => "outlook.com",
     "outlook.com.ar" => "outlook.com",
@@ -78,6 +88,7 @@ module EmailValidator
     "outlook.cn" => "outlook.com",
     "outlook.de" => "outlook.com",
     "outlook.dk" => "outlook.com",
+    "outlook.es" => "outlook.com",
     "outlook.fr" => "outlook.com",
     "outlook.ie" => "outlook.com",
     "outlook.it" => "outlook.com",
@@ -94,6 +105,7 @@ module EmailValidator
     "yahoo.com.cn" => "yahoo.com",
     "yahoo.com.hk" => "yahoo.com",
     "yahoo.com.mx" => "yahoo.com",
+    "yahoo.com.my" => "yahoo.com",
     "yahoo.com.ph" => "yahoo.com",
     "yahoo.com.sg" => "yahoo.com",
     "yahoo.com.tw" => "yahoo.com",
@@ -103,13 +115,19 @@ module EmailValidator
     "yahoo.co.jp" => "yahoo.com",
     "yahoo.co.nz" => "yahoo.com",
     "yahoo.co.uk" => "yahoo.com",
+    "yahoo.co.th" => "yahoo.com",
     "yahoo.ne.jp" => "yahoo.com",
     "yahoo.ca" => "yahoo.com",
     "yahoo.cn" => "yahoo.com",
     "yahoo.de" => "yahoo.com",
+    "yahoo.dk" => "yahoo.com",
     "yahoo.es" => "yahoo.com",
     "yahoo.fr" => "yahoo.com",
+    "yahoo.ie" => "yahoo.com",
+    "yahoo.in" => "yahoo.com",
     "yahoo.it" => "yahoo.com",
+    "yahoo.no" => "yahoo.com",
+    "yahoo.se" => "yahoo.com",
     "ymail.com" => "yahoo.com",
     "126.com" => "163.com",
     "aim.com" => "aol.com",
diff --git a/script/fixes/119_renormalize_emails.rb b/script/fixes/119_renormalize_emails.rb
new file mode 100755
index 000000000..05b2f9b2d
--- /dev/null
+++ b/script/fixes/119_renormalize_emails.rb
@@ -0,0 +1,47 @@
+#!/usr/bin/env ruby
+
+# Recompute the `normalized_address` of every EmailAddress with the current
+# EmailValidator.normalize rules.
+#
+# When the recomputed value is already used by other addresses, the candidates
+# are ranked by their user's `last_logged_in_at` (most recent first), then by
+# user id (highest first). The first one is kept and the others are deleted.
+#
+# Nothing is updated or deleted unless the FIX environment variable is truthy;
+# without it the script only prints what it would do.
+
+require_relative "base"
+
+with_confirmation do
+  # Read the switch once so that both branches below interpret it the same way.
+  fix = ENV.fetch("FIX", "false").truthy?
+
+  EmailAddress.find_each do |email|
+    normalized_address = EmailValidator.normalize(email.address)
+    next if email.normalized_address == normalized_address
+
+    # Addresses already stored under the new normalized form. The inner join
+    # leaves out addresses without a user record.
+    # NOTE(review): `email` itself is not covered by that join; the sort below
+    # assumes it has a user -- confirm.
+    dupe_emails = EmailAddress.where(normalized_address: normalized_address).joins(:user).to_a
+
+    if dupe_emails.present?
+      dupe_emails += [email]
+      dupe_emails.sort_by! { |dupe_email| [-dupe_email.user.last_logged_in_at.to_i, -dupe_email.user.id] }
+      dupe_emails => [keep, *dupes]
+
+      puts "#{"#{keep.address} (#{keep.user.name}##{keep.user.id})".ljust(60, " ")} DELETE #{dupes.map { |dupe| "#{dupe.address} (#{dupe.user.name}##{dupe.user.id})" }.join(" ")}"
+      next unless fix
+
+      dupes.each(&:destroy)
+
+      # When the survivor is the address being renormalized, it still holds the
+      # stale value, so bring it up to date now instead of on a later run.
+      email.update!(normalized_address: normalized_address) if keep == email
+    else
+      puts "#{email.normalized_address.ljust(60, " ")} #{normalized_address}"
+      email.update!(normalized_address: normalized_address) if fix
+    end
+  end
+end