From 21747e1f8e0fef2632a5b8e64041a67720c4eb74 Mon Sep 17 00:00:00 2001
From: evazion
Date: Sun, 2 Oct 2022 20:35:24 -0500
Subject: [PATCH] emails: add fix script to fix invalid email addresses.

Add a fix script that fixes invalid email addresses if they can be
fixed, otherwise they're deleted.

For a long time we didn't have any email validation, so we ended up
with a lot of invalid email addresses containing typos or other random
garbage. This tries to fix the most common typos when possible,
otherwise the email address is deleted.

In many cases the user created two accounts, one with a typo in the
email and one with the correct email. In these cases we can't fix the
invalid email, so we just delete it.
---
 script/fixes/117_fix_invalid_emails.rb | 135 +++++++++++++++++++++++++
 1 file changed, 135 insertions(+)
 create mode 100755 script/fixes/117_fix_invalid_emails.rb

diff --git a/script/fixes/117_fix_invalid_emails.rb b/script/fixes/117_fix_invalid_emails.rb
new file mode 100755
index 000000000..c7b33464f
--- /dev/null
+++ b/script/fixes/117_fix_invalid_emails.rb
@@ -0,0 +1,135 @@
+#!/usr/bin/env ruby
+
+# Fixes invalid email addresses when a common typo can be corrected, and
+# deletes the addresses that can't be repaired or that duplicate another
+# address once fixed.
+#
+# Records are only changed when the FIX environment variable is truthy;
+# otherwise the script just prints what it would do to each address.
+
+require_relative "base"
+
+with_confirmation do
+  # Nothing is updated or destroyed unless FIX is set; the report is always printed.
+  fix = ENV.fetch("FIX", "false").truthy?
+
+  # Pattern a valid address must match. The same source is used for the database
+  # queries and for re-checking an address in Ruby after the typo fixes.
+  valid_pattern = '^[a-zA-Z0-9._%+-]+@([a-zA-Z0-9][a-zA-Z0-9-]{0,61}\.)+[a-zA-Z]{2,}$'
+  valid_regex = Regexp.new(valid_pattern)
+
+  # Domain endings the typo rules accept as plausible.
+  tlds = "com|net|org|info|ru|fr|it|nl|hu|de|fi|jp|se|ca|cn|cx|cz|dk|tw|su|es|no|ch|br|pl"
+
+  # "@gmail" followed by anything that isn't a common domain -> "@gmail.com".
+  # Built once here so the regexes aren't recompiled for every record.
+  providers = { "gmail" => "gmail.com", "yahoo" => "yahoo.com", "hotmail" => "hotmail.com", "yandex" => "yandex.ru" }
+  provider_rules = providers.map do |name, domain|
+    [/@#{name}(?![a-z0-9]{2,})(?!.(#{tlds}|co\.[a-z]{2}|plala\.or\.jp)).*/i, "@#{domain}"]
+  end
+
+  # "@gmail com" -> @gmail.com | @gmail,com -> @gmail.com | @gmail..com -> @gmail.com
+  separator_regex = /(?:[ ,]|\.\.)(#{tlds}|co)$/i
+
+  # Cyrillic letters that look like Latin ones, mapped to their Latin lookalikes.
+  cyrillic = { "а" => "a", "А" => "A", "С" => "C", "е" => "e", "Е" => "E", "К" => "K", "М" => "M", "о" => "o", "О" => "O", "Т" => "T" }
+
+  emails = EmailAddress.where_not_regex(:address, valid_pattern) # invalid emails
+
+  emails.find_each do |email|
+    old_address = email.address
+    address = email.address
+
+    # Flatten line breaks, then trim surrounding whitespace.
+    address = address.gsub(/\r|\n/, " ")
+    address = address.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
+
+    # foo,bar@gmail.com -> foo.bar@gmail.com | @gmail,com -> @gmail.com
+    address = address.gsub(/,/, ".")
+
+    address = address.gsub(/[\\\/]$/, '') # @qq.com\ -> @qq.com, @web.de/ -> @web.de
+    address = address.gsub(/^https?:\/\/(www\.)?/i, "") # https://xxx@gmail.com -> xxx@gmail.com
+    address = address.gsub(/^mailto:/i, "") # mailto:foo@gmail.com -> foo@gmail.com
+    address = address.gsub(/.* <(.*)>$/, '\1') # foo <bar@gmail.com> -> bar@gmail.com
+
+    provider_rules.each { |regex, replacement| address = address.gsub(regex, replacement) }
+
+    address = address.gsub(/@\./, "@") # @.gmail.com -> @gmail.com
+    address = address.gsub(/@com$/i, ".com") # @gmail@com -> @gmail.com
+
+    address = address.gsub(/\.co,$/i, '.com') # @gmail.co, -> @gmail.com
+    address = address.gsub(/\.com.$/i, '.com') # @gmail.com, -> @gmail.com
+    address = address.gsub(/\.con$/i, '.com') # @gmail.con -> @gmail.com
+
+    address = address.gsub(separator_regex, '.\1')
+
+    # @gmail -> @gmail.com
+    address = address.gsub(/@gmai$/i, "@gmail.com")
+    address = address.gsub(/@gmail$/i, "@gmail.com")
+    address = address.gsub(/@yahoo$/i, "@yahoo.com")
+    address = address.gsub(/@hotmai$/i, "@hotmail.com")
+    address = address.gsub(/@hotmail$/i, "@hotmail.com")
+    address = address.gsub(/@hot[^m]ail$/i, "@hotmail.com")
+    address = address.gsub(/@interia$/i, "@interia.pl")
+    address = address.gsub(/@live$/i, "@live.com")
+    address = address.gsub(/@mailinator$/i, "@mailinator.com")
+    address = address.gsub(/@naver$/i, "@naver.com")
+    address = address.gsub(/@verizon$/i, "@verizon.net")
+
+    # @gmailcom -> @gmail.com
+    address = address.gsub(/@(gmail|yahoo|hotmail|aol|163)com$/i, '@\1.com')
+
+    address = address.gsub(/@gamil\.com$/i, "@gmail.com") # @gamil.com -> @gmail.com
+    address = address.gsub(/@gmai\.com$/i, "@gmail.com") # @gmai.com -> @gmail.com
+    address = address.gsub(/@gmai\.co$/i, "@gmail.com") # @gmai.co -> @gmail.com
+    address = address.gsub(/@hotmai\.com$/i, "@hotmail.com") # @hotmai.com -> @hotmail.com
+    address = address.gsub(/@hot.ail\.com$/i, "@hotmail.com") # @hot.ail.com -> @hotmail.com
+    address = address.gsub(/@hot.mail\.com$/i, "@hotmail.com") # @hot,mail.com -> @hotmail.com
+
+    # The "." is unescaped here, so any single character between name and TLD is accepted.
+    address = address.gsub(/@hotmail.com$/i, "@hotmail.com") # @hotmail,com -> @hotmail.com
+    address = address.gsub(/@yahoo.com$/i, "@yahoo.com")
+    address = address.gsub(/@mail.ru$/i, "@mail.ru")
+
+    address = address.gsub(/@([a-z]+)\.com@\1\.com$/i, '@\1.com') # @gmail.com@gmail.com -> @gmail.com
+    address = address.gsub(/@([a-z]+)@\1\.com$/i, '@\1.com') # @gmail@gmail.com -> @gmail.com
+    #address = address.gsub(/@gmail@com$/, "@gmail.com")
+    #address = address.gsub(/@aol@aol\.com$/, "@aol.com")
+    address = address.gsub(/@tuta@io$/i, "@tuta.io")
+
+    # cyrillic to latin
+    address = address.gsub(/[^[:ascii:]]/) { cyrillic.fetch(_1, _1) }
+    #address = I18n.transliterate(address)
+
+    address = address.downcase.gsub(/^(.*)\1$/i, '\1') if address.downcase.match?(/^(.*)\1$/i) # Foo@gmail.comfoo@gmail.com -> foo@gmail.com
+
+    # foo@foo@gmail.com -> foo@gmail.com. The domain is captured and re-emitted;
+    # replacing the whole match with just the local part would drop it.
+    doubled_local_part = /^(.*)@\1(@[a-zA-Z]+\.com)$/i
+    address = address.downcase.gsub(doubled_local_part, '\1\2') if address.downcase.match?(doubled_local_part)
+
+    normalized_address = EmailValidator.normalize(address)
+    dupe_emails = EmailAddress.where(normalized_address: normalized_address).excluding(email)
+    label = old_address.ljust(40, " ").gsub(/\r|\n/, "") # keep each report entry on one line
+
+    if dupe_emails.present?
+      # Another record already has this address once normalized, so this one is deleted instead of fixed.
+      owners = dupe_emails.map { "#{_1.user.name}##{_1.user.id}" }.join(", ")
+      puts "#{label} DELETE (#{owners}, #{email.user.name}##{email.user.id})"
+      email.destroy if fix
+    elsif address.match?(valid_regex)
+      puts "#{label} #{address}"
+      email.user.update!(email_address_attributes: { address: address }) if fix
+    else
+      puts "#{label} DELETE"
+      email.destroy if fix
+    end
+  end
+
+  # Second pass: delete addresses whose normalized form is still invalid.
+  emails = EmailAddress.where_not_regex(:normalized_address, valid_pattern)
+  emails.find_each do |email|
+    puts "#{email.address.ljust(40, " ")} DELETE"
+    email.destroy if fix
+  end
+end