emails: add fix script to fix invalid email addresses.
Add a fix script that fixes invalid email addresses if they can be fixed, otherwise they're deleted. For a long time we didn't have any email validation, so we ended up with a lot of invalid email addresses containing typos or other random garbage. This tries to fix the most common typos when possible, otherwise the email address is deleted. In many cases the user created two accounts, one with a typo in the email and one with the correct email. In these cases we can't fix the invalid email, so we just delete it.
This commit is contained in:
99
script/fixes/117_fix_invalid_emails.rb
Executable file
99
script/fixes/117_fix_invalid_emails.rb
Executable file
@@ -0,0 +1,99 @@
|
|||||||
|
#!/usr/bin/env ruby
|
||||||
|
|
||||||
|
require_relative "base"
|
||||||
|
|
||||||
|
with_confirmation do
|
||||||
|
emails = EmailAddress.where_not_regex(:address, '^[a-zA-Z0-9._%+-]+@([a-zA-Z0-9][a-zA-Z0-9-]{0,61}\.)+[a-zA-Z]{2,}$') # invalid emails
|
||||||
|
|
||||||
|
emails.find_each do |email|
|
||||||
|
old_address = email.address
|
||||||
|
address = email.address
|
||||||
|
|
||||||
|
address = address.gsub(/\r|\n/, " ")
|
||||||
|
address = address.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
|
||||||
|
|
||||||
|
# foo,bar@gmail.com -> foo.bar@gmail.com | @gmail,com -> @gmail.com
|
||||||
|
address = address.gsub(/,/, ".")
|
||||||
|
|
||||||
|
address = address.gsub(/[\\\/]$/, '') # @qq.com\ -> @qq.com, @web.de/ -> @web.de
|
||||||
|
address = address.gsub(/^https?:\/\/(www\.)?/i, "") # https://xxx@gmail.com -> xxx@gmail.com
|
||||||
|
address = address.gsub(/^mailto:/i, "") # mailto:foo@gmail.com -> foo@gmail.com
|
||||||
|
address = address.gsub(/.* <(.*)>$/, '\1') # foo <bar@gmail.com> -> bar@gmail.com
|
||||||
|
|
||||||
|
# "@gmail" followed by anything that isn't a common domain
|
||||||
|
address = address.gsub(/@gmail(?![a-z0-9]{2,})(?!.(com|net|org|info|ru|fr|it|nl|hu|de|fi|jp|se|ca|cn|cx|cz|dk|tw|su|es|no|ch|br|pl|co\.[a-z]{2}|plala\.or\.jp)).*/i, "@gmail.com")
|
||||||
|
address = address.gsub(/@yahoo(?![a-z0-9]{2,})(?!.(com|net|org|info|ru|fr|it|nl|hu|de|fi|jp|se|ca|cn|cx|cz|dk|tw|su|es|no|ch|br|pl|co\.[a-z]{2}|plala\.or\.jp)).*/i, "@yahoo.com")
|
||||||
|
address = address.gsub(/@hotmail(?![a-z0-9]{2,})(?!.(com|net|org|info|ru|fr|it|nl|hu|de|fi|jp|se|ca|cn|cx|cz|dk|tw|su|es|no|ch|br|pl|co\.[a-z]{2}|plala\.or\.jp)).*/i, "@hotmail.com")
|
||||||
|
address = address.gsub(/@yandex(?![a-z0-9]{2,})(?!.(com|net|org|info|ru|fr|it|nl|hu|de|fi|jp|se|ca|cn|cx|cz|dk|tw|su|es|no|ch|br|pl|co\.[a-z]{2}|plala\.or\.jp)).*/i, "@yandex.ru")
|
||||||
|
|
||||||
|
address = address.gsub(/@\./, "@") # @.gmail.com -> @gmail.com
|
||||||
|
address = address.gsub(/@com$/i, ".com") # @gmail@com -> @gmail.com
|
||||||
|
|
||||||
|
address = address.gsub(/\.co,$/i, '.com') # @gmail.co, -> @gmail.com
|
||||||
|
address = address.gsub(/\.com.$/i, '.com') # @gmail.com, -> @gmail.com
|
||||||
|
address = address.gsub(/\.con$/i, '.com') # @gmail.con -> @gmail.com
|
||||||
|
|
||||||
|
# "@gmail com" -> @gmail.com | @gmail,com -> @gmail.com | @gmail..com -> @gmail.com
|
||||||
|
address = address.gsub(/(?:[ ,]|\.\.)(com|net|org|info|ru|fr|it|nl|hu|de|fi|jp|se|ca|cn|cx|cz|dk|tw|su|es|no|ch|br|pl|co)$/i, '.\1')
|
||||||
|
|
||||||
|
# @gmail -> @gmail.com
|
||||||
|
address = address.gsub(/@gmai$/i, "@gmail.com")
|
||||||
|
address = address.gsub(/@gmail$/i, "@gmail.com")
|
||||||
|
address = address.gsub(/@yahoo$/i, "@yahoo.com")
|
||||||
|
address = address.gsub(/@hotmai$/i, "@hotmail.com")
|
||||||
|
address = address.gsub(/@hotmail$/i, "@hotmail.com")
|
||||||
|
address = address.gsub(/@hot[^m]ail$/i, "@hotmail.com")
|
||||||
|
address = address.gsub(/@interia$/i, "@interia.pl")
|
||||||
|
address = address.gsub(/@live$/i, "@live.com")
|
||||||
|
address = address.gsub(/@mailinator$/i, "@mailinator.com")
|
||||||
|
address = address.gsub(/@naver$/i, "@naver.com")
|
||||||
|
address = address.gsub(/@verizon$/i, "@verizon.net")
|
||||||
|
|
||||||
|
# @gmailcom -> @gmail.com
|
||||||
|
address = address.gsub(/@(gmail|yahoo|hotmail|aol|163)com$/i, '@\1.com')
|
||||||
|
|
||||||
|
address = address.gsub(/@gamil\.com$/i, "@gmail.com") # @gamil.com -> @gmail.com
|
||||||
|
address = address.gsub(/@gmai\.com$/i, "@gmail.com") # @gmai.com -> @gmail.com
|
||||||
|
address = address.gsub(/@gmai\.co$/i, "@gmail.com") # @gmai.co -> @gmail.com
|
||||||
|
address = address.gsub(/@hotmai\.com$/i, "@hotmail.com") # @hotmai.com -> @hotmail.com
|
||||||
|
address = address.gsub(/@hot.ail\.com$/i, "@hotmail.com") # @hot.ail.com -> @hotmail.com
|
||||||
|
address = address.gsub(/@hot.mail\.com$/i, "@hotmail.com") # @hot,mail.com -> @hotmail.com
|
||||||
|
|
||||||
|
address = address.gsub(/@hotmail.com$/i, "@hotmail.com") # @hotmail,com -> @hotmail.com
|
||||||
|
address = address.gsub(/@yahoo.com$/i, "@yahoo.com")
|
||||||
|
address = address.gsub(/@mail.ru$/i, "@mail.ru")
|
||||||
|
|
||||||
|
address = address.gsub(/@([a-z]+)\.com@\1\.com$/i, '@\1.com') # @gmail.com@gmail.com -> @gmail.com
|
||||||
|
address = address.gsub(/@([a-z]+)@\1\.com$/i, '@\1.com') # @gmail@gmail.com -> @gmail.com
|
||||||
|
#address = address.gsub(/@gmail@com$/, "@gmail.com")
|
||||||
|
#address = address.gsub(/@aol@aol\.com$/, "@aol.com")
|
||||||
|
address = address.gsub(/@tuta@io$/i, "@tuta.io")
|
||||||
|
|
||||||
|
# cyrillic to latin
|
||||||
|
cyrillic = { "а": "a", "А": "A", "С": "C", "е": "e", "Е": "E", "К": "K", "М": "M", "о": "o", "О": "O", "Т": "T" }.stringify_keys
|
||||||
|
address = address.gsub(/[^[:ascii:]]/) { cyrillic.fetch(_1, _1) }
|
||||||
|
#address = I18n.transliterate(address)
|
||||||
|
|
||||||
|
address = address.downcase.gsub(/^(.*)\1$/i, '\1') if address.downcase.match?(/^(.*)\1$/i) # Foo@gmail.comfoo@gmail.com -> foo@gmail.com
|
||||||
|
address = address.downcase.gsub(/^(.*)@\1@[a-zA-Z]+\.com$/i, '\1') if address.downcase.match?(/^(.*)@\1@[a-zA-Z]+\.com$/i) # foo@foo@gmail.com -> foo@gmail.com
|
||||||
|
|
||||||
|
normalized_address = EmailValidator.normalize(address)
|
||||||
|
dupe_emails = EmailAddress.where(normalized_address: normalized_address).excluding(email)
|
||||||
|
if dupe_emails.present?
|
||||||
|
puts "#{old_address.ljust(40, " ")} DELETE (#{dupe_emails.map { "#{_1.user.name}##{_1.user.id}" }.join(", ")}, #{email.user.name}##{email.user.id})"
|
||||||
|
email.destroy if ENV.fetch("FIX", "false").truthy?
|
||||||
|
elsif address.match?(/^[a-zA-Z0-9._%+-]+@([a-zA-Z0-9][a-zA-Z0-9-]{0,61}\.)+[a-zA-Z]{2,}$/)
|
||||||
|
puts "#{old_address.ljust(40, " ").gsub(/\r|\n/, "")} #{address}"
|
||||||
|
email.user.update!(email_address_attributes: { address: address }) if ENV.fetch("FIX", "false").truthy?
|
||||||
|
else
|
||||||
|
puts "#{old_address.ljust(40, " ")} DELETE"
|
||||||
|
email.destroy if ENV.fetch("FIX", "false").truthy?
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
emails = EmailAddress.where_not_regex(:normalized_address, '^[a-zA-Z0-9._%+-]+@([a-zA-Z0-9][a-zA-Z0-9-]{0,61}\.)+[a-zA-Z]{2,}$')
|
||||||
|
emails.find_each do |email|
|
||||||
|
puts "#{email.address.ljust(40, " ")} DELETE"
|
||||||
|
email.destroy if ENV.fetch("FIX", "false").truthy?
|
||||||
|
end
|
||||||
|
end
|
||||||
Reference in New Issue
Block a user