From edc7e523534ca88eef65529a59fa8377808495ac Mon Sep 17 00:00:00 2001
From: evazion
Date: Fri, 14 Oct 2022 18:38:15 -0500
Subject: [PATCH] emails: automatically fix typos in email addresses.

Try to automatically fix various kinds of typos and common mistakes in
email addresses when a user creates a new account.

It's common for users to sign up with addresses like `name@gmai.com`,
which leads to bounces when we try to send the welcome email.
---
 app/logical/danbooru/email_address.rb | 135 ++++++++++++++++++++++++++
 app/logical/email_address_type.rb     |  29 ++++++
 app/models/email_address.rb           |   8 +-
 config/initializers/types.rb          |   1 +
 config/locales/en.yml                 |   1 +
 test/unit/email_address_test.rb       |  37 ++++----
 6 files changed, 192 insertions(+), 19 deletions(-)
 create mode 100644 app/logical/danbooru/email_address.rb
 create mode 100644 app/logical/email_address_type.rb

diff --git a/app/logical/danbooru/email_address.rb b/app/logical/danbooru/email_address.rb
new file mode 100644
index 000000000..705360263
--- /dev/null
+++ b/app/logical/danbooru/email_address.rb
@@ -0,0 +1,135 @@
+# frozen_string_literal: true
+
+# A utility class that represents an email address. A wrapper around Mail::Address
+# that adds extra utility methods for normalizing and validating email addresses.
+#
+# @see https://www.rubydoc.info/gems/mail/Mail/Address
+# @see app/logical/email_address_type.rb
+# @see config/initializers/types.rb
+module Danbooru
+  class EmailAddress
+    class Error < StandardError; end
+
+    # https://www.regular-expressions.info/email.html
+    EMAIL_REGEX = /\A[a-z0-9._%+-]+@(?:[a-z0-9][a-z0-9-]{0,61}\.)+[a-z]{2,}\z/i
+
+    # @return [String] The original email address as a string.
+    attr_reader :address
+
+    # @return [Mail::Address] The parsed email address.
+    attr_reader :parsed_address
+
+    delegate :local, to: :parsed_address
+    alias_method :name, :local
+    alias_method :to_s, :address
+
+    # Parse a string into an email address, or raise an exception if the string is not a syntactically valid address.
+    #
+    # @param string [String, Danbooru::EmailAddress]
+    # @raise [Danbooru::EmailAddress::Error] If the string is not a syntactically valid email address.
+    def initialize(string)
+      raise Error, "#{string} is not a valid email address" if !string.to_s.match?(EMAIL_REGEX)
+
+      @address = string.to_s
+      @parsed_address = Mail::Address.new(address)
+    end
+
+    # Parse a string into an email address, or return nil if the string is not a syntactically valid email address.
+    #
+    # @param address [String, Danbooru::EmailAddress]
+    # @return [Danbooru::EmailAddress, nil]
+    def self.parse(address)
+      new(address)
+    rescue Error
+      nil
+    end
+
+    # Parse a string into an email address while attempting to fix common typos and mistakes, or return
+    # nil if the string can't be normalized into a valid email address.
+    #
+    # @param address [String]
+    # @return [Danbooru::EmailAddress, nil]
+    def self.normalize(address)
+      address = address.gsub(/[[:space:]]+/, " ").strip
+
+      address = address.gsub(/[\\\/]$/, '') # @qq.com\ -> @qq.com, @web.de/ -> @web.de
+      #address = address.gsub(/,/, ".") # foo,bar@gmail.com -> foo.bar@gmail.com | @gmail,com -> @gmail.com
+      address = address.gsub(/^https?:\/\/(www\.)?/i, "") # https://xxx@gmail.com -> xxx@gmail.com
+      address = address.gsub(/^mailto:/i, "") # mailto:foo@gmail.com -> foo@gmail.com
+      address = address.gsub(/.* <(.*)>$/, '\1') # foo <bar@gmail.com> -> bar@gmail.com
+      address = address.gsub(/@\./, "@") # @.gmail.com -> @gmail.com
+      address = address.gsub(/@com$/i, ".com") # @gmail@com -> @gmail.com
+      address = address.gsub(/\.co,$/i, '.com') # @gmail.co, -> @gmail.com
+      address = address.gsub(/\.com.$/i, '.com') # @gmail.com, -> @gmail.com
+      address = address.gsub(/\.con$/i, '.com') # @gmail.con -> @gmail.com
+      address = address.gsub(/\.\.com$/i, '.com') # @gmail..com -> @gmail.com
+
+      # @gmail -> @gmail.com
+      address = address.gsub(/@gmai$/i, "@gmail.com")
+      address = address.gsub(/@gmail$/i, "@gmail.com")
+      address = address.gsub(/@yahoo$/i, "@yahoo.com")
+      address = address.gsub(/@hotmai$/i, "@hotmail.com")
+      address = address.gsub(/@hotmail$/i, "@hotmail.com")
+      address = address.gsub(/@hot[^m]ail$/i, "@hotmail.com")
+      address = address.gsub(/@live$/i, "@live.com")
+
+      address = address.gsub(/@.gmail\.com$/i, "@gmail.com") # @-gmail.com -> @gmail.com
+      address = address.gsub(/@g.ail\.com$/i, "@gmail.com") # @g,ail.com -> @gmail.com
+      address = address.gsub(/@gmail\.co.$/i, "@gmail.com") # @gmail.co, -> @gmail.com
+      address = address.gsub(/@gamil\.com$/i, "@gmail.com") # @gamil.com -> @gmail.com
+      address = address.gsub(/@gnail\.com$/i, "@gmail.com") # @gnail.com -> @gmail.com
+      address = address.gsub(/@gmail\.co$/i, "@gmail.com") # @gmail.co -> @gmail.com
+      address = address.gsub(/@gmai.\.com$/i, "@gmail.com") # @gmai;.com -> @gmail.com
+      address = address.gsub(/@gmai\.com$/i, "@gmail.com") # @gmai.com -> @gmail.com
+      address = address.gsub(/@gmai\.co$/i, "@gmail.com") # @gmai.co -> @gmail.com
+      address = address.gsub(/@hotmai\.com$/i, "@hotmail.com") # @hotmai.com -> @hotmail.com
+      address = address.gsub(/@hot.ail\.com$/i, "@hotmail.com") # @hot.ail.com -> @hotmail.com
+      address = address.gsub(/@hot.mail\.com$/i, "@hotmail.com") # @hot,mail.com -> @hotmail.com
+      address = address.gsub(/@hanm.ail\.net$/i, "@hanmail.net") # @hanmiail.net -> @hanmail.net
+
+      address = address.gsub(/@(gmail|yahoo|hotmail|outlook|live).com$/i, '@\1.com') # @gmail,com -> @gmail.com
+      address = address.gsub(/@(gmail|yahoo|hotmail|outlook|live)com$/i, '@\1.com') # @gmailcom -> @gmail.com
+
+      address = address.gsub(/@([a-z]+)\.com@\1\.com$/i, '@\1.com') # @gmail.com@gmail.com -> @gmail.com
+      address = address.gsub(/@([a-z]+)@\1\.com$/i, '@\1.com') # @gmail@gmail.com -> @gmail.com
+
+      address = address.gsub(/(@.*)$/) { $1.downcase } # @Gmail.com -> @gmail.com
+
+      parse(address)
+    end
+
+    # @return [Danbooru::EmailAddress, nil] The email address, normalized to fix typos.
+    def normalized_address
+      Danbooru::EmailAddress.normalize(address)
+    end
+
+    # @return [PublicSuffix::Domain, nil] The domain part of the email address.
+    def domain
+      @domain ||= PublicSuffix.parse(parsed_address.domain)
+    rescue PublicSuffix::DomainNotAllowed
+      nil
+    end
+
+    # @return [String] The address as a plain string.
+    def as_json
+      to_s
+    end
+
+    # @return [String] A human-readable representation of the address.
+    def inspect
+      "#<Danbooru::EmailAddress #{to_s}>"
+    end
+
+    # Two addresses are equal when they are of the same class and have the same string form.
+    def ==(other)
+      self.class == other.class && to_s == other.to_s
+    end
+
+    # @return [Integer] The hash of the address string, so that equal addresses hash identically.
+    def hash
+      to_s.hash
+    end
+
+    alias_method :eql?, :==
+  end
+end
diff --git a/app/logical/email_address_type.rb b/app/logical/email_address_type.rb
new file mode 100644
index 000000000..c2a84e0ce
--- /dev/null
+++ b/app/logical/email_address_type.rb
@@ -0,0 +1,29 @@
+# frozen_string_literal: true
+
+# Define a custom email address type that allows models to declare attributes of type Danbooru::EmailAddress.
+#
+# @see app/logical/danbooru/email_address.rb
+# @see config/initializers/types.rb
+# @see https://www.bigbinary.com/blog/rails-5-attributes-api
+# @see https://api.rubyonrails.org/classes/ActiveModel/Type/Value.html
+class EmailAddressType < ActiveRecord::Type::Value
+  # Cast a String (or nil) value from the database to a Danbooru::EmailAddress object.
+  #
+  # @param value [String] the email address from the database
+  # @return [Danbooru::EmailAddress, nil] nil if the value is blank or not a valid address
+  def cast(value)
+    return nil if value.blank?
+    super(Danbooru::EmailAddress.new(value))
+  rescue Danbooru::EmailAddress::Error
+    nil
+  end
+
+  # Serialize a Danbooru::EmailAddress to a String for the database.
+  #
+  # @param value [Danbooru::EmailAddress] the email address object
+  # @return [String]
+  def serialize(value)
+    return value.to_s if value.is_a?(Danbooru::EmailAddress)
+    super value
+  end
+end
diff --git a/app/models/email_address.rb b/app/models/email_address.rb
index d4911b251..dcd670b62 100644
--- a/app/models/email_address.rb
+++ b/app/models/email_address.rb
@@ -3,8 +3,11 @@
 class EmailAddress < ApplicationRecord
   belongs_to :user, inverse_of: :email_address
 
-  validates :address, presence: true, confirmation: true, format: { with: EmailValidator::EMAIL_REGEX }
-  validates :normalized_address, uniqueness: true
+  attribute :address
+  attribute :normalized_address
+
+  validates :address, presence: true, format: { message: "is invalid", with: EmailValidator::EMAIL_REGEX }
+  validates :normalized_address, presence: true, uniqueness: true
   validates :user_id, uniqueness: true
   validate :validate_deliverable, on: :deliverable
 
@@ -17,6 +20,7 @@ class EmailAddress < ApplicationRecord
   end
 
   def address=(value)
+    value = Danbooru::EmailAddress.normalize(value)&.to_s || value
     self.normalized_address = EmailValidator.normalize(value) || address
     super
   end
diff --git a/config/initializers/types.rb b/config/initializers/types.rb
index 38f86e36c..83b0fb819 100644
--- a/config/initializers/types.rb
+++ b/config/initializers/types.rb
@@ -1,3 +1,4 @@
 Rails.application.reloader.to_prepare do
   ActiveRecord::Type.register(:ip_address, IpAddressType)
+  ActiveRecord::Type.register(:email_address, EmailAddressType)
 end
diff --git a/config/locales/en.yml b/config/locales/en.yml
index eaa3ed634..97d290b49 100644
--- a/config/locales/en.yml
+++ b/config/locales/en.yml
@@ -69,6 +69,7 @@ en:
         uploader: "You"
         uploader_id: "You"
       user/email_address:
+        address: "Email address"
         normalized_address: "Email address"
       user_feedback:
         creator: "You"
diff --git a/test/unit/email_address_test.rb b/test/unit/email_address_test.rb
index 716722fb5..cc9914661 100644
--- a/test/unit/email_address_test.rb
+++ b/test/unit/email_address_test.rb
@@ -12,29 +12,32 @@ class EmailAddressTest < ActiveSupport::TestCase
       should allow_value("foo+bar@gmail.com").for(:address)
       should allow_value("foo@foo.bar.com").for(:address)
 
-      should_not allow_value("foo@gmail.com ").for(:address)
-      should_not allow_value(" foo@gmail.com").for(:address)
-      should_not allow_value("foo@-gmail.com").for(:address)
-      should_not allow_value("foo@.gmail.com").for(:address)
-      should_not allow_value("foo@gmail").for(:address)
-      should_not allow_value("foo@gmail.").for(:address)
-      should_not allow_value("foo@gmail,com").for(:address)
-      should_not allow_value("foo@gmail.com.").for(:address)
-      should_not allow_value("foo@gmail.co,").for(:address)
+      should_not allow_value("foo@example").for(:address)
       should_not allow_value("fooqq@.com").for(:address)
-      should_not allow_value("foo@gmail..com").for(:address)
-      should_not allow_value("foo@gmailcom").for(:address)
-      should_not allow_value("mailto:foo@gmail.com").for(:address)
       should_not allow_value('foo"bar"@gmail.com').for(:address)
       should_not allow_value('foo<bar>@gmail.com').for(:address)
-      should_not allow_value("foo@gmail.com@gmail.com").for(:address)
-      should_not allow_value("foo@g,ail.com").for(:address)
-      should_not allow_value("foo@gmai;.com").for(:address)
-      should_not allow_value("foo@gmail@com").for(:address)
-      should_not allow_value("foo@gmail.c").for(:address)
       should_not allow_value("foo@foo.-bar.com").for(:address)
       should_not allow_value("foo@127.0.0.1").for(:address)
       should_not allow_value("foo@localhost").for(:address)
     end
+
+    should "fix typos" do
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail.com ").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: " foo@gmail.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail.com\n").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@-gmail.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@.gmail.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail,com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail.com.").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail.co,").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail..com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmailcom").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "mailto:foo@gmail.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail.com@gmail.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@g,ail.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmai;.com").address.to_s)
+      assert_equal("foo@gmail.com", EmailAddress.new(address: "foo@gmail@com").address.to_s)
+    end
   end
 end