twitter: fix parsing of the artist name from the url.

Fixes URLs like https://twitter.com/intent/user?user_id=123 being
incorrectly normalized to http://twitter.com/intent/ in artist entries.

Also takes the artist name from the url when it can't be obtained from the
api (e.g. when the tweet has been deleted).
This commit is contained in:
evazion
2018-09-16 13:34:03 -05:00
parent 761f2649af
commit 325120ee51
4 changed files with 46 additions and 16 deletions

View File

@@ -132,7 +132,7 @@ module Sources
# The url to use for artist finding purposes. This will be stored in the # The url to use for artist finding purposes. This will be stored in the
# artist entry. Normally this will be the profile url. # artist entry. Normally this will be the profile url.
def normalize_for_artist_finder def normalize_for_artist_finder
profile_url || url profile_url.presence || url
end end
# A unique identifier for the artist. This is used for artist creation. # A unique identifier for the artist. This is used for artist creation.

View File

@@ -2,6 +2,12 @@ module Sources::Strategies
class Twitter < Base class Twitter < Base
PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i
ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)!i ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)!i
# Matches a Twitter page url (desktop or mobile host, http or https) and
# captures the first path segment as `username`. The match is case-insensitive
# and is not anchored at the end, so urls with further path segments or a
# query string (e.g. /name/status/123) match as well. The dot in the host is
# escaped so that only a literal "twitter.com" is accepted.
PROFILE = %r!\Ahttps?://(?:mobile\.)?twitter\.com/(?<username>[a-z0-9_]+)!i

# First path segments that are not usernames.
# Twitter provides a list but it's inaccurate; some names ('intent') aren't
# included and other names in the list aren't actually reserved.
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
RESERVED_USERNAMES = %w[home i intent search].freeze
def self.match?(*urls) def self.match?(*urls)
urls.compact.any? { |x| x =~ PAGE || x =~ ASSET} urls.compact.any? { |x| x =~ PAGE || x =~ ASSET}
@@ -17,6 +23,14 @@ module Sources::Strategies
return nil return nil
end end
# Extracts the Twitter username from a url, if it contains one.
#
# The username is the `username` capture of PROFILE, i.e. the first path
# segment after the twitter.com host. Segments listed in RESERVED_USERNAMES
# are not usernames and yield nil.
#
# @param url [String, nil] the url to inspect
# @return [String, nil] the username exactly as written in the url, or nil
#   when the url doesn't match PROFILE or the segment is reserved
def self.artist_name_from_url(url)
  match = PROFILE.match(url.to_s)
  return nil if match.nil?

  username = match[:username]
  # PROFILE matches case-insensitively, so the reserved-name check must too;
  # otherwise "/Intent/user?user_id=1" would be taken as the artist "Intent".
  return nil if RESERVED_USERNAMES.include?(username.downcase)

  username
end
def site_name def site_name
"Twitter" "Twitter"
end end
@@ -53,22 +67,18 @@ module Sources::Strategies
end end
def profile_url def profile_url
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i return "" if artist_name.blank?
if $1 != "i" "https://twitter.com/#{artist_name}"
return "https://twitter.com/#{$1}"
end
end
if artist_name.present?
return "https://twitter.com/" + artist_name
end
""
end end
def artist_name def artist_name
return "" if api_response.blank? if artist_name_from_url.present?
api_response.attrs[:user][:screen_name] artist_name_from_url
elsif api_response.present?
api_response.attrs[:user][:screen_name]
else
""
end
end end
def artist_commentary_title def artist_commentary_title
@@ -85,7 +95,7 @@ module Sources::Strategies
end end
def normalize_for_artist_finder def normalize_for_artist_finder
profile_url.try(:downcase) profile_url.try(:downcase).presence || url
end end
def tags def tags
@@ -135,5 +145,9 @@ module Sources::Strategies
[url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first [url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first
end end
memoize :status_id memoize :status_id
# Returns the first artist name that can be parsed out of the url or, failing
# that, the referer url; nil when neither yields one.
def artist_name_from_url
  [url, referer_url].each do |candidate|
    parsed_name = self.class.artist_name_from_url(candidate)
    return parsed_name unless parsed_name.nil?
  end

  nil
end
end end
end end

View File

@@ -152,6 +152,12 @@ class ArtistUrlTest < ActiveSupport::TestCase
assert_equal("http://twitter.com/aoimanabu/", url.normalized_url) assert_equal("http://twitter.com/aoimanabu/", url.normalized_url)
end end
# An intent url carries a user id rather than a username, so the whole url
# (query string included) is expected to survive normalization.
should "normalize https://twitter.com/intent/user?user_id=* urls" do
  artist_url = FactoryBot.create(:artist_url, url: "https://twitter.com/intent/user?user_id=2784590030")

  assert_equal("https://twitter.com/intent/user?user_id=2784590030", artist_url.url)
  assert_equal("http://twitter.com/intent/user?user_id=2784590030/", artist_url.normalized_url)
end
should "normalize nijie urls" do should "normalize nijie urls" do
url = FactoryBot.create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png") url = FactoryBot.create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png")
assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url) assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url)

View File

@@ -91,7 +91,6 @@ module Sources
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key
@site = Sources::Strategies.find("https://mobile.twitter.com/Strangestone/status/556440271961858051") @site = Sources::Strategies.find("https://mobile.twitter.com/Strangestone/status/556440271961858051")
end end
should "get the image url" do should "get the image url" do
@@ -177,6 +176,17 @@ module Sources
end end
end end
# Checks that the artist can still be identified from the tweet url alone.
# NOTE(review): presumably the API returns no data for this tweet because it
# has been deleted - confirm the tweet is still unavailable.
context "A deleted tweet" do
should "still find the artist name" do
@site = Sources::Strategies.find("https://twitter.com/masayasuf/status/870734961778630656")
# The artist must exist before @site.artists is queried below; it is
# registered under the same url the strategy reports.
@artist = FactoryBot.create(:artist, name: "masayasuf", url_string: @site.url)
# Both expected values are the username segment embedded in the tweet url.
assert_equal("masayasuf", @site.artist_name)
assert_equal("https://twitter.com/masayasuf", @site.profile_url)
assert_equal([@artist], @site.artists)
end
end
context "A tweet" do context "A tweet" do
setup do setup do
skip "Twitter key is not set" unless Danbooru.config.twitter_api_key skip "Twitter key is not set" unless Danbooru.config.twitter_api_key