From 325120ee5172b060f72b0611b06fafe261fbfc06 Mon Sep 17 00:00:00 2001 From: evazion Date: Sun, 16 Sep 2018 13:34:03 -0500 Subject: [PATCH] twitter: fix parsing of the artist name from the url. Fixes URLs like https://twitter.com/intent/user?user_id=123 being incorrectly normalized to http://twitter.com/intent/ in artist entries. Also fixes the artist name to be taken from the url when it can't be obtained from the api (when the tweet is deleted). --- app/logical/sources/strategies/base.rb | 2 +- app/logical/sources/strategies/twitter.rb | 42 +++++++++++++++-------- test/unit/artist_url_test.rb | 6 ++++ test/unit/sources/twitter_test.rb | 12 ++++++- 4 files changed, 46 insertions(+), 16 deletions(-) diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 9d87b2273..3fdea3938 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -132,7 +132,7 @@ module Sources # The url to use for artist finding purposes. This will be stored in the # artist entry. Normally this will be the profile url. def normalize_for_artist_finder - profile_url || url + profile_url.presence || url end # A unique identifier for the artist. This is used for artist creation. diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb index b1c83a261..f36b03cba 100644 --- a/app/logical/sources/strategies/twitter.rb +++ b/app/logical/sources/strategies/twitter.rb @@ -2,6 +2,12 @@ module Sources::Strategies class Twitter < Base PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)!i + PROFILE = %r!\Ahttps?://(?:mobile\.)?twitter.com/(?[a-z0-9_]+)!i + + # Twitter provides a list but it's inaccurate; some names ('intent') aren't + # included and other names in the list aren't actually reserved. + # https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration + RESERVED_USERNAMES = %w[home i intent search] def self.match?(*urls) urls.compact.any? { |x| x =~ PAGE || x =~ ASSET} @@ -17,6 +23,14 @@ module Sources::Strategies return nil end + def self.artist_name_from_url(url) + if url =~ PROFILE && !$~[:username].in?(RESERVED_USERNAMES) + $~[:username] + else + nil + end + end + def site_name "Twitter" end @@ -53,22 +67,18 @@ module Sources::Strategies end def profile_url - if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i - if $1 != "i" - return "https://twitter.com/#{$1}" - end - end - - if artist_name.present? - return "https://twitter.com/" + artist_name - end - - "" + return "" if artist_name.blank? + "https://twitter.com/#{artist_name}" end def artist_name - return "" if api_response.blank? - api_response.attrs[:user][:screen_name] + if artist_name_from_url.present? + artist_name_from_url + elsif api_response.present? + api_response.attrs[:user][:screen_name] + else + "" + end end def artist_commentary_title @@ -85,7 +95,7 @@ module Sources::Strategies end def normalize_for_artist_finder - profile_url.try(:downcase) + profile_url.try(:downcase).presence || url end def tags @@ -135,5 +145,9 @@ module Sources::Strategies [url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first end memoize :status_id + + def artist_name_from_url + [url, referer_url].map {|x| self.class.artist_name_from_url(x)}.compact.first + end end end diff --git a/test/unit/artist_url_test.rb b/test/unit/artist_url_test.rb index c56539376..f38e54dcd 100644 --- a/test/unit/artist_url_test.rb +++ b/test/unit/artist_url_test.rb @@ -152,6 +152,12 @@ class ArtistUrlTest < ActiveSupport::TestCase assert_equal("http://twitter.com/aoimanabu/", url.normalized_url) end + should "normalize https://twitter.com/intent/user?user_id=* urls" do + url = FactoryBot.create(:artist_url, :url => "https://twitter.com/intent/user?user_id=2784590030") + assert_equal("https://twitter.com/intent/user?user_id=2784590030", url.url) + assert_equal("http://twitter.com/intent/user?user_id=2784590030/", url.normalized_url) + end + should "normalize nijie urls" do url = FactoryBot.create(:artist_url, url: "https://pic03.nijie.info/nijie_picture/236014_20170620101426_0.png") assert_equal("http://nijie.info/members.php?id=161703/", url.normalized_url) diff --git a/test/unit/sources/twitter_test.rb b/test/unit/sources/twitter_test.rb index 9ce76fec8..31fc2a37c 100644 --- a/test/unit/sources/twitter_test.rb +++ b/test/unit/sources/twitter_test.rb @@ -91,7 +91,6 @@ module Sources setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key @site = Sources::Strategies.find("https://mobile.twitter.com/Strangestone/status/556440271961858051") - end should "get the image url" do @@ -177,6 +176,17 @@ module Sources end end + context "A deleted tweet" do + should "still find the artist name" do + @site = Sources::Strategies.find("https://twitter.com/masayasuf/status/870734961778630656") + @artist = FactoryBot.create(:artist, name: "masayasuf", url_string: @site.url) + + assert_equal("masayasuf", @site.artist_name) + assert_equal("https://twitter.com/masayasuf", @site.profile_url) + assert_equal([@artist], @site.artists) + end + end + context "A tweet" do setup do skip "Twitter key is not set" unless Danbooru.config.twitter_api_key