From abf493794fc042f62789511c5559d7dd1eb004a3 Mon Sep 17 00:00:00 2001 From: evazion Date: Thu, 15 Sep 2022 19:38:03 -0500 Subject: [PATCH] twitter: fix misparsing of https://twitter.com/i/status/:id urls. Fix URLs like `https://twitter.com/i/status/943446161586733056` parsing the username as `i`. This led to the new artist page recommending the tag name `i` when creating an artist for a source like this. Also fix these URLs not being normalized to `https://twitter.com/:username/status/:id` after upload. --- app/logical/source/url/twitter.rb | 3 +- test/unit/sources/twitter_test.rb | 59 +++++++++++++++++++++++++------ 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/app/logical/source/url/twitter.rb b/app/logical/source/url/twitter.rb index 5e7d1095c..11befc8cc 100644 --- a/app/logical/source/url/twitter.rb +++ b/app/logical/source/url/twitter.rb @@ -40,12 +40,13 @@ class Source::URL::Twitter < Source::URL in "twitter.com", "i", "web", "status", status_id @status_id = status_id + # https://twitter.com/i/status/943446161586733056 # https://twitter.com/motty08111213/status/943446161586733056 # https://twitter.com/motty08111213/status/943446161586733056?s=19 # https://twitter.com/Kekeflipnote/status/1496555599718498319/video/1 # https://twitter.com/sato_1_11/status/1496489742791475201/photo/2 in "twitter.com", username, "status", status_id, *rest - @username = username + @username = username unless username.in?(RESERVED_USERNAMES) @status_id = status_id # https://twitter.com/intent/user?user_id=1485229827984531457 diff --git a/test/unit/sources/twitter_test.rb b/test/unit/sources/twitter_test.rb index bfdbe857d..55761d98c 100644 --- a/test/unit/sources/twitter_test.rb +++ b/test/unit/sources/twitter_test.rb @@ -181,18 +181,52 @@ module Sources end end - context "The source site for a https://twitter.com/i/web/status/:id url" do - setup do - @site = Source::Extractor.find("https://twitter.com/i/web/status/943446161586733056") - end + context "A https://twitter.com/:username/status/:id url" do + strategy_should_work( + "https://twitter.com/motty08111213/status/943446161586733056", + page_url: "https://twitter.com/motty08111213/status/943446161586733056", + image_urls: [ + "https://pbs.twimg.com/media/DRfKHmgV4AAycFB.jpg:orig", + "https://pbs.twimg.com/media/DRfKHioVoAALRlK.jpg:orig", + "https://pbs.twimg.com/media/DRfKHgHU8AE7alV.jpg:orig" + ], + profile_url: "https://twitter.com/motty08111213", + artist_name: "えのぐマネージャー 丸茂", + tag_name: "motty08111213", + tags: ["岩本町芸能社", "女優部"], + ) + end - should "fetch the source data" do - assert_equal("https://twitter.com/motty08111213", @site.profile_url) - end + context "A https://twitter.com/i/web/status/:id url" do + strategy_should_work( + "https://twitter.com/i/web/status/943446161586733056", + page_url: "https://twitter.com/motty08111213/status/943446161586733056", + image_urls: [ + "https://pbs.twimg.com/media/DRfKHmgV4AAycFB.jpg:orig", + "https://pbs.twimg.com/media/DRfKHioVoAALRlK.jpg:orig", + "https://pbs.twimg.com/media/DRfKHgHU8AE7alV.jpg:orig" + ], + profile_url: "https://twitter.com/motty08111213", + artist_name: "えのぐマネージャー 丸茂", + tag_name: "motty08111213", + tags: ["岩本町芸能社", "女優部"], + ) + end - should "get the page url" do - assert_equal("https://twitter.com/motty08111213/status/943446161586733056", @site.page_url) - end + context "A https://twitter.com/i/status/:id url" do + strategy_should_work( + "https://twitter.com/i/status/943446161586733056", + page_url: "https://twitter.com/motty08111213/status/943446161586733056", + image_urls: [ + "https://pbs.twimg.com/media/DRfKHmgV4AAycFB.jpg:orig", + "https://pbs.twimg.com/media/DRfKHioVoAALRlK.jpg:orig", + "https://pbs.twimg.com/media/DRfKHgHU8AE7alV.jpg:orig" + ], + profile_url: "https://twitter.com/motty08111213", + artist_name: "えのぐマネージャー 丸茂", + tag_name: "motty08111213", + tags: ["岩本町芸能社", "女優部"], + ) end context "A deleted tweet" do @@ -297,6 +331,7 @@ module Sources assert(Source::URL.image_url?("https://pbs.twimg.com/ext_tw_video_thumb/1243725361986375680/pu/img/JDA7g7lcw7wK-PIv.jpg")) assert(Source::URL.image_url?("https://pbs.twimg.com/amplify_video_thumb/1215590775364259840/img/lolCkEEioFZTb5dl.jpg")) + assert(Source::URL.page_url?("https://twitter.com/i/status/1261877313349640194")) assert(Source::URL.page_url?("https://twitter.com/i/web/status/1261877313349640194")) assert(Source::URL.page_url?("https://twitter.com/BOW999/status/1261877313349640194")) assert(Source::URL.page_url?("https://twitter.com/BOW999/status/1261877313349640194/photo/1")) @@ -309,6 +344,10 @@ module Sources assert(Source::URL.profile_url?("https://twitter.com/i/user/889592953")) refute(Source::URL.profile_url?("https://twitter.com/home")) + + assert_nil(Source::URL.parse("https://twitter.com/i/status/1261877313349640194").username) + assert_nil(Source::URL.parse("https://twitter.com/i/web/status/1261877313349640194").username) + assert_equal("BOW999", Source::URL.parse("https://twitter.com/BOW999/status/1261877313349640194").username) end end end