From ff5586cb01d0b79b3abba2f559e26b252ed699a4 Mon Sep 17 00:00:00 2001 From: r888888888 Date: Tue, 9 May 2017 12:46:40 -0700 Subject: [PATCH] refactor twitter service to handle cards (fixes #3031) --- Gemfile.lock | 29 ++++++++------- app/logical/twitter_service.rb | 62 ++++++++++++++++++++++++------- test/unit/sources/twitter_test.rb | 33 ++++++++++++++++ 3 files changed, 97 insertions(+), 27 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 083e1efff..d52bcb5a8 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -64,7 +64,7 @@ GEM minitest (~> 5.1) thread_safe (~> 0.3, >= 0.3.4) tzinfo (~> 1.1) - addressable (2.5.0) + addressable (2.5.1) public_suffix (~> 2.0, >= 2.0.2) arel (6.0.4) awesome_print (1.7.0) @@ -127,7 +127,7 @@ GEM factory_girl (4.8.0) activesupport (>= 3.0.0) fakeweb (1.3.0) - faraday (0.9.2) + faraday (0.10.1) multipart-post (>= 1.2, < 3) ffaker (2.5.0) ffi (1.9.10-x64-mingw32) @@ -155,10 +155,14 @@ GEM signet (~> 0.7) highline (1.7.8) hike (1.2.3) - http (0.6.4) + http (2.2.2) + addressable (~> 2.3) + http-cookie (~> 1.0) + http-form_data (~> 1.0.1) http_parser.rb (~> 0.6.0) http-cookie (1.0.2) domain_name (~> 0.5) + http-form_data (1.0.2) http_parser.rb (0.6.0) httpclient (2.8.0) hurley (0.2) @@ -200,7 +204,7 @@ GEM multi_json (1.12.1) multi_xml (0.6.0) multipart-post (2.0.0) - naught (1.0.0) + naught (1.1.0) net-http-digest_auth (1.4.1) net-http-persistent (2.9.4) net-scp (1.2.1) @@ -341,17 +345,16 @@ GEM tilt (1.4.1) timecop (0.8.1) tins (1.6.0) - twitter (5.14.0) - addressable (~> 2.3) + twitter (6.0.0) + addressable (~> 2.5) buftok (~> 0.2.0) - equalizer (~> 0.0.9) - faraday (~> 0.9.0) - http (~> 0.6.0) + equalizer (= 0.0.11) + faraday (~> 0.10.0) + http (~> 2.1) http_parser.rb (~> 0.6.0) - json (~> 1.8) - memoizable (~> 0.4.0) - naught (~> 1.0) - simple_oauth (~> 0.3.0) + memoizable (~> 0.4.2) + naught (~> 1.1) + simple_oauth (~> 0.3.1) tzinfo (1.2.2) thread_safe (~> 0.1) tzinfo-data (1.2015.6) diff --git a/app/logical/twitter_service.rb b/app/logical/twitter_service.rb index fa73cf93a..5faf3cef7 100644 --- a/app/logical/twitter_service.rb +++ b/app/logical/twitter_service.rb @@ -17,25 +17,59 @@ class TwitterService end end + def extract_urls_for_status(tweet) + tweet.media.map do |obj| + if obj.is_a?(Twitter::Media::Photo) + obj.media_url.to_s + ":orig" + elsif obj.is_a?(Twitter::Media::Video) + video = obj.video_info.variants.select do |x| + x.content_type == "video/mp4" + end.max_by {|y| y.bitrate} + if video + video.url.to_s + end + end + end.compact + end + + def extract_og_image_from_page(url, n = 5) + raise "too many redirects" if n == 0 + + Net::HTTP.start(url.host, url.port, :use_ssl => (url.normalized_scheme == "https")) do |http| + resp = http.request_get(url.request_uri) + if resp.is_a?(Net::HTTPMovedPermanently) && resp["Location"] + redirect_url = Addressable::URI.parse(resp["Location"]) + redirect_url.host = url.host if redirect_url.host.nil? + redirect_url.scheme = url.scheme if redirect_url.scheme.nil? + redirect_url.port = url.port if redirect_url.port.nil? + return extract_og_image_from_page(redirect_url, n - 1) + elsif resp.is_a?(Net::HTTPSuccess) + doc = Nokogiri::HTML(resp.body) + images = doc.css("meta[property='og:image']") + return images.first.attr("content") + end + end + end + + def extract_urls_for_card(attrs) + url = attrs.urls.map {|x| x.expanded_url}.reject {|x| x.host == "twitter.com"}.first + [extract_og_image_from_page(url)].compact + end + def image_urls(tweet_url) tweet_url =~ %r{/status/(\d+)} twitter_id = $1 - attrs = client.status(twitter_id).attrs + tweet = client.status(twitter_id) urls = [] - attrs[:entities][:media].each do |obj| - urls << obj[:media_url] + ":orig" - end - attrs[:extended_entities][:media].each do |obj| - if obj[:video_info] - largest = obj[:video_info][:variants].select {|x| x[:url] =~ /\.mp4$/}.max_by {|x| x[:bitrate]} - urls.clear - urls << largest[:url] if largest - else - urls << obj[:media_url] + ":orig" - end + + if tweet.media.any? + urls = extract_urls_for_status(tweet) + elsif tweet.urls.any? + urls = extract_urls_for_card(tweet) end + urls.uniq - rescue => e - [] + # rescue => e + # [] end end diff --git a/test/unit/sources/twitter_test.rb b/test/unit/sources/twitter_test.rb index 52ead7a56..8659192e3 100644 --- a/test/unit/sources/twitter_test.rb +++ b/test/unit/sources/twitter_test.rb @@ -2,6 +2,39 @@ require 'test_helper' module Sources class TwitterTest < ActiveSupport::TestCase + context "A video" do + setup do + @site = Sources::Site.new("https://twitter.com/CincinnatiZoo/status/859073537713328129") + @site.get + end + + should "get the image url" do + assert_equal("https://video.twimg.com/ext_tw_video/859073467769126913/pu/vid/1280x720/cPGgVROXHy3yrK6u.mp4", @site.image_url) + end + end + + context "An animated gif" do + setup do + @site = Sources::Site.new("https://twitter.com/DaniStrawberry1/status/859435334765088769") + @site.get + end + + should "get the image url" do + assert_equal("https://video.twimg.com/tweet_video/C-1Tns7WsAAqvqn.mp4", @site.image_url) + end + end + + context "A twitter summary card" do + setup do + @site = Sources::Site.new("https://twitter.com/NatGeo/status/787654447937847296") + @site.get + end + + should "get the image url" do + assert_equal("http://yourshot.nationalgeographic.com/u/fQYSUbVfts-T7odkrFJckdiFeHvab0GWOfzhj7tYdC0uglagsDcUxj3Tf7HBF3kZEj7S5m-zeDmZP6DBxBJlyJX_1mFp-hGf4JPt97xp0QJkwf4po1MmnZH73WC3a2Pa1Ky62C-v0cYXTur3-QwD3Pz5UI_cKIi81GABTXII8VwKUopxlNW2MYAR8kPYU2IoUhOjlvVefNcLYI74J-0IpI4tHDXE/", @site.image_url) + end + end + context "The source site for a restricted twitter" do setup do @site = Sources::Site.new("https://mobile.twitter.com/Strangestone/status/556440271961858051")