twitter: replace twitter gem with our own API client.

The twitter gem had several problems:

* It's been unmaintained for over a year.
* It pulled in a lot of dependencies, many of which were outdated. In
  particular, it locked the `http` gem to version 3.3, preventing us
  from upgrading to 4.2.
* It raised exceptions on normal error conditions, like for deleted
  tweets or suspended users, which we really don't want.
* We had to wrap it to provide caching.

Changes:

* Fixes #4226 (Exception when creating new artist entries for suspended
  Twitter accounts)
* Drop support for scraping images from summary cards. Summary cards
  are the previews you get when you link to a website in a tweet. These
  preview images aren't always the best image to use.
This commit is contained in:
evazion
2019-12-13 17:27:03 -06:00
parent 0b556ece1c
commit da84e3a2f2
6 changed files with 73 additions and 145 deletions

View File

@@ -21,7 +21,6 @@ gem 'capistrano-rbenv'
gem 'streamio-ffmpeg'
gem 'rubyzip', :require => "zip"
gem 'stripe'
gem 'twitter'
gem 'aws-sdk-sqs', '~> 1'
gem 'responders'
gem 'dtext_rb', git: "https://github.com/evazion/dtext_rb.git", require: "dtext"
@@ -45,6 +44,7 @@ gem 'builder'
gem 'puma'
gem 'scenic'
gem 'ipaddress'
gem 'http'
# needed for looser jpeg header compat
gem 'ruby-imagespec', :require => "image_spec", :git => "https://github.com/r888888888/ruby-imagespec.git", :branch => "exif-fixes"

View File

@@ -94,7 +94,6 @@ GEM
bcrypt (3.1.13)
bootsnap (1.4.5)
msgpack (~> 1.0)
buftok (0.2.0)
builder (3.2.3)
byebug (11.0.1)
capistrano (3.11.2)
@@ -144,7 +143,6 @@ GEM
dotenv-rails (2.7.5)
dotenv (= 2.7.5)
railties (>= 3.2, < 6.1)
equalizer (0.0.11)
erubi (1.9.0)
factory_bot (5.1.1)
activesupport (>= 4.2.0)
@@ -153,21 +151,25 @@ GEM
ffaker (2.13.0)
ffi (1.11.3)
ffi (1.11.3-x64-mingw32)
ffi-compiler (1.0.1)
ffi (>= 1.0.0)
rake
flamegraph (0.9.5)
get_process_mem (0.2.5)
ffi (~> 1.0)
globalid (0.4.2)
activesupport (>= 4.2.0)
hashdiff (1.0.0)
http (3.3.0)
http (4.2.0)
addressable (~> 2.3)
http-cookie (~> 1.0)
http-form_data (~> 2.0)
http_parser.rb (~> 0.6.0)
http-parser (~> 1.2.0)
http-cookie (1.0.3)
domain_name (~> 0.5)
http-form_data (2.1.1)
http_parser.rb (0.6.0)
http-parser (1.2.1)
ffi-compiler (>= 1.0, < 2.0)
httparty (0.17.1)
mime-types (~> 3.0)
multi_xml (>= 0.5.2)
@@ -202,8 +204,6 @@ GEM
ntlm-http (~> 0.1, >= 0.1.1)
webrobots (>= 0.0.9, < 0.2)
memoist (0.16.2)
memoizable (0.4.2)
thread_safe (~> 0.3, >= 0.3.1)
memory_profiler (0.9.14)
meta_request (0.7.2)
rack-contrib (>= 1.1, < 3)
@@ -226,7 +226,6 @@ GEM
multi_xml (0.6.0)
multipart-post (2.1.1)
mustermann (1.0.3)
naught (1.1.0)
net-http-digest_auth (1.4.1)
net-http-persistent (3.1.0)
connection_pool (~> 2.2)
@@ -339,7 +338,6 @@ GEM
simple_form (5.0.1)
actionpack (>= 5.0)
activemodel (>= 5.0)
simple_oauth (0.3.1)
simplecov (0.17.1)
docile (~> 1.1)
json (>= 1.8, < 3)
@@ -368,17 +366,6 @@ GEM
thor (0.20.3)
thread_safe (0.3.6)
tilt (2.0.10)
twitter (6.2.0)
addressable (~> 2.3)
buftok (~> 0.2.0)
equalizer (~> 0.0.11)
http (~> 3.0)
http-form_data (~> 2.0)
http_parser.rb (~> 0.6.0)
memoizable (~> 0.4.0)
multipart-post (~> 2.0)
naught (~> 1.0)
simple_oauth (~> 0.3.0)
tzinfo (1.2.5)
thread_safe (~> 0.1)
unf (0.1.4)
@@ -436,6 +423,7 @@ DEPENDENCIES
factory_bot
ffaker
flamegraph
http
httparty
ipaddress
jquery-rails
@@ -479,7 +467,6 @@ DEPENDENCIES
statistics2
streamio-ffmpeg
stripe
twitter
unicorn
unicorn-worker-killer
webmock

View File

@@ -16,7 +16,7 @@ module Sources::Strategies
RESERVED_USERNAMES = %w[home i intent search]
def self.enabled?
TwitterService.new.enabled?
Danbooru.config.twitter_api_key.present? && Danbooru.config.twitter_api_secret.present?
end
# https://twitter.com/i/web/status/943446161586733056
@@ -49,12 +49,20 @@ module Sources::Strategies
if url =~ IMAGE_URL
["https://pbs.twimg.com/media/#{$~[:file_name]}.#{$~[:file_ext]}:orig"]
elsif api_response.present?
service.image_urls(api_response)
api_response.dig(:extended_entities, :media).to_a.map do |media|
if media[:type] == "photo"
media[:media_url_https] + ":orig"
elsif media[:type].in?(["video", "animated_gif"])
variants = media.dig(:video_info, :variants)
videos = variants.select { |variant| variant[:content_type] == "video/mp4" }
video = videos.max_by { |video| video[:bitrate].to_i }
video[:url]
end
end
else
[url]
end
end
memoize :image_urls
def preview_urls
image_urls.map do |x|
@@ -73,9 +81,8 @@ module Sources::Strategies
end
def intent_url
return nil if api_response.blank?
user_id = api_response.attrs[:user][:id_str]
user_id = api_response.dig(:user, :id_str)
return nil if user_id.blank?
"https://twitter.com/intent/user?user_id=#{user_id}"
end
@@ -87,7 +94,7 @@ module Sources::Strategies
if artist_name_from_url.present?
artist_name_from_url
elsif api_response.present?
api_response.attrs[:user][:screen_name]
api_response.dig(:user, :screen_name)
else
""
end
@@ -98,8 +105,7 @@ module Sources::Strategies
end
def artist_commentary_desc
return "" if api_response.blank?
api_response.attrs[:full_text]
api_response[:full_text].to_s
end
def normalizable_for_artist_finder?
@@ -111,22 +117,19 @@ module Sources::Strategies
end
def tags
return [] if api_response.blank?
api_response.attrs[:entities][:hashtags].map do |text:, indices:|
[text, "https://twitter.com/hashtag/#{text}"]
api_response.dig(:entities, :hashtags).to_a.map do |hashtag|
[hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"]
end
end
memoize :tags
def dtext_artist_commentary_desc
return "" if artist_commentary_desc.blank?
url_replacements = api_response.urls.map do |obj|
[obj.url.to_s, obj.expanded_url.to_s]
url_replacements = api_response.dig(:entities, :urls).to_a.map do |obj|
[obj[:url], obj[:expanded_url]]
end
url_replacements += api_response.media.map do |obj|
[obj.url.to_s, ""]
url_replacements += api_response.dig(:extended_entities, :media).to_a.map do |obj|
[obj[:url], ""]
end
url_replacements = url_replacements.to_h
@@ -137,30 +140,26 @@ module Sources::Strategies
desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]')
desc.strip
end
memoize :dtext_artist_commentary_desc
public
def service
TwitterService.new
def api_client
TwitterApiClient.new(Danbooru.config.twitter_api_key, Danbooru.config.twitter_api_secret)
end
memoize :service
def api_response
return {} if !service.enabled?
service.status(status_id, tweet_mode: "extended")
rescue ::Twitter::Error::NotFound
{}
return {} if !self.class.enabled?
api_client.status(status_id)
end
memoize :api_response
def status_id
[url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first
end
memoize :status_id
def artist_name_from_url
[url, referer_url].map {|x| self.class.artist_name_from_url(x)}.compact.first
end
memoize :api_response
end
end

View File

@@ -0,0 +1,26 @@
# A small client for the Twitter REST API built on top of Danbooru::Http.
# It authenticates with an API key/secret pair, exchanges them for a bearer
# token, and uses that token to fetch individual tweets.
class TwitterApiClient
  extend Memoist

  attr_reader :api_key, :api_secret

  # @param api_key the Twitter API key, sent as the basic-auth user
  # @param api_secret the Twitter API secret, sent as the basic-auth password
  def initialize(api_key, api_secret)
    @api_key = api_key
    @api_secret = api_secret
  end

  # Requests a token from the oauth2/token endpoint using the
  # client_credentials grant and returns the "access_token" field of the
  # parsed response.
  #
  # @param token_expiry value handed to the HTTP wrapper's `cache` call
  #   (presumably how long the token response is cached; confirm against
  #   Danbooru::Http)
  def bearer_token(token_expiry = 24.hours)
    credentialed = Danbooru::Http.basic_auth(user: api_key, pass: api_secret)
    token_response = credentialed.cache(token_expiry).post("https://api.twitter.com/oauth2/token", form: { grant_type: :client_credentials })
    token_response.parse["access_token"]
  end

  # Returns an HTTP client whose auth header carries the bearer token.
  def client
    Danbooru::Http.auth("Bearer #{bearer_token}")
  end

  # Fetches a single tweet from statuses/show.json in extended tweet mode.
  #
  # @param id the tweet id, interpolated into the request URL
  # @param cache value handed to the HTTP wrapper's `cache` call
  # @return the parsed response body with indifferent (string/symbol) access
  def status(id, cache: 1.minute)
    endpoint = "https://api.twitter.com/1.1/statuses/show.json?id=#{id}&tweet_mode=extended"
    client.cache(cache).get(endpoint).parse.with_indifferent_access
  end

  memoize :bearer_token, :client
end

View File

@@ -1,74 +0,0 @@
# Wrapper around the `twitter` gem's REST client. It caches the bearer token
# and fetched tweets through Cache, and extracts image/video URLs from a
# tweet's media or, failing that, from the og:image of a linked page.
class TwitterService
  # Derives from StandardError (not Exception) so that a plain `rescue`
  # catches it and it is not lumped in with signals and system exits.
  class Error < StandardError; end

  extend Memoist

  # True when both the Twitter API key and secret are configured.
  def enabled?
    Danbooru.config.twitter_api_key.present? && Danbooru.config.twitter_api_secret.present?
  end

  # Builds the REST client, reusing a bearer token from Cache when one is
  # stored, then writes the client's bearer token back to Cache.
  #
  # @raise [TwitterService::Error] if the API keys are not configured
  def client
    raise Error, "Twitter API keys not set" if !enabled?

    rest_client = ::Twitter::REST::Client.new do |config|
      config.consumer_key = Danbooru.config.twitter_api_key
      config.consumer_secret = Danbooru.config.twitter_api_secret

      if bearer_token = Cache.get("twitter-api-token")
        config.bearer_token = bearer_token
      end
    end

    Cache.put("twitter-api-token", rest_client.bearer_token)
    rest_client
  end
  memoize :client

  # Fetches the tweet with the given id through Cache under the key
  # "twitterapi:<id>" (the 60 is presumably an expiry in seconds; confirm
  # against Cache.get).
  #
  # @param id the tweet id
  # @param options extra options forwarded to the gem's `status` call
  def status(id, options = {})
    Cache.get("twitterapi:#{id}", 60) do
      client.status(id, options)
    end
  end

  # Returns the media URLs attached to a tweet: photos with an ":orig"
  # suffix, and for videos the mp4 variant with the highest bitrate.
  # Media of other types, and videos without an mp4 variant, are dropped.
  def extract_urls_for_status(tweet)
    urls = tweet.media.map do |obj|
      if obj.is_a?(Twitter::Media::Photo)
        obj.media_url_https.to_s + ":orig"
      elsif obj.is_a?(Twitter::Media::Video)
        mp4_variants = obj.video_info.variants.select { |variant| variant.content_type == "video/mp4" }
        # to_i keeps max_by from raising if a variant has no bitrate.
        best = mp4_variants.max_by { |variant| variant.bitrate.to_i }
        best.url.to_s if best
      end
    end

    urls.compact.uniq
  end

  # Downloads the page at `url` and returns the content of its first
  # og:image meta tag with ":large" replaced by ":orig". Returns nil when
  # the request fails or the page has no og:image tag.
  def extract_og_image_from_page(url)
    resp = HTTParty.get(url, Danbooru.config.httparty_options)
    return nil unless resp.success?

    doc = Nokogiri::HTML(resp.body)
    image = doc.css("meta[property='og:image']").first
    image&.attr("content")&.sub(":large", ":orig")
  end

  # Picks the first expanded URL in the tweet that does not point at
  # twitter.com (falling back to the first URL) and returns its og:image
  # in an array, or an empty array when nothing usable is found.
  def extract_urls_for_card(attrs)
    urls = attrs.urls.map { |x| x.expanded_url }
    url = urls.find { |x| x.host != "twitter.com" } || urls.first
    # Nothing to fetch when the tweet has no URLs.
    return [] if url.nil?

    [extract_og_image_from_page(url)].compact
  end

  # Returns the image URLs for a tweet: its attached media when present,
  # otherwise the summary-card image of a linked page, otherwise nothing.
  def image_urls(tweet)
    if tweet.media.any?
      extract_urls_for_status(tweet)
    elsif tweet.urls.any?
      extract_urls_for_card(tweet)
    else
      []
    end
  end
end

View File

@@ -46,27 +46,6 @@ module Sources
end
end
context "A twitter summary card" do
setup do
@site = Sources::Strategies.find("https://twitter.com/NatGeo/status/932700115936178177")
end
should "get the image url" do
assert_equal("https://pmdvod.nationalgeographic.com/NG_Video/205/302/smpost_1510342850295.jpg", @site.image_url)
end
end
context "A twitter summary card from twitter" do
setup do
@site = Sources::Strategies.find("https://twitter.com/masayasuf/status/870734961778630656/photo/1")
end
should "get the image url" do
skip "Find another url, the masayasuf tweet no longer exists"
assert_equal("https://pbs.twimg.com/media/DBV40M2UIAAHYlt.jpg:orig", @site.image_url)
end
end
context "A twitter summary card from twitter with a :large image" do
setup do
@site = Sources::Strategies.find("https://twitter.com/aranobu/status/817736083567820800")
@@ -269,5 +248,16 @@ module Sources
assert_equal("https://pbs.twimg.com/media/C8p-gPhVoAMZupS.png:orig", site.image_url)
end
end
# A tweet whose author is suspended should still resolve site and artist
# details taken from the URL, without raising, and should yield no image.
context "A tweet from a suspended user" do
  should "not fail" do
    site = Sources::Strategies.find("https://twitter.com/tanso_panz/status/1192429800717029377")

    # Expected value goes first so failure messages read correctly.
    assert_equal("Twitter", site.site_name)
    assert_equal("tanso_panz", site.artist_name)
    assert_equal("https://twitter.com/tanso_panz", site.profile_url)
    assert_nil(site.image_url)
  end
end
end
end