Refactor sources

This commit is contained in:
Albert Yi
2018-08-06 17:39:25 -07:00
parent 54363ffecf
commit 762dc3da24
71 changed files with 2340 additions and 2430 deletions

View File

@@ -1,78 +0,0 @@
# encoding: UTF-8
module Sources
  # Facade over the individual site strategies. Picks the first strategy
  # whose url matcher accepts the given url (or referer url) and delegates
  # all metadata lookups to it.
  class Site
    # Raised from #initialize when no registered strategy matches the url.
    class NoStrategyError < RuntimeError; end

    attr_reader :strategy

    delegate :url, :get, :get_size, :site_name, :artist_name,
      :profile_url, :image_url, :tags, :artists, :unique_id,
      :file_url, :ugoira_frame_data, :ugoira_content_type, :image_urls,
      :artist_commentary_title, :artist_commentary_desc,
      :dtext_artist_commentary_title, :dtext_artist_commentary_desc,
      :rewrite_thumbnails, :illust_id_from_url, :translate_tag, :translated_tags, :to => :strategy

    # Every known strategy class, in the order they are tried.
    def self.strategies
      [Strategies::Pixiv, Strategies::NicoSeiga, Strategies::DeviantArt, Strategies::ArtStation, Strategies::Nijie, Strategies::Twitter, Strategies::Tumblr, Strategies::Pawoo]
    end

    # url - the resource to resolve.
    # referer_url - optional page url; also consulted when matching.
    #
    # Raises NoStrategyError when no strategy claims either url.
    def initialize(url, referer_url: nil)
      @url = url
      matched = Site.strategies.find do |candidate|
        candidate.url_match?(url) || candidate.url_match?(referer_url)
      end
      raise NoStrategyError.new if matched.nil?
      @strategy = matched.new(url, referer_url)
    end

    def referer_url
      strategy.try(:referer_url)
    end

    def normalized_for_artist_finder?
      available? && strategy.normalized_for_artist_finder?
    end

    # Best-effort normalization: any failure falls back to the raw url.
    def normalize_for_artist_finder!
      if available? && strategy.normalizable_for_artist_finder?
        strategy.normalize_for_artist_finder!
      else
        url
      end
    rescue
      url
    end

    # Serializable summary of everything the strategy extracted.
    def to_h
      {
        :artist_name => artist_name,
        :artists => artists.as_json(include: :sorted_urls),
        :profile_url => profile_url,
        :image_url => image_url,
        :image_urls => image_urls,
        :normalized_for_artist_finder_url => normalize_for_artist_finder!,
        :tags => tags,
        :translated_tags => translated_tags,
        :unique_id => unique_id,
        :artist_commentary => {
          :title => artist_commentary_title,
          :description => artist_commentary_desc,
          :dtext_title => dtext_artist_commentary_title,
          :dtext_description => dtext_artist_commentary_desc,
        }
      }
    end

    def to_json
      to_h.to_json
    end

    # True when a strategy was successfully selected.
    def available?
      strategy.present?
    end
  end
end

View File

@@ -0,0 +1,29 @@
module Sources
  # Registry and factory for the source strategies. Strategies.find returns
  # an instantiated strategy for a url; Strategies::Null acts as the
  # catch-all so find never comes up empty.
  module Strategies
    # All strategy classes in match order.
    def self.all
      [
        Strategies::Pixiv,
        Strategies::NicoSeiga,
        Strategies::Twitter,
        Strategies::DeviantArt,
        Strategies::Tumblr,
        Strategies::ArtStation,
        Strategies::Nijie,
        Strategies::Pawoo,
        Strategies::Moebooru,
        Strategies::Null # MUST BE LAST!
      ]
    end

    # Instantiate the first strategy claiming url (or referer).
    def self.find(url, referer = nil)
      strategy_class = all.find { |candidate| candidate.match?(url, referer) }
      strategy_class.new(url, referer)
    end

    # The canonical url (typically the page url) for the resource at url.
    def self.canonical(url, referer)
      find(url, referer).canonical_url
    end
  end
end

View File

@@ -1,68 +1,165 @@
module Sources::Strategies
class ArtStation < Base
PROJECT = %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)/?\z!i
ASSET = %r!\Ahttps?://cdn\w*\.artstation\.com/p/assets/images/images/\d+/\d+/\d+/(?:medium|small|large)/!i
PROFILE1 = %r!\Ahttps?://(\w+)\.artstation\.com!i
PROFILE2 = %r!\Ahttps?://www.artstation.com/artist/(\w+)!i
PROFILE3 = %r!\Ahttps?://www.artstation.com/(\w+)!i
PROFILE = %r!#{PROFILE2}|#{PROFILE3}|#{PROFILE1}!
attr_reader :json, :image_urls
def self.url_match?(url)
self.project_id(url).present?
def self.match?(*urls)
urls.compact.any? { |x| x.match?(PROJECT) || x.match?(ASSET) || x.match?(PROFILE)}
end
# https://www.artstation.com/artwork/04XA4
# https://www.artstation.com/artwork/cody-from-sf
# https://sa-dui.artstation.com/projects/DVERn
def self.project_id(url)
if url =~ %r!\Ahttps?://[a-z0-9-]+\.artstation\.com/(?:artwork|projects)/(?<project_id>[a-z0-9-]+)\z!i
if url =~ PROJECT
$~[:project_id]
else
nil
end
end
def referer_url
if self.class.url_match?(@referer_url)
@referer_url
else
@url
end
end
def site_name
"ArtStation"
end
def project_id
self.class.project_id(referer_url)
def image_urls
image_urls_sub
.map { |asset| original_asset_url(asset) }
end
memoize :image_urls
def page_url
"https://www.artstation.com/artwork/#{project_id}"
[url, referer_url].each do |x|
if x =~ PROJECT
return "https://www.artstation.com/artwork/#{$~[:project_id]}"
end
end
return super
end
def profile_url
if url =~ PROFILE1 && $1 != "www"
return "https://www.artstation.com/#{$1}"
end
if url =~ PROFILE2
return "https://www.artstation.com/#{$1}"
end
if url =~ PROFILE3 && url !~ PROJECT
return url
end
api_json["user"]["permalink"]
end
def artist_name
api_json["user"]["username"]
end
def artist_commentary_title
api_json["title"]
end
def artist_commentary_desc
ActionView::Base.full_sanitizer.sanitize(api_json["description"])
end
memoize :artist_commentary_desc
def tags
return nil if !api_json.has_key?("tags")
api_json["tags"].
map { |tag| [tag.downcase.tr(" ", "_"), tag_url(tag)]}
end
memoize :tags
def normalized_for_artist_finder?
url =~ PROFILE3 && url !~ PROFILE2 && url !~ PROJECT
end
def normalizable_for_artist_finder?
url =~ PROFILE || url =~ PROJECT
end
def normalize_for_artist_finder
profile_url
end
public
def image_urls_sub
if url.match?(ASSET)
return [url]
end
api_json["assets"]
.select { |asset| asset["asset_type"] == "image" }
.map { |asset| asset["image_url"] }
end
# these are de facto private methods but are public for testing
# purposes
def project_id
self.class.project_id(url) || self.class.project_id(referer_url)
end
memoize :project_id
def api_url
"https://www.artstation.com/projects/#{project_id}.json"
end
def image_url
image_urls.first
def api_json
if project_id.nil?
raise ::Sources::Error.new("Project id could not be determined from (#{url}, #{referer_url})")
end
resp = HTTParty.get(api_url, Danbooru.config.httparty_options)
if resp.success?
json = JSON.parse(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end
return json
end
memoize :api_json
# Returns the original representation of the asset, if it exists. Otherwise
# return the url.
def original_asset_url(x)
if x =~ ASSET
# example: https://cdnb3.artstation.com/p/assets/images/images/003/716/071/large/aoi-ogata-hate-city.jpg?1476754974
original_url = x.sub(%r!/(?:medium|small|large)/!, "/original/")
if http_exists?(original_url, headers)
return original_url
end
if x =~ /medium|small/
large_url = x.sub(%r!/(?:medium|small)/!, "/large/")
if http_exists?(large_url, headers)
return large_url
end
end
end
return x
end
def get
resp = HTTParty.get(api_url, Danbooru.config.httparty_options)
image_url_rewriter = Downloads::RewriteStrategies::ArtStation.new
if resp.success?
@json = JSON.parse(resp.body)
@artist_name = json["user"]["username"]
@profile_url = json["user"]["permalink"]
images = json["assets"].select { |asset| asset["asset_type"] == "image" }
@image_urls = images.map do |x|
y, _, _ = image_url_rewriter.rewrite(x["image_url"], nil)
y
end
@tags = json["tags"].map {|x| [x.downcase.tr(" ", "_"), "https://www.artstation.com/search?q=" + CGI.escape(x)]} if json["tags"]
@artist_commentary_title = json["title"]
@artist_commentary_desc = ActionView::Base.full_sanitizer.sanitize(json["description"])
else
raise "HTTP error code: #{resp.code} #{resp.message}"
end
def tag_url(name)
"https://www.artstation.com/search?q=" + CGI.escape(name)
end
end
end

View File

@@ -1,41 +1,108 @@
# This is a collection of strategies for extracting information about a
# resource. At a minimum it tries to extract the artist name and a canonical
# URL to download the image from. But it can also be used to normalize a URL
# for use with the artist finder. It differs from Downloads::RewriteStrategies
# in that the latter is more for normalizing and rewriting a URL until it is
# suitable for downloading, whereas Sources::Strategies is more for meta-data
# that can only be obtained by downloading and parsing the resource.
# for use with the artist finder.
#
# Design Principles
#
# In general you should minimize state. You can safely assume that <tt>url</tt>
# and <tt>referer_url</tt> will not change over the lifetime of an instance,
# so you can safely memoize methods and their results. A common pattern is
# conditionally making an external API call and parsing its response. You should
# make this call on demand and memoize the response.
module Sources
module Strategies
class Base
attr_reader :url, :referer_url
attr_reader :artist_name, :profile_url, :image_url, :tags
attr_reader :artist_commentary_title, :artist_commentary_desc
def self.url_match?(url)
extend Memoist
def self.match?(*urls)
false
end
# * <tt>url</tt> - Should point to a resource suitable for
# downloading. This may sometimes point to the binary file.
# It may also point to the artist's profile page, in cases
# where this class is being used to normalize artist urls.
# Implementations should be smart enough to detect this and
# behave accordingly.
# * <tt>referer_url</tt> - Sometimes the HTML page cannot be
# determined from <tt>url</tt>. You should generally pass in a
# <tt>referrer_url</tt> so the strategy can discover the HTML
# page and other information.
def initialize(url, referer_url = nil)
@url = url
@referer_url = referer_url
end
# No remote calls are made until this method is called.
def get
def site_name
raise NotImplementedError
end
def get_size
@get_size ||= Downloads::File.new(@image_url).size
# Whatever <tt>url</tt> is, this method should return the direct links
# to the canonical binary files. It should not be an HTML page. It should
# be a list of JPEG, PNG, GIF, WEBM, MP4, ZIP, etc. It is what the
# downloader will fetch and save to disk.
def image_urls
raise NotImplementedError
end
def image_url
image_urls.first
end
# Whatever <tt>url</tt> is, this method should return a link to the HTML
# page containing the resource. It should not be a binary file. It will
# eventually be assigned as the source for the post, but it does not
# represent what the downloader will fetch.
def page_url
Rails.logger.warn "Valid page url for (#{url}, #{referer_url}) not found"
return nil
end
# This will be the url stored in posts. Typically this is the page
# url, but on some sites it may be preferable to store the image url.
def canonical_url
page_url
end
# A link to the artist's profile page on the site.
def profile_url
nil
end
def artist_name
raise NotImplementedError
end
def artist_commentary_title
nil
end
def artist_commentary_desc
nil
end
# Subclasses should merge in any required headers needed to access resources
# on the site.
def headers
return Danbooru.config.http_headers
end
# Returns the size of the image resource without actually downloading the file.
def size
Downloads::File.new(image_url).size
end
memoize :size
# Subclasses should return true only if the URL is in its final normalized form.
#
# Sources::Site.new("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
# Sources::Strategies.find("http://img.pixiv.net/img/evazion").normalized_for_artist_finder?
# => true
# Sources::Site.new("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
# Sources::Strategies.find("http://i2.pixiv.net/img18/img/evazion/14901720_m.png").normalized_for_artist_finder?
# => false
def normalized_for_artist_finder?
false
@@ -44,32 +111,33 @@ module Sources
# Subclasses should return true only if the URL is a valid URL that could
# be converted into normalized form.
#
# Sources::Site.new("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
# Sources::Strategies.find("http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054").normalizable_for_artist_finder?
# => true
# Sources::Site.new("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
# Sources::Strategies.find("http://dic.pixiv.net/a/THUNDERproject").normalizable_for_artist_finder?
# => false
def normalizable_for_artist_finder?
false
end
def normalize_for_artist_finder!
url
end
def site_name
raise NotImplementedError
def normalize_for_artist_finder
profile_url || url
end
# A unique identifier for the artist. This is used for artist creation.
def unique_id
artist_name
end
def artists
Artist.find_artists(url, referer_url)
Artist.find_artists(profile_url)
end
def image_urls
[image_url]
def file_url
image_url
end
def data
{}
end
def tags
@@ -97,11 +165,6 @@ module Sources
translated_tags
end
# Should be set to a url for sites that prevent hotlinking, or left nil for sites that don't.
def fake_referer
nil
end
def dtext_artist_commentary_title
self.class.to_dtext(artist_commentary_title)
end
@@ -110,9 +173,40 @@ module Sources
self.class.to_dtext(artist_commentary_desc)
end
# A strategy may return extra data unrelated to the file
def data
return {}
end
def to_h
return {
:artist_name => artist_name,
:artists => artists.as_json(include: :sorted_urls),
:profile_url => profile_url,
:image_url => image_url,
:image_urls => image_urls,
:normalized_for_artist_finder_url => normalize_for_artist_finder,
:tags => tags,
:translated_tags => translated_tags,
:unique_id => unique_id,
:artist_commentary => {
:title => artist_commentary_title,
:description => artist_commentary_desc,
:dtext_title => dtext_artist_commentary_title,
:dtext_description => dtext_artist_commentary_desc,
}
}
end
def to_json
to_h.to_json
end
protected
def agent
raise NotImplementedError
def http_exists?(url, headers)
res = HTTParty.head(url, Danbooru.config.httparty_options.deep_merge(headers: headers))
res.success?
end
# Convert commentary to dtext by stripping html tags. Sites can override

View File

@@ -1,44 +1,127 @@
module Sources
module Strategies
class DeviantArt < Base
extend Memoist
ATTRIBUTED_ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
ASSET = %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
PATH_ART = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
RESERVED_SUBDOMAINS = %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.}
SUBDOMAIN_ART = %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
PROFILE = %r{\Ahttps?://www\.deviantart\.com/([^/]+)/?\z}
def self.url_match?(url)
url =~ /^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/
end
def self.normalize(url)
if url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-z0-9_]*_by_[a-z0-9_]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net/.+/[a-f0-9]+-d([a-z0-9]+)\.}i
"http://fav.me/d#{$1}"
elsif url =~ %r{\Ahttps?://www\.deviantart\.com/([^/]+)/art/}
url
elsif url !~ %r{\Ahttps?://(?:fc|th|pre|orig|img|www)\.} && url =~ %r{\Ahttps?://(.+?)\.deviantart\.com(.*)}
"http://www.deviantart.com/#{$1}#{$2}"
else
url
end
end
def referer_url
if @referer_url =~ /deviantart\.com\/art\// && @url =~ /https?:\/\/(?:fc|th|pre|orig|img)\d{2}\.deviantart\.net\//
@referer_url
else
@url
end
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?deviantart\.(?:com|net)/) }
end
def site_name
"Deviant Art"
end
def unique_id
artist_name
def image_urls
# normalize thumbnails
if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)200H/}
match = $1
return [url.sub(match + "200H/", match)]
end
if url =~ %r{^(https?://(?:fc|th)\d{2}\.deviantart\.net/.+?/)PRE/}
match = $1
return [url.sub(match + "PRE/", match)]
end
# return direct links
if url =~ ATTRIBUTED_ASSET || url =~ ASSET
return [url]
end
# work is deleted, use image url as given by user.
if uuid.nil?
return [url]
end
# work is downloadable
if api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
return [src]
end
# work isn't downloadable, or download size is same as regular size.
if api_deviation.present?
return [api_deviation.dig(:content, :src)]
end
raise "Couldn't find image url"
end
def get
# no-op
def page_url
[url, referer_url].each do |x|
if x =~ ATTRIBUTED_ASSET
return "http://fav.me/d#{$1}"
end
if x =~ ASSET
return "http://fav.me/d#{$1}"
end
if x =~ PATH_ART
return x
end
if x !~ RESERVED_SUBDOMAINS && x =~ SUBDOMAIN_ART
return "http://www.deviantart.com/#{$1}#{$2}"
end
end
return super
end
def profile_url
if url =~ PROFILE
return url
end
if artist_name.blank?
return nil
end
return "https://www.deviantart.com/#{artist_name}"
end
def artist_name
api_metadata.dig(:author, :username).try(&:downcase)
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
url =~ PATH_ART || url =~ SUBDOMAIN_ART
end
def normalize_for_artist_finder
profile_url
end
def tags
if api_metadata.blank?
return []
end
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def dtext_artist_commentary_desc
@@ -71,75 +154,24 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def artist_name
api_metadata.dig(:author, :username).try(&:downcase)
end
def profile_url
return "" if artist_name.blank?
"https://www.deviantart.com/#{artist_name}"
end
def image_url
# work is deleted, use image url as given by user.
if uuid.nil?
url
# work is downloadable
elsif api_deviation[:is_downloadable] && api_deviation[:download_filesize] != api_deviation.dig(:content, :filesize)
src = api_download[:src]
src.gsub!(%r!\Ahttps?://s3\.amazonaws\.com/!i, "https://")
src.gsub!(/\?.*\z/, "") # strip s3 query params
src.gsub!(%r!\Ahttps://origin-orig\.deviantart\.net!, "http://origin-orig.deviantart.net") # https://origin-orig.devianart.net doesn't work
src
# work isn't downloadable, or download size is same as regular size.
elsif api_deviation.present?
api_deviation.dig(:content, :src)
else
raise "couldn't find image url"
end
end
def tags
return [] if api_metadata.blank?
api_metadata[:tags].map do |tag|
[tag[:tag_name], "https://www.deviantart.com/tag/#{tag[:tag_name]}"]
end
end
def artist_commentary_title
api_metadata[:title]
end
def artist_commentary_desc
api_metadata[:description]
end
def normalizable_for_artist_finder?
url !~ %r!^https?://www.deviantart.com/!
end
def normalized_for_artist_finder?
url =~ %r!^https?://www.deviantart.com/!
end
def normalize_for_artist_finder!
profile_url
end
protected
def normalized_url
@normalized_url ||= self.class.normalize(url)
end
public
def page
options = Danbooru.config.httparty_options.deep_merge(format: :plain, headers: { "Accept-Encoding" => "gzip" })
resp = HTTParty.get(normalized_url, **options)
body = Zlib.gunzip(resp.body)
options = Danbooru.config.httparty_options.deep_merge(
format: :plain,
headers: { "Accept-Encoding" => "gzip" }
)
resp = HTTParty.get(page_url, **options)
if resp.success?
body = Zlib.gunzip(resp.body)
else
raise HTTParty::ResponseError.new(resp)
end
Nokogiri::HTML(body)
end
memoize :page
# Scrape UUID from <meta property="da:appurl" content="DeviantArt://deviation/12F08C5D-A3A4-338C-2F1A-7E4E268C0E8B">
# For private works the UUID will be nil.
@@ -151,29 +183,39 @@ module Sources
uuid = appurl[%r!\ADeviantArt://deviation/(.*)\z!, 1]
uuid
end
memoize :uuid
def api_client
api_client = DeviantArtApiClient.new(Danbooru.config.deviantart_client_id, Danbooru.config.deviantart_client_secret, Danbooru.config.httparty_options)
api_client.access_token = Cache.get("da-access-token", 55.minutes) { api_client.access_token.to_hash }
api_client = DeviantArtApiClient.new(
Danbooru.config.deviantart_client_id,
Danbooru.config.deviantart_client_secret,
Danbooru.config.httparty_options
)
api_client.access_token = Cache.get("da-access-token", 55.minutes) do
api_client.access_token.to_hash
end
api_client
end
memoize :api_client
def api_deviation
return {} if uuid.nil?
api_client.deviation(uuid)
end
memoize :api_deviation
def api_metadata
return {} if uuid.nil?
api_client.metadata(uuid)[:metadata].first
end
memoize :api_metadata
def api_download
return {} if uuid.nil?
api_client.download(uuid)
end
memoize :api_download
memoize :page, :uuid, :api_client, :api_deviation, :api_metadata, :api_download
end
end
end

View File

@@ -0,0 +1,35 @@
module Sources
  module Strategies
    # Strategy for moebooru-based boorus (yande.re, konachan.com).
    class Moebooru < Base
      DOMAINS = /(?:[^.]+\.)?yande\.re|konachan\.com/

      def self.match?(*urls)
        urls.compact.any? { |candidate| candidate.match?(DOMAINS) }
      end

      # The host itself identifies the site (e.g. "yande.re").
      def site_name
        URI.parse(url).host
      end

      # Moebooru sites serve a lossless PNG alongside the /jpeg/ sample;
      # rewrite jpeg urls to point at the original image.
      def image_url
        match = %r{\A(https?://(?:#{DOMAINS}))/jpeg/([a-f0-9]+(?:/.*)?)\.jpg\Z}.match(url)
        return url if match.nil?
        "#{match[1]}/image/#{match[2]}.png"
      end

      def page_url
        url
      end

      def profile_url
        url
      end

      # Moebooru posts carry no artist attribution.
      def artist_name
        ""
      end
    end
  end
end

View File

@@ -1,182 +1,188 @@
module Sources
module Strategies
class NicoSeiga < Base
extend Memoist
def self.url_match?(url)
url =~ /^https?:\/\/(?:\w+\.)?nico(?:seiga|video)\.jp/
end
URL = %r!\Ahttps?://(?:\w+\.)?nico(?:seiga|video)\.jp!
DIRECT = %r!\Ahttps?://lohas\.nicoseiga\.jp/priv/[0-9a-f]+!
PAGE = %r!\Ahttps?://seiga\.nicovideo\.jp/seiga/im(\d+)!i
PROFILE = %r!\Ahttps?://seiga\.nicovideo\.jp/user/illust/(\d+)!i
def referer_url
if @referer_url =~ /seiga\.nicovideo\.jp\/seiga\/im\d+/ && @url =~ /http:\/\/lohas\.nicoseiga\.jp\/(?:priv|o)\//
@referer_url
else
@url
end
def self.match?(*urls)
urls.compact.any? { |x| x.match?(URL) }
end
def site_name
"Nico Seiga"
end
def unique_id
profile_url =~ /\/illust\/(\d+)/
"nicoseiga" + $1
end
def get
page = load_page
@artist_name, @profile_url = get_profile_from_api
@image_url = get_image_url_from_page(page)
@artist_commentary_title, @artist_commentary_desc = get_artist_commentary_from_api
# Log out before getting the tags.
# The reason for this is that if you're logged in and viewing a non-adult-rated work, the tags will be added with javascript after the page has loaded meaning we can't extract them easily.
# This does not apply if you're logged out (or if you're viewing an adult-rated work).
agent.cookie_jar.clear!
agent.get(normalized_url) do |page|
@tags = get_tags_from_page(page)
end
end
def normalized_for_artist_finder?
url =~ %r!https?://seiga\.nicovideo\.jp/user/illust/\d+/!i
end
def normalizable_for_artist_finder?
url =~ %r!https?://seiga\.nicovideo\.jp/seiga/im\d+!i
end
def normalize_for_artist_finder!
page = load_page
@illust_id = get_illust_id_from_url
@artist_name, @profile_url = get_profile_from_api
@profile_url + "/"
end
protected
def api_client
NicoSeigaApiClient.new(get_illust_id_from_url)
end
def get_illust_id_from_url
if normalized_url =~ %r!http://seiga.nicovideo.jp/seiga/im(\d+)!
$1.to_i
else
nil
end
end
def load_page
page = agent.get(normalized_url)
if page.search("a#link_btn_login").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nico-seiga-session")
@agent = nil
page = agent.get(normalized_url)
def image_urls
if url =~ DIRECT
return [url]
end
page
end
def get_profile_from_api
return [api_client.moniker, "http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"]
end
def get_image_url_from_page(page)
link = page.search("a#illust_link")
if link.any?
image_url = "http://seiga.nicovideo.jp" + link[0]["href"]
page = agent.get(image_url) # need to follow this redirect while logged in or it won't work
if page.is_a?(Mechanize::Image)
return page.uri.to_s
return [page.uri.to_s]
end
images = page.search("div.illust_view_big").select {|x| x["data-src"] =~ /\/priv\//}
if images.any?
image_url = "http://lohas.nicoseiga.jp" + images[0]["data-src"]
end
else
image_url = nil
end
return image_url
end
def get_tags_from_page(page)
links = page.search("a.tag")
links.map do |node|
[node.text, "http://seiga.nicovideo.jp" + node.attr("href")]
end
end
def get_artist_commentary_from_api
[api_client.title, api_client.desc]
end
def normalized_url
@normalized_url ||= begin
if url =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
"http://seiga.nicovideo.jp/seiga/im#{$1}"
elsif url =~ %r{/seiga/im\d+}
url
else
nil
return ["http://lohas.nicoseiga.jp" + images[0]["data-src"]]
end
end
raise "image url not found for (#{url}, #{referer_url})"
end
def page_url
[url, referer_url].each do |x|
if x =~ %r!\Ahttps?://lohas\.nicoseiga\.jp/o/[a-f0-9]+/\d+/(\d+)!
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)\?e=\d+&h=[a-f0-9]+}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/[a-f0-9]+/\d+/(\d+)}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp/priv/(\d+)}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{\Ahttps?://lohas\.nicoseiga\.jp//?thumb/(\d+)i?}i
return "http://seiga.nicovideo.jp/seiga/im#{$1}"
end
if x =~ %r{/seiga/im\d+}
return x
end
end
return super
end
def profile_url
if url =~ PROFILE
return url
end
"http://seiga.nicovideo.jp/user/illust/#{api_client.user_id}"
end
def artist_name
api_client.moniker
end
def artist_commentary_title
api_client.title
end
def artist_commentary_desc
api_client.desc
end
def headers
super.merge(
"Referer" => "https://seiga.nicovideo.jp"
)
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
url =~ PAGE || url =~ PROFILE
end
def normalize_for_artist_finder
"#{profile_url}/"
end
def unique_id
"nicoseiga#{api_client.user_id}"
end
def tags
string = page.at("meta[name=keywords]").try(:[], "content") || ""
string.split(/,/).map do |name|
[name, "https://seiga.nicovideo.jp/tag/#{CGI.escape(name)}"]
end
end
memoize :tags
public
def api_client
NicoSeigaApiClient.new(illust_id)
end
memoize :api_client
def illust_id
if page_url =~ PAGE
return $1.to_i
end
return nil
end
def page
doc = agent.get(page_url)
if doc.search("a#link_btn_login").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nico-seiga-session")
doc = agent.get(page_url)
end
doc
end
memoize :page
def agent
@agent ||= begin
mech = Mechanize.new
mech.redirect_ok = false
mech.keep_alive = false
mech = Mechanize.new
mech.redirect_ok = false
mech.keep_alive = false
session = Cache.get("nico-seiga-session")
if session
cookie = Mechanize::Cookie.new("user_session", session)
cookie.domain = ".nicovideo.jp"
cookie.path = "/"
mech.cookie_jar.add(cookie)
else
mech.get("https://account.nicovideo.jp/login") do |page|
page.form_with(:id => "login_form") do |form|
form["mail_tel"] = Danbooru.config.nico_seiga_login
form["password"] = Danbooru.config.nico_seiga_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "user_session"}.first
if session
Cache.put("nico-seiga-session", session.value, 1.month)
else
raise "Session not found"
end
end
# This cookie needs to be set to allow viewing of adult works
cookie = Mechanize::Cookie.new("skip_fetish_warning", "1")
cookie.domain = "seiga.nicovideo.jp"
session = Cache.get("nico-seiga-session")
if session
cookie = Mechanize::Cookie.new("user_session", session)
cookie.domain = ".nicovideo.jp"
cookie.path = "/"
mech.cookie_jar.add(cookie)
mech.redirect_ok = true
mech
else
mech.get("https://account.nicovideo.jp/login") do |page|
page.form_with(:id => "login_form") do |form|
form["mail_tel"] = Danbooru.config.nico_seiga_login
form["password"] = Danbooru.config.nico_seiga_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "user_session"}.first
if session
Cache.put("nico-seiga-session", session.value, 1.month)
else
raise "Session not found"
end
end
end
memoize :api_client
# This cookie needs to be set to allow viewing of adult works
cookie = Mechanize::Cookie.new("skip_fetish_warning", "1")
cookie.domain = "seiga.nicovideo.jp"
cookie.path = "/"
mech.cookie_jar.add(cookie)
mech.redirect_ok = true
mech
end
memoize :agent
end
end
end

View File

@@ -1,155 +1,158 @@
module Sources
module Strategies
class Nijie < Base
attr_reader :image_urls
PICTURE = %r{pic\d+\.nijie.info/nijie_picture/}
PAGE = %r{\Ahttps?://nijie\.info/view\.php.+id=\d+}
DIFF = %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
def self.url_match?(url)
url =~ /^https?:\/\/(?:.+?\.)?nijie\.info/
end
def initialize(url, referer_url=nil)
super(normalize_url(url), normalize_url(referer_url))
end
def referer_url
if @referer_url =~ /nijie\.info\/view\.php.+id=\d+/ && @url =~ /pic\d+\.nijie.info\/nijie_picture\//
@referer_url
else
@url
end
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/^https?:\/\/(?:.+?\.)?nijie\.info/) }
end
def site_name
"Nijie"
end
def image_urls
if url =~ PICTURE
return [url]
end
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
if url =~ DIFF
return [normalize_thumbnails(url)]
end
page.search("div#gallery a > img").map do |img|
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
normalize_thumbnails("https:" + img.attr("src"))
end.uniq
end
def page_url
[url, referer_url].each do |x|
if x =~ PAGE
return x
end
if x =~ %r!https?://nijie\.info/view_popup\.php.+id=(\d+)!
return "https://nijie.info/view.php?id=#{$1}"
end
end
return super
end
def profile_url
links = page.search("a.name")
if links.any?
return "https://nijie.info/" + links[0]["href"]
end
return nil
end
def artist_name
links = page.search("a.name")
if links.any?
return links[0].text
end
return nil
end
def artist_commentary_title
page.search("h2.illust_title").text
end
def artist_commentary_desc
page.search('meta[property="og:description"]').attr("content").value
end
def tags
links = page.search("div#view-tag a").find_all do |node|
node["href"] =~ /search\.php/
end
if links.any?
return links.map do |node|
[node.inner_text, "https://nijie.info" + node.attr("href")]
end
end
return []
end
def unique_id
profile_url =~ /nijie\.info\/members.php\?id=(\d+)/
"nijie" + $1.to_s
end
def image_url
image_urls.first
end
def get
page = agent.get(referer_url)
if page.search("div#header-login-container").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nijie-session")
@agent = nil
page = agent.get(referer_url)
end
@artist_name, @profile_url = get_profile_from_page(page)
@image_urls = get_image_urls_from_page(page)
@tags = get_tags_from_page(page)
@artist_commentary_title, @artist_commentary_desc = get_commentary_from_page(page)
end
protected
public
def self.to_dtext(text)
text = text.gsub(/\r\n|\r/, "<br>")
DText.from_html(text).strip
end
def get_commentary_from_page(page)
title = page.search("h2.illust_title").text
desc = page.search('meta[property="og:description"]').attr("content").value
[title, desc]
def normalize_thumbnails(x)
x.gsub(%r!__rs_l120x120/!i, "")
end
def get_profile_from_page(page)
links = page.search("a.name")
def page
doc = agent.get(page_url)
if links.any?
profile_url = "http://nijie.info/" + links[0]["href"]
artist_name = links[0].text
else
profile_url = nil
artist_name = nil
if doc.search("div#header-login-container").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("nijie-session")
doc = agent.get(page_url)
end
return [artist_name, profile_url].compact
end
def get_image_urls_from_page(page)
page.search("div#gallery a > img").map do |img|
# //pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => https://pic01.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
url = "https:" + img.attr("src")
normalize_image_url(url)
end
end
def get_tags_from_page(page)
# puts page.root.to_xhtml
links = page.search("div#view-tag a").find_all do |node|
node["href"] =~ /search\.php/
end
if links.any?
links.map do |node|
[node.inner_text, "http://nijie.info" + node.attr("href")]
end
else
[]
end
end
# Canonicalize popup viewer URLs to the plain view page; all other URLs
# pass through unchanged.
def normalize_url(url)
  popup = url.match(%r!https?://nijie\.info/view_popup\.php.+id=(\d+)!)
  popup ? "http://nijie.info/view.php?id=#{popup[1]}" : url
end
def normalize_image_url(image_url)
# http://pic03.nijie.info/__rs_l120x120/nijie_picture/diff/main/218856_0_236014_20170620101329.png
# => http://pic03.nijie.info/nijie_picture/diff/main/218856_3_236014_20170620101331.png
if image_url =~ %r!\Ahttps?://pic\d+\.nijie\.info/__rs_l120x120/nijie_picture/diff/main/[0-9_]+\.\w+\z!i
image_url = image_url.gsub(%r!__rs_l120x120/!i, "")
end
image_url = image_url.gsub(%r!\Ahttp:!i, "https:")
image_url
return doc
end
memoize :page
def agent
@agent ||= begin
mech = Mechanize.new
mech = Mechanize.new
session = Cache.get("nijie-session")
if session
cookie = Mechanize::Cookie.new("NIJIEIJIEID", session)
cookie.domain = ".nijie.info"
cookie.path = "/"
mech.cookie_jar.add(cookie)
else
mech.get("http://nijie.info/login.php") do |page|
page.form_with(:action => "/login_int.php") do |form|
form['email'] = Danbooru.config.nijie_login
form['password'] = Danbooru.config.nijie_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first
Cache.put("nijie-session", session.value, 1.month) if session
end
# This cookie needs to be set to allow viewing of adult works while anonymous
cookie = Mechanize::Cookie.new("R18", "1")
session = Cache.get("nijie-session")
if session
cookie = Mechanize::Cookie.new("NIJIEIJIEID", session)
cookie.domain = ".nijie.info"
cookie.path = "/"
mech.cookie_jar.add(cookie)
else
mech.get("https://nijie.info/login.php") do |page|
page.form_with(:action => "/login_int.php") do |form|
form['email'] = Danbooru.config.nijie_login
form['password'] = Danbooru.config.nijie_password
end.click_button
end
session = mech.cookie_jar.cookies.select{|c| c.name == "NIJIEIJIEID"}.first
Cache.put("nijie-session", session.value, 1.day) if session
end
mech
# This cookie needs to be set to allow viewing of adult works while anonymous
cookie = Mechanize::Cookie.new("R18", "1")
cookie.domain = ".nijie.info"
cookie.path = "/"
mech.cookie_jar.add(cookie)
mech
rescue Mechanize::ResponseCodeError => x
if x.response_code.to_i == 429
sleep(5)
retry
else
raise
end
end
memoize :agent
end
end
end

View File

@@ -0,0 +1,43 @@
module Sources
  module Strategies
    # Fallback strategy used when no site-specific strategy recognizes a
    # URL. It treats the URL itself as both the page and the image.
    class Null < Base
      # Matches everything, so this strategy must be tried last.
      def self.match?(*urls)
        true
      end

      # The URL is its own (only) image.
      def image_urls
        [url]
      end

      def page_url
        url
      end

      # Unknown sites are considered already normalized for the artist
      # finder and offer no further normalization.
      def normalized_for_artist_finder?
        true
      end

      def normalizable_for_artist_finder?
        false
      end

      def normalize_for_artist_finder
        url
      end

      # Hostname of the URL, or "N/A" when it can't be parsed.
      def site_name
        host = URI.parse(url).hostname
        host || "N/A"
      rescue StandardError
        "N/A"
      end

      def unique_id
        url
      end

      # No rewriting: pass the download request through untouched.
      def rewrite(url, headers, data)
        [url, headers, data]
      end
    end
  end
end

View File

@@ -1,62 +1,80 @@
# html page urls:
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
#
# image urls:
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
#
# artist urls:
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
module Sources::Strategies
class Pawoo < Base
attr_reader :image_urls
IMAGE = %r!\Ahttps?://img\.pawoo\.net/media_attachments/files/(\d+/\d+/\d+)!
def self.url_match?(url)
PawooApiClient::Status.is_match?(url) || PawooApiClient::Account.is_match?(url)
end
def referer_url
normalized_url
# A URL belongs to this strategy when it is a pawoo image asset, a status
# page, or an account page.
def self.match?(*urls)
  urls.compact.any? do |candidate|
    candidate =~ IMAGE ||
      PawooApiClient::Status.is_match?(candidate) ||
      PawooApiClient::Account.is_match?(candidate)
  end
end
def site_name
"Pawoo"
end
def api_response
@response ||= PawooApiClient.new.get(normalized_url)
def image_url
image_urls.first
end
def get
response = api_response
@artist_name = response.account_name
@profile_url = response.profile_url
@image_url = response.image_urls.first
@image_urls = response.image_urls
@tags = response.tags
@artist_commentary_title = nil
@artist_commentary_desc = response.commentary
end
def normalized_url
if self.class.url_match?(@url)
@url
elsif self.class.url_match?(@referer_url)
@referer_url
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
# Full-size image URLs.
#
# https://img.pawoo.net/media_attachments/files/001/297/997/small/c4272a09570757c2.png
# https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png
# https://pawoo.net/media/lU2uV7C1MMQSb1czwvg
def image_urls
  small = url.match(%r!#{IMAGE}/small/([a-z0-9]+\.\w+)\z!i)
  return ["https://img.pawoo.net/media_attachments/files/#{small[1]}/original/#{small[2]}"] if small

  return [url] if url.match?(%r!#{IMAGE}/original/([a-z0-9]+\.\w+)\z!i)

  api_response.image_urls
end
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
# Whichever of (url, referer_url) is a status page permalink:
#
# https://pawoo.net/@evazion/19451018
# https://pawoo.net/web/statuses/19451018
def page_url
  status = [url, referer_url].find { |x| PawooApiClient::Status.is_match?(x) }
  status || super
end
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
# The artist's account page, built from the URL when possible, otherwise
# taken from the API:
#
# https://pawoo.net/@evazion
# https://pawoo.net/web/accounts/47806
def profile_url
  account = url.match(PawooApiClient::PROFILE2)
  return "https://pawoo.net/@#{account[1]}" if account

  api_response.profile_url
end
def artist_name
api_response.account_name
end
def artist_commentary_title
nil
end
def artist_commentary_desc
api_response.commentary
end
def tags
api_response.tags
end
def normalizable_for_artist_finder?
true
end
def normalize_for_artist_finder!
get
@profile_url || @url
def normalize_for_artist_finder
profile_url
end
def dtext_artist_commentary_desc
@@ -68,5 +86,18 @@ module Sources::Strategies
end
end.strip
end
public
# Fetch the API record for whichever of (url, referer_url) the API client
# recognizes; nil when neither resolves.
#
# Fixes: reuse a single PawooApiClient instead of constructing one per
# candidate URL, and rename the local — it holds the API response, not the
# client.
def api_response
  client = PawooApiClient.new
  [url, referer_url].each do |candidate|
    response = client.get(candidate)
    return response if response
  end
  nil
end
memoize :api_response
end
end

View File

@@ -1,122 +1,23 @@
# encoding: UTF-8
require 'csv'
module Sources
module Strategies
class Pixiv < Base
attr_reader :zip_url, :ugoira_frame_data, :ugoira_content_type
MONIKER = %r!(?:[a-zA-Z0-9_-]+)!
PROFILE = %r!\Ahttps?://www\.pixiv\.net/member\.php\?id=[0-9]+\z!
EXT = %r!(?:jpg|jpeg|png|gif)!i
MONIKER = '(?:[a-zA-Z0-9_-]+)'
TIMESTAMP = '(?:[0-9]{4}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2}/[0-9]{2})'
EXT = "(?:jpg|jpeg|png|gif)"
WEB = %r!(?:\A(?:https?://)?www\.pixiv\.net)!
I12 = %r!(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)!
IMG = %r!(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)!
PXIMG = %r!(?:\A(?:https?://)?i\.pximg\.net)!
TOUCH = %r!(?:\A(?:https?://)?touch\.pixiv\.net)!
NOVEL_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/novel/show\.php\?id=(\d+))!
FANBOX_IMAGE = %r!(?:\Ahttps?://fanbox\.pixiv\.net/images/post/(\d+))!
FANBOX_PAGE = %r!(?:\Ahttps?://www\.pixiv\.net/fanbox/creator/\d+/post/(\d+))!
WEB = '(?:\A(?:https?://)?www\.pixiv\.net)'
I12 = '(?:\A(?:https?://)?i[0-9]+\.pixiv\.net)'
IMG = '(?:\A(?:https?://)?img[0-9]*\.pixiv\.net)'
PXIMG = '(?:\A(?:https?://)?i\.pximg\.net)'
TOUCH = '(?:\A(?:https?://)?touch\.pixiv\.net)'
def self.url_match?(url)
url =~ /#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}/i
end
def referer_url
if @referer_url =~ /pixiv\.net\/member_illust.+mode=medium/ && @url =~ /#{IMG}|#{I12}/
@referer_url
else
@url
end
end
def site_name
"Pixiv"
end
def unique_id
@pixiv_moniker
end
def fake_referer
"http://www.pixiv.net"
end
def normalized_for_artist_finder?
url =~ %r!\Ahttp://www\.pixiv\.net/member\.php\?id=[0-9]+\z/!
end
def normalizable_for_artist_finder?
has_moniker? || sample_image? || full_image? || work_page?
end
def normalize_for_artist_finder!
@illust_id = illust_id_from_url!
@metadata = get_metadata_from_papi(@illust_id)
"http://www.pixiv.net/member.php?id=#{@metadata.user_id}/"
end
def translate_tag(tag)
normalized_tag = tag.gsub(/\d+users入り\z/i, "")
translated_tags = super(normalized_tag)
if translated_tags.empty? && normalized_tag.include?("/")
translated_tags = normalized_tag.split("/").flat_map { |tag| super(tag) }
end
translated_tags
end
def get
return unless illust_id_from_url
@illust_id = illust_id_from_url
@metadata = get_metadata_from_papi(@illust_id)
page = agent.get(URI.parse(normalized_url))
if page.search("body.not-logged-in").any?
# Session cache is invalid, clear it and log in normally.
Cache.delete("pixiv-phpsessid")
@agent = nil
page = agent.get(URI.parse(normalized_url))
end
@artist_name = @metadata.name
@profile_url = "http://www.pixiv.net/member.php?id=#{@metadata.user_id}"
@pixiv_moniker = @metadata.moniker
@zip_url, @ugoira_frame_data, @ugoira_content_type = get_zip_url_from_api
@tags = @metadata.tags.map do |tag|
[tag, "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag}.to_param}"]
end
@page_count = @metadata.page_count
@artist_commentary_title = @metadata.artist_commentary_title
@artist_commentary_desc = @metadata.artist_commentary_desc
is_manga = @page_count > 1
if !@zip_url
page = manga_page_from_url(@url).to_i
@image_url = image_urls[page]
end
end
def rewrite_thumbnails(thumbnail_url, is_manga=nil)
thumbnail_url = rewrite_new_medium_images(thumbnail_url)
thumbnail_url = rewrite_medium_ugoiras(thumbnail_url)
thumbnail_url = rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
return thumbnail_url
end
def agent
@agent ||= PixivWebAgent.build
end
def file_url
image_url || zip_url
end
def image_urls
@metadata.pages
def self.match?(*urls)
urls.compact.any? { |x| x.match?(/#{WEB}|#{IMG}|#{I12}|#{TOUCH}|#{PXIMG}|#{FANBOX_IMAGE}/i) }
end
def self.to_dtext(text)
@@ -137,18 +38,147 @@ module Sources
DText.from_html(text)
end
def illust_id_from_url
if sample_image? || full_image? || work_page?
illust_id_from_url!
else
nil
def site_name
"Pixiv"
end
# All image URLs for the work, with CDN hosts rewritten. Falls back to the
# raw url when the work id is invalid.
def image_urls
  image_urls_sub.map { |image_url| rewrite_cdn(image_url) }
rescue PixivApiClient::BadIDError
  [url]
end
def page_url
if novel_id.present?
return "https://www.pixiv.net/novel/show.php?id=#{novel_id}&mode=cover"
end
rescue Sources::Error
raise if Rails.env.test?
if fanbox_id.present?
return "https://www.pixiv.net/fanbox/creator/#{metadata.user_id}/post/#{fanbox_id}"
end
if illust_id.present?
return "http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{illust_id}"
end
return url
rescue PixivApiClient::BadIDError
nil
end
def canonical_url
return image_url
end
# The artist's member page. Prefer an explicit profile URL among
# (url, referer_url); otherwise build one from the API metadata. Returns
# nil for invalid work ids.
def profile_url
  explicit = [url, referer_url].find { |x| x =~ PROFILE }
  explicit || "https://www.pixiv.net/member.php?id=#{metadata.user_id}"
rescue PixivApiClient::BadIDError
  nil
end
def illust_id_from_url!
def artist_name
metadata.name
rescue PixivApiClient::BadIDError
nil
end
def artist_commentary_title
metadata.artist_commentary_title
rescue PixivApiClient::BadIDError
nil
end
def artist_commentary_desc
metadata.artist_commentary_desc
rescue PixivApiClient::BadIDError
nil
end
# HTTP headers for downloading. Fanbox images additionally require the
# logged-in session cookie.
def headers
  unless fanbox_id.present?
    return { "Referer" => "https://www.pixiv.net" }
  end

  {
    "Referer" => "https://www.pixiv.net/fanbox",
    "Cookie" => HTTP::Cookie.cookie_value(agent.cookies)
  }
end
def normalized_for_artist_finder?
url =~ PROFILE
end
def normalizable_for_artist_finder?
illust_id.present? || novel_id.present? || fanbox_id.present?
end
def unique_id
moniker
end
# [tag, search_url] pairs from the API metadata; empty for invalid ids.
def tags
  metadata.tags.map do |tag_name|
    search = "https://www.pixiv.net/search.php?s_mode=s_tag_full&#{{word: tag_name}.to_param}"
    [tag_name, search]
  end
rescue PixivApiClient::BadIDError
  []
end
memoize :tags
# Translate a Pixiv tag into Danbooru tags. Bookmark-count suffixes
# ("…users入り") are stripped first; if nothing translates and the tag is
# slash-separated, each segment is tried individually.
#
# Fix: the block parameter previously shadowed the method's `tag` argument.
def translate_tag(tag)
  normalized_tag = tag.gsub(/\d+users入り\z/i, "")
  translated_tags = super(normalized_tag)

  if translated_tags.empty? && normalized_tag.include?("/")
    translated_tags = normalized_tag.split("/").flat_map { |segment| super(segment) }
  end

  translated_tags
end
public
# Raw page URLs from the API, before CDN rewriting. Normalizing raw URLs
# directly is unreliable, so we always ask the API for the canonical list.
def image_urls_sub
  return [metadata.pages[manga_page]] if manga_page.present?

  # For ugoira works the API returns a Hash of zip variants instead of a
  # list of pages.
  return [ugoira_zip_url] if metadata.pages.is_a?(Hash)

  metadata.pages
end
# Strip the Akamai edgesuite CDN suffix so the URL references pixiv.net
# directly.
def rewrite_cdn(x)
  return x unless x.match?(%r{\Ahttps?://(?:\w+\.)?pixiv\.net\.edgesuite\.net})

  x.sub(".edgesuite.net", "")
end
# in order to prevent recursive loops, this method should not make any
# api calls and only try to extract the illust_id from the url. therefore,
# even though it makes sense to reference page_url here, it will only look
# at (url, referer_url).
def illust_id
# http://img18.pixiv.net/img/evazion/14901720.png
#
# http://i2.pixiv.net/img18/img/evazion/14901720.png
@@ -165,228 +195,166 @@ module Sources
#
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
if url =~ %r!/(\d+)(?:_\w+)?\.(?:jpg|jpeg|png|gif|zip)!i
$1
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
elsif url =~ /illust_id=(\d+)/i
$1
# http://www.pixiv.net/i/18557054
elsif url =~ %r!pixiv\.net/i/(\d+)!i
$1
else
raise Sources::Error.new("Couldn't get illust ID from URL: #{url}")
end
end
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p1_master1200.jpg
# => http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p1.png
#
# http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
# => http://i.pximg.net/img-original/img/2014/05/15/23/53/59/43521009_p1.jpg
def rewrite_new_medium_images(thumbnail_url)
if thumbnail_url =~ %r!/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i ||
thumbnail_url =~ %r!/img-master/img/#{TIMESTAMP}/\d+_p\d+_\w+\.jpg!i
page = manga_page_from_url(@url).to_i
thumbnail_url = @metadata.pages[page]
return $1
end
thumbnail_url
end
# http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira600x600.zip
# => http://i3.pixiv.net/img-zip-ugoira/img/2014/12/03/04/58/24/47378698_ugoira1920x1080.zip
def rewrite_medium_ugoiras(thumbnail_url)
if thumbnail_url =~ %r!/img-zip-ugoira/img/.*/\d+_ugoira600x600.zip!i
thumbnail_url = thumbnail_url.sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end
thumbnail_url
end
# If the thumbnail is for a manga gallery, it needs to be rewritten like this:
#
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# => http://i2.pixiv.net/img18/img/evazion/14901720_big_p0.png
#
# Otherwise, it needs to be rewritten like this:
#
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# => http://i2.pixiv.net/img18/img/evazion/14901720.png
#
def rewrite_old_small_and_medium_images(thumbnail_url, is_manga)
if thumbnail_url =~ %r!/img/#{MONIKER}/\d+_[ms]\.#{EXT}!i
if is_manga.nil?
page_count = @metadata.page_count
is_manga = page_count > 1
[url, referer_url].each do |x|
# http://www.pixiv.net/member_illust.php?mode=medium&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=big&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga&illust_id=18557054
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=18557054&page=1
if x =~ /illust_id=(\d+)/i
return $1
end
if is_manga
page = manga_page_from_url(@url)
return thumbnail_url.sub(/_[ms]\./, "_big_p#{page}.")
else
return thumbnail_url.sub(/_[ms]\./, ".")
# http://www.pixiv.net/i/18557054
if x =~ %r!pixiv\.net/i/(\d+)!i
return $1
end
end
return thumbnail_url
raise Sources::Error.new("Couldn't get illust ID from URL (#{url}, #{referer_url})")
end
memoize :illust_id
# The novel id from whichever of (url, referer_url) is a novel page, or
# nil when neither is.
def novel_id
  [url, referer_url].each do |candidate|
    match = NOVEL_PAGE.match(candidate)
    return match[1] if match
  end
  nil
end
memoize :novel_id
# The fanbox post id from whichever of (url, referer_url) is a fanbox post
# page or a fanbox image, or nil when neither matches.
def fanbox_id
  [url, referer_url].each do |candidate|
    return $1 if candidate =~ FANBOX_PAGE || candidate =~ FANBOX_IMAGE
  end
  nil
end
memoize :fanbox_id
def agent
PixivWebAgent.build
end
memoize :agent
# The work's HTML page. Retries once with a fresh login when the cached
# PHPSESSID has expired.
#
# Fix: the fetched page was never assigned — `page.search` then re-entered
# this (not-yet-memoized) method recursively instead of inspecting the
# fetched document.
def page
  doc = agent.get(URI.parse(page_url))

  if doc.search("body.not-logged-in").any?
    # Session cache is invalid, clear it and log in normally.
    Cache.delete("pixiv-phpsessid")
    @agent = nil
    doc = agent.get(URI.parse(page_url))
  end

  doc
end
memoize :page
# API metadata for the work, dispatched by work kind: novel, fanbox post,
# or regular illust.
def metadata
  client = PixivApiClient.new
  return client.novel(novel_id) if novel_id.present?
  return client.fanbox(fanbox_id) if fanbox_id.present?

  client.work(illust_id)
end
memoize :metadata
# The artist moniker, extracted from the URL when it embeds one, otherwise
# taken from the API metadata.
def moniker
  case url
  when %r!#{IMG}/img/(#{MONIKER})!i then $1
  when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i then $1
  when %r!#{WEB}/stacc/(#{MONIKER})/?$!i then $1
  else metadata.moniker
  end
end
memoize :moniker
def page_count
metadata.page_count
end
def manga_page_from_url(url)
def data
return {
ugoira_frame_data: ugoira_frame_data
}
end
def ugoira_zip_url
if metadata.pages.is_a?(Hash) && metadata.pages["ugoira600x600"]
return metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
end
end
memoize :ugoira_zip_url
def ugoira_frame_data
return metadata.json.dig("metadata", "frames")
end
memoize :ugoira_frame_data
# MIME type of the ugoira frames, inferred from the extensions in the
# API's image_urls. Raises when no known extension is present.
def ugoira_content_type
  frame_urls = metadata.json["image_urls"].to_s
  return "image/jpeg" if frame_urls =~ /\.jpg/
  return "image/png" if frame_urls =~ /\.png/
  return "image/gif" if frame_urls =~ /\.gif/

  raise Sources::Error.new("content type not found for (#{url}, #{referer_url})")
end
memoize :ugoira_content_type
def is_manga?
page_count > 1
end
# Returns the current page number of the manga. This will not
# make any api calls and only looks at (url, referer_url).
def manga_page
# http://i2.pixiv.net/img04/img/syounen_no_uta/46170939_p0.jpg
# http://i1.pixiv.net/c/600x600/img-master/img/2014/09/24/23/25/08/46168376_p0_master1200.jpg
# http://i1.pixiv.net/img-original/img/2014/09/25/23/09/29/46183440_p0.jpg
if url =~ %r!/\d+_p(\d+)(?:_\w+)?\.#{EXT}!i
$1
return $1.to_i
end
# http://www.pixiv.net/member_illust.php?mode=manga_big&illust_id=46170939&page=0
elsif url =~ /page=(\d+)/i
$1
else
0
end
end
def get_moniker_from_url
case url
when %r!#{IMG}/img/(#{MONIKER})!i
$1
when %r!#{I12}/img[0-9]+/img/(#{MONIKER})!i
$1
when %r!#{WEB}/stacc/(#{MONIKER})/?$!i
$1
else
false
end
end
def has_moniker?
get_moniker_from_url != false
end
def get_image_url_from_page(page, is_manga)
if is_manga
elements = page.search("div.works_display a img").find_all do |node|
node["src"] !~ /source\.pixiv\.net/
[url, referer_url].each do |x|
if x =~ /page=(\d+)/i
return $1.to_i
end
else
elements = page.search("div.works_display div img.big")
elements = page.search("div.works_display div img") if elements.empty?
end
if elements.any?
element = elements.first
thumbnail_url = element.attr("src") || element.attr("data-src")
return rewrite_thumbnails(thumbnail_url, is_manga)
end
if page.body =~ /"original":"(https:.+?)"/
return $1.gsub(/\\\//, '/')
end
end
def get_zip_url_from_api
if @metadata.pages.is_a?(Hash) && @metadata.pages["ugoira600x600"]
zip_url = @metadata.pages["ugoira600x600"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
frame_data = @metadata.json["metadata"]["frames"]
content_type = nil
case @metadata.json["image_urls"].to_s
when /\.jpg/
content_type = "image/jpeg"
when /\.png/
content_type = "image/png"
when /\.gif/
content_type = "image/gif"
end
return [zip_url, frame_data, content_type]
end
end
def get_zip_url_from_page(page)
scripts = page.search("body script").find_all do |node|
node.text =~ /_ugoira600x600\.zip/
end
if scripts.any?
javascript = scripts.first.text
json = javascript.match(/;pixiv\.context\.ugokuIllustData\s+=\s+(\{.+?\});(?:$|pixiv\.context)/)[1]
data = JSON.parse(json)
zip_url = data["src"].sub("_ugoira600x600.zip", "_ugoira1920x1080.zip")
frame_data = data["frames"]
content_type = data["mime_type"]
return [zip_url, frame_data, content_type]
end
end
def normalized_url
"http://www.pixiv.net/member_illust.php?mode=medium&illust_id=#{@illust_id}"
end
def get_metadata_from_papi(illust_id)
@metadata ||= PixivApiClient.new.works(illust_id)
end
def work_page?
return true if url =~ %r!(?:#{WEB}|#{TOUCH})/member_illust\.php! && url =~ %r!mode=(?:medium|big|manga|manga_big)! && url =~ %r!illust_id=\d+!
return true if url =~ %r!(?:#{WEB}|#{TOUCH})/i/\d+$!i
return false
end
def full_image?
# http://img18.pixiv.net/img/evazion/14901720.png?1234
return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
# http://i2.pixiv.net/img18/img/evazion/14901720.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_big_p1.png
return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+(?:_big_p\d+)?\.#{EXT}!i
# http://i1.pixiv.net/img-original/img/2014/10/02/13/51/23/46304396_p0.png
return true if url =~ %r!#{I12}/img-original/img/#{TIMESTAMP}/\d+_p\d+\.#{EXT}$!i
# http://i.pximg.net/img-original/img/2017/03/22/17/40/51/62041488_p0.jpg
return true if url =~ %r!#{PXIMG}/img-original/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i1.pixiv.net/img-zip-ugoira/img/2014/10/03/17/29/16/46323924_ugoira1920x1080.zip
return true if url =~ %r!(#{I12}|#{PXIMG})/img-zip-ugoira/img/#{TIMESTAMP}/\d+_ugoira\d+x\d+\.zip$!i
return false
end
def sample_image?
# http://img18.pixiv.net/img/evazion/14901720_m.png
return true if url =~ %r!#{IMG}/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
# http://i2.pixiv.net/img18/img/evazion/14901720_m.png
# http://i1.pixiv.net/img07/img/pasirism/18557054_p1.png
return true if url =~ %r!#{I12}/img\d+/img/#{MONIKER}/\d+_(?:[sm]|p\d+)\.#{EXT}!i
# http://i1.pixiv.net/c/600x600/img-master/img/2014/10/02/13/51/23/46304396_p0_master1200.jpg
# http://i2.pixiv.net/c/64x64/img-master/img/2014/10/09/12/59/50/46441917_square1200.jpg
return true if url =~ %r!#{I12}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}$!i
# http://i.pximg.net/img-master/img/2014/05/15/23/53/59/43521009_p1_master1200.jpg
return true if url =~ %r!#{PXIMG}/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i.pximg.net/c/600x600/img-master/img/2017/03/22/17/40/51/62041488_p0_master1200.jpg
return true if url =~ %r!#{PXIMG}/c/\d+x\d+/img-master/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
# http://i1.pixiv.net/img-inf/img/2011/05/01/23/28/04/18557054_s.png
# http://i2.pixiv.net/img-inf/img/2010/11/30/08/54/06/14901765_64x64.jpg
return true if url =~ %r!#{I12}/img-inf/img/#{TIMESTAMP}/\d+_\w+\.#{EXT}!i
return false
return nil
end
memoize :manga_page
end
end
end

View File

@@ -1,28 +1,52 @@
module Sources::Strategies
class Tumblr < Base
extend Memoist
DOMAIN = %r{(data|(\d+\.)?media)\.tumblr\.com}
MD5 = %r{(?<md5>[0-9a-f]{32})}i
FILENAME = %r{(?<filename>(tumblr_(inline_)?)?[a-z0-9]+(_r[0-9]+)?)}i
SIZES = %r{(?:250|400|500|500h|540|1280|raw)}i
EXT = %r{(?<ext>\w+)}
IMAGE = %r!\Ahttps?://#{DOMAIN}/(?<dir>#{MD5}/)?#{FILENAME}_#{SIZES}\.#{EXT}\z!i
POST = %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
def self.url_match?(url)
blog_name, post_id = parse_info_from_url(url)
blog_name.present? && post_id.present?
# A URL belongs to this strategy when it is a tumblr image asset or a
# post/image permalink with a blog name and post id.
def self.match?(*urls)
  urls.compact.any? do |candidate|
    blog, id = parse_info_from_url(candidate)
    candidate =~ IMAGE || (blog.present? && id.present?)
  end
end
def referer_url
blog_name, post_id = self.class.parse_info_from_url(normalized_url)
"https://#{blog_name}.tumblr.com/post/#{post_id}"
end
def tags
post[:tags].map do |tag|
# normalize tags: space, underscore, and hyphen are equivalent in tumblr tags.
[tag.tr(" _-", "_"), "https://tumblr.com/tagged/#{CGI::escape(tag.tr(" _-", "-"))}"]
end.uniq
# [blog_name, post_id] extracted from a post/image permalink; an empty
# array when the URL is not a post URL.
def self.parse_info_from_url(url)
  match = POST.match(url)
  match ? [match[:blog_name], match[:post_id]] : []
end
def site_name
"Tumblr"
end
# Deduped full-size image URLs: normalize CDN hosts, then probe for the
# largest available variant of each.
def image_urls
  image_urls_sub.uniq.map { |raw| find_largest(normalize_cdn(raw)) }.compact.uniq
end
# Canonical post permalink built from whichever of (url, referer_url) is a
# post URL; defers to the base strategy otherwise.
def page_url
  permalink = [url, referer_url].find { |x| x =~ POST }
  return super if permalink.nil?

  blog_name, post_id = self.class.parse_info_from_url(permalink)
  "https://#{blog_name}.tumblr.com/post/#{post_id}"
end
def profile_url
"https://#{artist_name}.tumblr.com/"
end
@@ -35,8 +59,10 @@ module Sources::Strategies
case post[:type]
when "text", "link"
post[:title]
when "answer"
"#{post[:asking_name]} asked: #{post[:question]}"
else
nil
end
@@ -46,94 +72,133 @@ module Sources::Strategies
case post[:type]
when "text"
post[:body]
when "link"
post[:description]
when "photo", "video"
post[:caption]
when "answer"
post[:answer]
else
nil
end
end
# [tag, tagged-search-url] pairs. Tumblr treats space, underscore, and
# hyphen as interchangeable in tags, so all three are normalized to
# underscore before building the search link.
def tags
  post[:tags].map do |tag|
    # tr is the idiomatic (and faster) char-for-char equivalent of
    # gsub(/[ _-]/, "_"); the trailing "-" in the from-string is literal.
    etag = tag.tr(" _-", "_")
    [etag, "https://tumblr.com/tagged/#{CGI.escape(etag)}"]
  end.uniq
end
memoize :tags
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip
end
def image_url
image_urls.first
end
public
def image_urls
urls = case post[:type]
when "photo"
post[:photos].map do |photo|
self.class.normalize_image_url(photo[:original_size][:url])
end
when "video"
[post[:video_url]]
else
[]
def image_urls_sub
list = []
if url =~ IMAGE
list << url
end
urls += self.class.parse_inline_images(artist_commentary_desc)
urls
end
if page_url !~ POST
return list
end
def get
end
module HelperMethods
extend ActiveSupport::Concern
module ClassMethods
def parse_info_from_url(url)
url =~ %r!\Ahttps?://(?<blog_name>[^.]+)\.tumblr\.com/(?:post|image)/(?<post_id>\d+)!i
[$1, $2]
end
def parse_inline_images(text)
html = Nokogiri::HTML.fragment(text)
image_urls = html.css("img").map { |node| node["src"] }
image_urls = image_urls.map(&method(:normalize_image_url))
image_urls
end
def normalize_image_url(url)
url, _, _ = Downloads::RewriteStrategies::Tumblr.new.rewrite(url, {})
url
if post[:type] == "photo"
list += post[:photos].map do |photo|
photo[:original_size][:url]
end
end
def normalized_url
if self.class.url_match?(@referer_url)
@referer_url
elsif self.class.url_match?(@url)
@url
if post[:type] == "video"
list << post[:video_url]
end
if inline_images.any?
list += inline_images.to_a
end
if list.any?
return list
end
raise "image url not found for (#{url}, #{referer_url})"
end
# Normalize cdn subdomains.
#
# https://gs1.wac.edgecastcdn.net/8019B6/data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => http://data.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# Rewrite edgecast CDN hosts back to media.tumblr.com.
#
# NOTE(review): only the media.tumblr.com edgecast prefix is handled; the
# data.tumblr.com variant passes through untouched — confirm that is
# intended.
def normalize_cdn(x)
  edgecast = %r!\Ahttps?://gs1\.wac\.edgecastcdn\.net/8019B6/media\.tumblr\.com!i
  x.sub(edgecast, "http://media.tumblr.com")
end
# Look for the biggest available version on media.tumblr.com. A bigger
# version may or may not exist.
#
# https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg
# => https://media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_raw.jpg
#
# https://68.media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_250.gif
# => https://media.tumblr.com/ee02048f5578595badc95905e17154b4/tumblr_inline_ofbr4452601sk4jd9_500.gif
#
# https://25.media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_500.png
# => https://media.tumblr.com/tumblr_m2dxb8aOJi1rop2v0o1_1280.png
#
# http://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_400.jpg
# => https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg
#
# http://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# => https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg
# Probe media.tumblr.com for the largest size variant that actually
# exists — a bigger version may or may not be there. Non-image URLs pass
# through unchanged; returns nil when no candidate responds.
def find_largest(x)
  match = IMAGE.match(x)
  return x unless match

  candidates = [1280, 640, 540, "500h", 500, 400, 250].map do |size|
    "https://media.tumblr.com/#{match[:dir]}#{match[:filename]}_#{size}.#{match[:ext]}"
  end
  candidates.find { |candidate| http_exists?(candidate, headers) }
end
module ApiMethods
def client
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
::TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
end
def api_response
blog_name, post_id = self.class.parse_info_from_url(normalized_url)
client.posts(blog_name, post_id)
end
def post
api_response[:posts].first
end
def inline_images
html = Nokogiri::HTML.fragment(artist_commentary_desc)
html.css("img").map { |node| node["src"] }
end
memoize :inline_images
include ApiMethods
include HelperMethods
def client
raise NotImplementedError.new("Tumblr support is not available (API key not configured).") if Danbooru.config.tumblr_consumer_key.nil?
memoize :client, :api_response
TumblrApiClient.new(Danbooru.config.tumblr_consumer_key)
end
memoize :client
# The Tumblr API response for the post; raises when no post permalink can
# be derived from (url, referer_url).
def api_response
  blog, id = self.class.parse_info_from_url(page_url)
  raise "Page url not found for (#{url}, #{referer_url})" if blog.nil?

  client.posts(blog, id)
end
memoize :api_response
# The single post record from the API response.
# NOTE(review): assumes the API returns at least one post for a valid
# permalink; `first` is nil otherwise — confirm callers tolerate that.
def post
  api_response[:posts].first
end
end
end

View File

@@ -1,52 +1,94 @@
module Sources::Strategies
class Twitter < Base
attr_reader :image_urls
# Any twitter.com page, including the mobile subdomain.
PAGE = %r!\Ahttps?://(?:mobile\.)?twitter\.com!i
# Direct media assets on twimg.com. The capture group is reused by callers
# to rebuild the ":orig" variant of the asset URL.
# Fix: a stray "}" inside the pattern required a literal "}" after
# "/media/", so the regexp could never match a real asset URL.
ASSET = %r!\A(https?://(?:video|pbs)\.twimg\.com/media/)!i
def self.url_match?(url)
self.status_id_from_url(url).present?
# Handles twitter.com pages and direct twimg.com media assets.
def self.match?(*urls)
  urls.compact.any? { |candidate| candidate =~ PAGE || candidate =~ ASSET }
end
def referer_url
normalized_url
end
# https://twitter.com/i/web/status/943446161586733056
# https://twitter.com/motty08111213/status/943446161586733056
def self.status_id_from_url(url)
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
return $1
end
def normalized_url
"https://twitter.com/#{artist_name}/status/#{status_id}"
end
def artist_name
api_response.attrs[:user][:screen_name]
return nil
end
def site_name
"Twitter"
end
def api_response
@api_response ||= TwitterService.new.client.status(status_id, tweet_mode: "extended")
end
def get
attrs = api_response.attrs
@profile_url = "https://twitter.com/" + attrs[:user][:screen_name]
@image_urls = TwitterService.new.image_urls(api_response)
@image_url = @image_urls.first
@artist_commentary_title = ""
@artist_commentary_desc = attrs[:full_text]
@tags = attrs[:entities][:hashtags].map do |text:, indices:|
[text, "https://twitter.com/hashtag/#{text}"]
def image_urls
if url =~ /(#{ASSET}[^:]+)/
return [$1 + ":orig" ]
end
rescue ::Twitter::Error::Forbidden
[url, referer_url].each do |x|
if x =~ PAGE
return service.image_urls(api_response)
end
end
rescue Twitter::Error::NotFound
url
end
memoize :image_urls
# Whichever of (url, referer_url) is a tweet permalink; defers to the base
# strategy when neither contains a status id.
def page_url
  status = [url, referer_url].find { |x| self.class.status_id_from_url(x).present? }
  status || super
end
def normalize_for_artist_finder!
url.downcase
# The artist's profile page, derived from the URL when it names a user
# (skipping the "/i/..." internal paths), otherwise from the tweet's API
# record. Returns nil when the tweet no longer exists.
def profile_url
  match = url.match(%r{\Ahttps?://(?:mobile\.)?twitter\.com/(\w+)}i)
  return "https://twitter.com/#{match[1]}" if match && match[1] != "i"

  "https://twitter.com/" + api_response.attrs[:user][:screen_name]
rescue Twitter::Error::NotFound
  nil
end
def artist_name
api_response.attrs[:user][:screen_name]
rescue Twitter::Error::NotFound
nil
end
def artist_commentary_title
""
end
def artist_commentary_desc
api_response.attrs[:full_text]
rescue Twitter::Error::NotFound
nil
end
# Only page URLs (not bare media assets) can be normalized to an artist
# profile.
#
# Fix: removed a dead `true` literal that preceded the real return
# expression and had no effect.
def normalizable_for_artist_finder?
  url =~ PAGE
end
def normalize_for_artist_finder
profile_url.downcase
end
# Hashtags from the tweet, as [tag_text, hashtag_url] pairs.
def tags
  api_response.attrs[:entities][:hashtags].map do |text:, indices:|
    # `indices:` must be named to destructure the hashtag hash but is unused.
    # NOTE(review): implicit hash-to-keyword destructuring in blocks changed
    # in Ruby 3.0 — confirm the target Ruby version before upgrading.
    [text, "https://twitter.com/hashtag/#{text}"]
  end
end
memoize :tags
def dtext_artist_commentary_desc
url_replacements = api_response.urls.map do |obj|
[obj.url.to_s, obj.expanded_url.to_s]
@@ -63,19 +105,23 @@ module Sources::Strategies
desc = desc.gsub(%r!@([a-zA-Z0-9_]+)!, '"@\\1":[https://twitter.com/\\1]')
desc.strip
end
memoize :dtext_artist_commentary_desc
public
def service
TwitterService.new
end
memoize :service
def api_response
service.client.status(status_id, tweet_mode: "extended")
end
memoize :api_response
def status_id
self.class.status_id_from_url(@url) || self.class.status_id_from_url(@referer_url)
end
# https://twitter.com/i/web/status/943446161586733056
# https://twitter.com/motty08111213/status/943446161586733056
def self.status_id_from_url(url)
if url =~ %r{\Ahttps?://(?:mobile\.)?twitter\.com/(?:i/web|\w+)/status/(\d+)}i
$1
else
nil
end
[url, referer_url].map {|x| self.class.status_id_from_url(x)}.compact.first
end
memoize :status_id
end
end