Get rid of `normalized_for_artist_finder?` and `normalizable_for_artist_finder?`. This was legacy bullshit that was originally designed to avoid API calls when saving artist entries containing old Pixiv direct image urls that had already been normalized, or that couldn't be normalized because they were bad id. Nowadays we store profile urls in artist entries instead of direct image urls, so we don't normally need to do any API calls to normalize the profile url. Strategies should take care to avoid triggering API calls inside `profile_url` when possible.
219 lines
6.9 KiB
Ruby
219 lines
6.9 KiB
Ruby
# Image URLS
|
|
# * http://ww1.sinaimg.cn/large/69917555gw1f6ggdghk28j20c87lbhdt.jpg
|
|
#
|
|
# Image Samples
|
|
# * http://ww4.sinaimg.cn/mw690/77a2d531gw1f4u411ws3aj20m816fagg.jpg
|
|
# * https://wx4.sinaimg.cn/orj360/e3930166gy1g546bz86cij20u00u040y.jpg
|
|
# * http://ww3.sinaimg.cn/mw1024/0065kjmOgw1fabcanrzx6j30f00lcjwv.jpg
|
|
#
|
|
# Page URLS
|
|
# * http://weibo.com/3357910224/EEHA1AyJP
|
|
# * https://www.weibo.com/5501756072/IF9fugHzj?from=page_1005055501756072_profile&wvr=6&mod=weibotime
|
|
#
|
|
# * http://photo.weibo.com/5732523783/talbum/detail/photo_id/4029784374069389?prel=p6_3
|
|
# * http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t
|
|
# * http://tw.weibo.com/1300957955/3786333853668537
|
|
#
|
|
# * https://m.weibo.cn/detail/4506950043618873
|
|
# * https://m.weibo.cn/status/J33G4tH1B
|
|
#
|
|
# Video
|
|
# * https://www.weibo.com/5501756072/IF9fugHzj
|
|
#
|
|
# Profile URLS
|
|
# ### Short ID
|
|
# * https://www.weibo.com/5501756072
|
|
# * https://www.weibo.com/u/5501756072
|
|
# * https://m.weibo.cn/profile/5501756072
|
|
# * https://m.weibo.cn/u/5501756072
|
|
# ### Long ID
|
|
# * https://www.weibo.com/p/1005055501756072
|
|
|
|
module Sources
|
|
module Strategies
|
|
class Weibo < Base
|
|
PROFILE_URL_1 = %r{\Ahttps?://(?:(?:www|m)\.)?weibo\.c(?:om|n)/(?:(?:u|profile)/)?(?<artist_short_id>\d+)\z}i
|
|
PROFILE_URL_2 = %r{\Ahttps?://photo\.weibo\.com/(?<artist_short_id>\d+)}i
|
|
PROFILE_URL_3 = %r{\Ahttps?://(?:www\.)?weibo\.com/p/(?<artist_long_id>\d+)}i
|
|
|
|
PAGE_URL_1 = %r{\Ahttps?://(?:www\.)?weibo\.com/(?<artist_short_id>\d+)/(?<illust_base62_id>\w+)(?:\?.*)?\z}i
|
|
PAGE_URL_2 = %r{#{PROFILE_URL_2}/(?:wbphotos/large/mid|talbum/detail/photo_id)/(?<illust_long_id>\d+)(?:/pid/(?<image_id>\w{32}))?}i
|
|
PAGE_URL_3 = %r{\Ahttps?://m\.weibo\.cn/(detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i
|
|
PAGE_URL_4 = %r{\Ahttps?://tw\.weibo\.com/(?:(?<artist_short_id>\d+)|\w+)/(?<illust_long_id>\d+)}i
|
|
|
|
IMAGE_URL = %r{\Ahttps?://\w{3}\.sinaimg\.cn/\w+/(?<image_id>\w{32})\.}i
|
|
|
|
def domains
|
|
["weibo.com", "weibo.cn", "weibocdn.com", "sinaimg.cn"]
|
|
end
|
|
|
|
def site_name
|
|
"Weibo"
|
|
end
|
|
|
|
def image_urls
|
|
urls = []
|
|
|
|
if url =~ IMAGE_URL
|
|
urls << self.class.convert_image_to_large(url)
|
|
elsif api_response.present?
|
|
if api_response["pics"].present?
|
|
urls += api_response["pics"].to_a.map { |pic| self.class.convert_image_to_large(pic["url"]) }
|
|
elsif api_response.dig("page_info", "type") == "video"
|
|
variants = api_response["page_info"]["media_info"].to_h.values + api_response["page_info"]["urls"].to_h.values
|
|
urls << variants.max_by do |variant|
|
|
if /template=(?<width>\d+)x(?<height>\d+)/ =~ variant.to_s
|
|
width.to_i * height.to_i
|
|
else
|
|
0
|
|
end
|
|
end
|
|
end
|
|
else
|
|
urls << url
|
|
end
|
|
|
|
urls
|
|
end
|
|
|
|
def image_url
|
|
image_id = url[PAGE_URL_2, :image_id] if url =~ PAGE_URL_2
|
|
|
|
if image_id.present?
|
|
image_urls.select { |i| i[IMAGE_URL, :image_id] == image_id }.compact.first
|
|
else
|
|
image_urls.first
|
|
end
|
|
end
|
|
|
|
def preview_urls
|
|
image_urls.map { |img| img.gsub(%r{.cn/\w+/(\w+)}, '.cn/orj360/\1') }
|
|
end
|
|
|
|
def page_url
|
|
if api_response.present?
|
|
artist_id = api_response["user"]["id"]
|
|
illust_id = api_response["bid"]
|
|
"https://www.weibo.com/#{artist_id}/#{illust_id}"
|
|
elsif url =~ IMAGE_URL
|
|
self.class.convert_image_to_large(url)
|
|
else
|
|
url
|
|
end
|
|
end
|
|
|
|
def tags
|
|
return [] if api_response.blank?
|
|
|
|
matches = api_response["text"]&.scan(/surl-text">#(.*?)#</).to_a.map { |m| m[0] }
|
|
|
|
matches.map do |match|
|
|
[match, "https://s.weibo.com/weibo/#{match}"]
|
|
end
|
|
end
|
|
|
|
def profile_urls
|
|
[profile_short_url, profile_long_url].compact
|
|
end
|
|
|
|
def profile_url
|
|
profile_urls.first
|
|
end
|
|
|
|
def profile_short_url
|
|
return if artist_short_id.blank?
|
|
|
|
"https://www.weibo.com/u/#{artist_short_id}"
|
|
end
|
|
|
|
def profile_long_url
|
|
return if artist_long_id.blank?
|
|
|
|
"https://www.weibo.com/p/#{artist_long_id}"
|
|
end
|
|
|
|
def artist_commentary_desc
|
|
return if api_response.blank?
|
|
|
|
api_response["text"]
|
|
end
|
|
|
|
def dtext_artist_commentary_desc
|
|
DText.from_html(artist_commentary_desc) do |element|
|
|
if element["href"].present?
|
|
href = Addressable::URI.heuristic_parse(element["href"])
|
|
href.site ||= "https://www.weibo.com"
|
|
href.scheme ||= "https"
|
|
element["href"] = href.to_s
|
|
end
|
|
|
|
if element["src"].present?
|
|
src = Addressable::URI.heuristic_parse(element["src"])
|
|
src.scheme ||= "https"
|
|
element["src"] = src.to_s
|
|
end
|
|
end
|
|
end
|
|
|
|
def normalize_for_source
|
|
return url if url =~ PAGE_URL_2
|
|
artist_id = artist_short_id_from_url
|
|
|
|
if artist_id.present?
|
|
if illust_base62_id.present?
|
|
"https://www.weibo.com/#{artist_id}/#{illust_base62_id}"
|
|
elsif illust_long_id.present?
|
|
"https://photo.weibo.com/#{artist_id}/talbum/detail/photo_id/#{illust_long_id}"
|
|
end
|
|
elsif mobile_url.present?
|
|
mobile_url
|
|
end
|
|
end
|
|
|
|
def self.convert_image_to_large(url)
|
|
url.gsub(%r{.cn/\w+/(\w+)}, '.cn/large/\1')
|
|
end
|
|
|
|
def illust_long_id
|
|
[url, referer_url].compact.map { |x| x[PAGE_URL_2, :illust_long_id] || x[PAGE_URL_3, :illust_long_id] || x[PAGE_URL_4, :illust_long_id] }.compact.first
|
|
end
|
|
|
|
def illust_base62_id
|
|
[url, referer_url].compact.map { |x| x[PAGE_URL_1, :illust_base62_id] || x[PAGE_URL_3, :illust_base62_id] }.compact.first
|
|
end
|
|
|
|
def artist_short_id_from_url
|
|
[url, referer_url].compact.map { |x| x[PROFILE_URL_1, :artist_short_id] || x[PROFILE_URL_2, :artist_short_id] || x[PAGE_URL_1, :artist_short_id] || x[PAGE_URL_4, :artist_short_id] }.compact.first
|
|
end
|
|
|
|
def artist_short_id
|
|
artist_short_id_from_url || api_response&.dig("user", "id")
|
|
end
|
|
|
|
def artist_long_id
|
|
[url, referer_url].compact.map { |x| x[PROFILE_URL_3, :artist_long_id] }.compact.first
|
|
end
|
|
|
|
def mobile_url
|
|
if illust_long_id.present?
|
|
"https://m.weibo.cn/detail/#{illust_long_id}"
|
|
elsif illust_base62_id.present?
|
|
"https://m.weibo.cn/status/#{illust_base62_id}"
|
|
end
|
|
end
|
|
|
|
def api_response
|
|
return nil if mobile_url.blank?
|
|
|
|
resp = Danbooru::Http.cache(1.minute).get(mobile_url)
|
|
json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1]
|
|
|
|
return nil if json_string.blank?
|
|
|
|
JSON.parse(json_string)["status"]
|
|
end
|
|
memoize :api_response
|
|
end
|
|
end
|
|
end
|