Merge pull request #4488 from nonamethanks/add_weibo_support

Add Weibo support
This commit is contained in:
evazion
2020-05-27 16:53:14 -05:00
committed by GitHub
4 changed files with 312 additions and 1 deletions

View File

@@ -107,6 +107,8 @@ module ArtistFinder
"ustream.tv/user", # http://www.ustream.tv/user/kazaputi
"vk.com", # https://vk.com/id425850679
"weibo.com", # http://www.weibo.com/5536681649
"weibo.com/u",
"weibo.com/p",
"wp.com",
"yande.re",
"youtube.com",

View File

@@ -12,7 +12,8 @@ module Sources
Strategies::Nijie,
Strategies::Pawoo,
Strategies::Moebooru,
Strategies::HentaiFoundry
Strategies::HentaiFoundry,
Strategies::Weibo
]
end

View File

@@ -0,0 +1,216 @@
# Image URLS
# * http://ww1.sinaimg.cn/large/69917555gw1f6ggdghk28j20c87lbhdt.jpg
#
# Image Samples
# * http://ww4.sinaimg.cn/mw690/77a2d531gw1f4u411ws3aj20m816fagg.jpg
# * https://wx4.sinaimg.cn/orj360/e3930166gy1g546bz86cij20u00u040y.jpg
# * http://ww3.sinaimg.cn/mw1024/0065kjmOgw1fabcanrzx6j30f00lcjwv.jpg
#
# Page URLS
# * http://weibo.com/3357910224/EEHA1AyJP
# * https://www.weibo.com/5501756072/IF9fugHzj?from=page_1005055501756072_profile&wvr=6&mod=weibotime
#
# * http://photo.weibo.com/5732523783/talbum/detail/photo_id/4029784374069389?prel=p6_3
# * http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t
# * http://tw.weibo.com/1300957955/3786333853668537
#
# * https://m.weibo.cn/detail/4506950043618873
# * https://m.weibo.cn/status/J33G4tH1B
#
# Video
# * https://www.weibo.com/5501756072/IF9fugHzj
#
# Profile URLS
# ### Short ID
# * https://www.weibo.com/5501756072
# * https://www.weibo.com/u/5501756072
# * https://m.weibo.cn/profile/5501756072
# * https://m.weibo.cn/u/5501756072
# ### Long ID
# * https://www.weibo.com/p/1005055501756072
module Sources
module Strategies
class Weibo < Base
# Profile URL on weibo.com / weibo.cn (optionally under www. or m.), with an
# optional `u/` or `profile/` segment. The numeric short ID must end the URL.
PROFILE_URL_1 = %r{https?://(?:(?:www|m)\.)?weibo\.c(?:om|n)/(?:(?:u|profile)/)?(?<artist_short_id>\d+)\z}i
# Any photo.weibo.com URL; the first path segment is captured as the artist's
# short ID. Not end-anchored, so album page URLs match as well.
PROFILE_URL_2 = %r{https?://photo\.weibo\.com/(?<artist_short_id>\d+)}i
# Profile URL addressed by the long ID under /p/.
PROFILE_URL_3 = %r{https?://(?:www\.)?weibo\.com/p/(?<artist_long_id>\d+)}i
# Desktop post URL: /<artist short id>/<base62 post id>, optionally followed by
# a query string, anchored at the end of the URL.
PAGE_URL_1 = %r{https?://(?:www\.)?weibo\.com/(?<artist_short_id>\d+)/(?<illust_base62_id>\w+)(?:\?.*)?\z}i
# photo.weibo.com album URL. Builds on PROFILE_URL_2 (so artist_short_id is
# available too) and captures the post's long ID plus, when present, the
# 32-character image id after /pid/.
PAGE_URL_2 = %r{#{PROFILE_URL_2}/(?:wbphotos/large/mid|talbum/detail/photo_id)/(?<illust_long_id>\d+)(?:/pid/(?<image_id>\w{32}))?}i
# Mobile post URL on m.weibo.cn: either detail/<long id> or status/<base62 id>.
# Only one of the two named groups is set for a given match.
PAGE_URL_3 = %r{https?://m\.weibo\.cn/(detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i
# tw.weibo.com post URL. The first path segment is captured as the artist's
# short ID only when it is numeric; the second segment is the post's long ID.
PAGE_URL_4 = %r{https?://tw\.weibo\.com/(?:(?<artist_short_id>\d+)|\w+)/(?<illust_long_id>\d+)}i
# Direct sinaimg.cn image URL: three-character subdomain, one size directory,
# then a 32-character image id followed by a dot.
IMAGE_URL = %r{https?://\w{3}\.sinaimg\.cn/\w+/(?<image_id>\w{32})\.}i
# Domain names this strategy is associated with (presumably consulted by the
# base strategy when matching URLs -- confirm against Base).
def domains
  %w[weibo.com weibo.cn weibocdn.com sinaimg.cn]
end
# Human-readable name of the site.
def site_name
  'Weibo'
end
# Returns the full-size media URLs for the given source.
#
# * A direct sinaimg.cn image URL is converted to its `large` variant.
# * Otherwise, when the post data is available, returns every picture of the
#   post (as `large` variants), or for a video post the variant with the
#   biggest `template=WxH` resolution.
# * With no post data, falls back to the URL as given.
#
# @return [Array<String>] never contains nil entries produced by missing
#   picture URLs or an empty list of video variants
def image_urls
  return [self.class.convert_image_to_large(url)] if url =~ IMAGE_URL
  return [url] if api_response.blank?

  if api_response["pics"].present?
    # Skip entries without a "url" key instead of crashing on nil.
    pic_urls = api_response["pics"].to_a.map { |pic| pic["url"] }.compact
    pic_urls.map { |pic_url| self.class.convert_image_to_large(pic_url) }
  elsif api_response.dig("page_info", "type") == "video"
    page_info = api_response["page_info"]
    variants = page_info["media_info"].to_h.values + page_info["urls"].to_h.values

    # Rank variants by pixel count; variants without a template rank as 0.
    largest = variants.max_by do |variant|
      if /template=(?<width>\d+)x(?<height>\d+)/ =~ variant.to_s
        width.to_i * height.to_i
      else
        0
      end
    end

    # max_by returns nil for an empty list; don't leak that to callers.
    [largest].compact
  else
    []
  end
end
# Returns the single media URL to use for this source.
#
# When the URL is a photo.weibo.com album link carrying a `/pid/<image id>`
# segment, the entry of image_urls with that image id is returned (nil if no
# entry matches). Otherwise the first entry of image_urls is returned.
def image_url
  # String#[] already returns nil when the pattern does not match, so no
  # separate pre-match is needed; to_s keeps a nil url harmless.
  image_id = url.to_s[PAGE_URL_2, :image_id]
  return image_urls.first if image_id.blank?

  image_urls.find { |candidate| candidate[IMAGE_URL, :image_id] == image_id }
end
# Returns the `orj360` thumbnail variant of every entry in image_urls.
#
# Only the size directory directly after a literal ".cn/" is rewritten; URLs
# without that pattern (e.g. video URLs) are returned unchanged.
def preview_urls
  # The dot is escaped so that e.g. "acn/foo/bar" in an unrelated URL is not
  # mistaken for the ".cn/<size>/<id>" part of a sinaimg URL.
  image_urls.map { |img| img.gsub(%r{\.cn/\w+/(\w+)}, '.cn/orj360/\1') }
end
# Returns the canonical page URL for this source.
#
# When the post data provides both the author's id and the post's `bid`, the
# desktop post URL is built from them. Otherwise a direct image URL is
# normalised to its `large` variant, and anything else is returned unchanged.
def page_url
  # dig tolerates a response without a "user" object, matching how
  # artist_short_id reads the same field.
  artist_id = api_response&.dig("user", "id")
  illust_id = api_response&.dig("bid")

  if artist_id.present? && illust_id.present?
    "https://www.weibo.com/#{artist_id}/#{illust_id}"
  elsif url =~ IMAGE_URL
    self.class.convert_image_to_large(url)
  else
    url
  end
end
# Extracts the hashtags (`#name#` inside a `surl-text` element) from the
# post's HTML text.
#
# @return [Array<Array(String, String)>] pairs of tag name and its
#   s.weibo.com search URL; empty when there is no post data or no text
def tags
  response = api_response
  return [] if response.blank?

  # scan with a single group yields one-element arrays; flatten unwraps them.
  hashtags = response["text"]&.scan(/surl-text">#(.*?)#</).to_a.flatten
  hashtags.map { |name| [name, "https://s.weibo.com/weibo/#{name}"] }
end
# All known profile URLs for the artist: the short-ID form first, then the
# long-ID form, leaving out whichever is unavailable.
def profile_urls
  candidates = [profile_short_url, profile_long_url]
  candidates.reject(&:nil?)
end
# The preferred profile URL: the first entry of profile_urls, or nil when
# there is none.
def profile_url
  preferred, = profile_urls
  preferred
end
# Profile URL in the `/u/<short id>` form, or nil when no short ID is known.
def profile_short_url
  "https://www.weibo.com/u/#{artist_short_id}" if artist_short_id.present?
end
# Profile URL in the `/p/<long id>` form, or nil when no long ID is known.
def profile_long_url
  "https://www.weibo.com/p/#{artist_long_id}" if artist_long_id.present?
end
# The post's text as delivered in the post data (HTML, judging by how tags
# and dtext_artist_commentary_desc consume it), or nil without post data.
def artist_commentary_desc
  api_response["text"] if api_response.present?
end
# Converts the HTML commentary to DText via DText.from_html. The block
# rewrites each element's `href` and `src` attributes before conversion.
# NOTE(review): artist_commentary_desc can be nil; confirm DText.from_html
# accepts nil.
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
if element["href"].present?
href = Addressable::URI.heuristic_parse(element["href"])
# Links without a site part are presumably site-relative; resolve them
# against www.weibo.com. The scheme default below then only matters when
# a site was present without a scheme -- TODO confirm with Addressable.
href.site ||= "https://www.weibo.com"
href.scheme ||= "https"
element["href"] = href.to_s
end
if element["src"].present?
src = Addressable::URI.heuristic_parse(element["src"])
# Give scheme-less sources an https scheme.
src.scheme ||= "https"
element["src"] = src.to_s
end
end
end
# Truthy (the match offset) when the URL already ends in a
# `weibo.com/u/<digits>` or `weibo.com/p/<digits>` profile path, nil otherwise.
#
# NOTE(review): the sibling predicate is `normalizable_for_artist_finder?`
# while this one has no trailing `?` -- confirm which name the base class
# actually calls.
def normalized_for_artist_finder
  canonical_profile = %r{weibo\.com/(u|p)/\d+\z}i
  url =~ canonical_profile
end
# True when an artist ID (short or long) can be read straight from the URL or
# referer, i.e. without consulting the post data.
def normalizable_for_artist_finder?
  !(artist_short_id_from_url.blank? && artist_long_id.blank?)
end
# URL to use for artist lookup: the short-ID profile URL if available, else
# the long-ID profile URL, else the URL as given.
def normalize_for_artist_finder
  profile_short_url || profile_long_url || url
end
# Rewrites the size directory of a sinaimg.cn image URL to `large`.
#
# @param url [String] an image URL such as
#   https://wx3.sinaimg.cn/mw690/<id>.jpg
# @return [String] the URL with the directory after ".cn/" replaced by
#   `large`; URLs without a ".cn/<dir>/<name>" part are returned unchanged
def self.convert_image_to_large(url)
  # The dot is escaped so only a literal ".cn/" is rewritten, not any
  # character followed by "cn/".
  url.gsub(%r{\.cn/\w+/(\w+)}, '.cn/large/\1')
end
# The post's numeric long ID, taken from the URL or, failing that, the
# referer. Recognises photo.weibo.com album URLs, m.weibo.cn/detail URLs and
# tw.weibo.com URLs. Returns nil when neither carries one.
def illust_long_id
  [url, referer_url].each do |candidate|
    next if candidate.nil?

    long_id = candidate[PAGE_URL_2, :illust_long_id] ||
              candidate[PAGE_URL_3, :illust_long_id] ||
              candidate[PAGE_URL_4, :illust_long_id]
    return long_id unless long_id.nil?
  end

  nil
end
# The post's base62 ID, taken from the URL or, failing that, the referer.
# Recognises desktop post URLs and m.weibo.cn/status URLs. Returns nil when
# neither carries one.
def illust_base62_id
  [url, referer_url].each do |candidate|
    next if candidate.nil?

    base62_id = candidate[PAGE_URL_1, :illust_base62_id] ||
                candidate[PAGE_URL_3, :illust_base62_id]
    return base62_id unless base62_id.nil?
  end

  nil
end
# The artist's numeric short ID as found in the URL or, failing that, the
# referer. Recognises profile URLs, photo.weibo.com URLs and tw.weibo.com
# post URLs. Returns nil when neither carries one.
#
# NOTE(review): PAGE_URL_1 also captures an artist_short_id but is not
# consulted here -- confirm whether that is intentional.
def artist_short_id_from_url
  [url, referer_url].each do |candidate|
    next if candidate.nil?

    short_id = candidate[PROFILE_URL_1, :artist_short_id] ||
               candidate[PROFILE_URL_2, :artist_short_id] ||
               candidate[PAGE_URL_4, :artist_short_id]
    return short_id unless short_id.nil?
  end

  nil
end
# The artist's short ID: from the URL/referer when possible, otherwise from
# the `user.id` field of the post data (nil when neither is available).
def artist_short_id
  from_url = artist_short_id_from_url
  return from_url if from_url

  api_response&.dig("user", "id")
end
# The artist's long ID as found in a `/p/<long id>` profile URL, looking at
# the URL first and the referer second. Returns nil when neither matches.
def artist_long_id
  [url, referer_url].each do |candidate|
    next if candidate.nil?

    long_id = candidate[PROFILE_URL_3, :artist_long_id]
    return long_id unless long_id.nil?
  end

  nil
end
# The m.weibo.cn URL of the post: the `detail/<long id>` form when a long ID
# is known, else the `status/<base62 id>` form, else nil.
def mobile_url
  return "https://m.weibo.cn/detail/#{illust_long_id}" if illust_long_id.present?
  return "https://m.weibo.cn/status/#{illust_base62_id}" if illust_base62_id.present?

  nil
end
# Fetches the post's mobile page and returns the "status" object of the JSON
# embedded in its `var $render_data = [...][0]` script assignment.
#
# The HTTP request goes through Danbooru::Http with a one-minute cache, and
# the result is memoized below.
#
# @return [Object, nil] the parsed "status" value (a Hash, judging by how the
#   other methods index it); nil when there is no mobile URL, the page has no
#   embedded payload, or the payload is not valid JSON
def api_response
  return nil if mobile_url.blank?

  resp = Danbooru::Http.cache(1.minute).get(mobile_url)
  # /m lets `.` span newlines: the payload is a multi-line array literal.
  json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1]
  return nil if json_string.blank?

  JSON.parse(json_string)["status"]
rescue JSON::ParserError
  # The payload is scraped with a greedy pattern, so it may not be valid
  # JSON; treat that like a page with no payload instead of failing every
  # method that consults the post data.
  nil
end
memoize :api_response
end
end
end

View File

@@ -0,0 +1,92 @@
require 'test_helper'
# Tests for the Weibo source strategy, driven through
# Sources::Strategies.find with real Weibo / sinaimg URLs.
#
# NOTE(review): the expected image URLs, file sizes and tags look pinned to
# live posts; presumably these tests need network access unless HTTP is
# stubbed in test_helper -- confirm.
module Sources
class WeiboTest < ActiveSupport::TestCase
# Desktop post URL with a query string and fragment; the post has three pictures.
context "A post with multiple pictures" do
setup do
@site = Sources::Strategies.find("https://www.weibo.com/5501756072/J2UNKfbqV?type=comment#_rnd1590548401855")
end
should "extract all the image urls" do
urls = %w[
https://wx1.sinaimg.cn/large/0060kO5aly1gezsyt5xvhj30ok0sgtc9.jpg
https://wx3.sinaimg.cn/large/0060kO5aly1gezsyuaas1j30go0sgjtj.jpg
https://wx3.sinaimg.cn/large/0060kO5aly1gezsys1ai9j30gi0sg0v9.jpg
]
assert_equal(urls, @site.image_urls)
end
should "get the correct commentary" do
assert_not_nil(@site.artist_commentary_desc)
end
should "get the profile url" do
assert_equal("https://www.weibo.com/u/5501756072", @site.profile_url)
end
# The canonical URL drops the query string and fragment.
should "set the right source" do
assert_equal("https://www.weibo.com/5501756072/J2UNKfbqV", @site.canonical_url)
end
should "download an image" do
assert_downloaded(134_721, @site.image_url)
end
should "get the tags" do
tags = [
%w[fgo https://s.weibo.com/weibo/fgo],
%w[Alter组 https://s.weibo.com/weibo/Alter组]
]
assert_equal(tags, @site.tags)
end
should "find the correct artist" do
@artist = FactoryBot.create(:artist, name: "nipi27", url_string: "https://www.weibo.com/u/5501756072")
assert_equal([@artist], @site.artists)
end
end
# The post ID here is made up, so the artist has to be resolved without post data.
context "A deleted or not existing picture" do
should "still find the artist name" do
site = Sources::Strategies.find("https://www.weibo.com/5501756072/AsdAsdAsd")
artist = FactoryBot.create(:artist, name: "nipi27", url_string: "https://www.weibo.com/u/5501756072")
assert_equal([artist], site.artists)
end
end
context "A post with video" do
should "get the correct video" do
site = Sources::Strategies.find("https://www.weibo.com/5501756072/IF9fugHzj")
assert_downloaded(7_676_656, site.image_url)
end
end
# A direct `mw690` sample URL should be upgraded to the `large` variant.
context "A direct image sample upload" do
should "get the largest version" do
sample = Sources::Strategies.find("https://wx3.sinaimg.cn/mw690/a00fa34cly1gf62g2n8z3j21yu2jo1ky.jpg")
assert_equal("https://wx3.sinaimg.cn/large/a00fa34cly1gf62g2n8z3j21yu2jo1ky.jpg", sample.image_url)
end
end
# The album URL carries a /pid/ segment identifying one picture of the post.
context "An album url for a post with multiple pictures" do
should "upload the right picture rather than just the first" do
site = Sources::Strategies.find("http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t")
assert_equal("https://wx4.sinaimg.cn/large/7eb64558gy1fnbryb5nzoj20dw10419t.jpg", site.image_url)
end
end
# Direct image URL plus an album page as referer: the source should be the post's page.
context "An upload from the batch bookmarklet" do
should "set the right source" do
url = "https://wx1.sinaimg.cn/large/7eb64558gy1fnbryriihwj20dw104wtu.jpg"
ref = "http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t"
site = Sources::Strategies.find(url, ref)
assert_equal("https://www.weibo.com/2125874520/FDKGo4Lk0", site.canonical_url)
end
end
end
end