Merge pull request #5032 from nonamethanks/factor-out-weibo
sources: factor out Source::URL::Weibo
This commit is contained in:
@@ -31,6 +31,7 @@ module Source
|
||||
Source::URL::Plurk,
|
||||
Source::URL::Skeb,
|
||||
Source::URL::TwitPic,
|
||||
Source::URL::Weibo,
|
||||
]
|
||||
|
||||
# Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.
|
||||
|
||||
95
app/logical/source/url/weibo.rb
Normal file
95
app/logical/source/url/weibo.rb
Normal file
@@ -0,0 +1,95 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
class Source::URL::Weibo < Source::URL
|
||||
attr_reader :full_image_url
|
||||
|
||||
def self.match?(url)
|
||||
url.domain.in?(["weibo.com", "weibo.cn", "sinaimg.cn"])
|
||||
end
|
||||
|
||||
def parse
|
||||
case [host, *path_segments]
|
||||
|
||||
# http://ww1.sinaimg.cn/large/69917555gw1f6ggdghk28j20c87lbhdt.jpg
|
||||
# https://wx1.sinaimg.cn/large/002NQ2vhly1gqzqfk1agfj62981aw4qr02.jpg
|
||||
# http://ww4.sinaimg.cn/mw690/77a2d531gw1f4u411ws3aj20m816fagg.jpg (sample)
|
||||
# https://wx4.sinaimg.cn/orj360/e3930166gy1g546bz86cij20u00u040y.jpg (sample)
|
||||
# http://ww3.sinaimg.cn/mw1024/0065kjmOgw1fabcanrzx6j30f00lcjwv.jpg (sample)
|
||||
in /\w+\.sinaimg\.cn/ => host, size, file
|
||||
@full_image_url = "https://#{host}/large/#{file}"
|
||||
|
||||
# http://tw.weibo.com/1300957955/3786333853668537
|
||||
in "tw.weibo.com", /\w+/, /\d+/ => illust_long_id
|
||||
@illust_long_id = illust_long_id
|
||||
|
||||
# http://weibo.com/3357910224/EEHA1AyJP
|
||||
# https://www.weibo.com/5501756072/IF9fugHzj?from=page_1005055501756072_profile&wvr=6&mod=weibotime
|
||||
in /(\w+\.)?weibo\.(com|cn)/, /\d+/ => artist_short_id, /\w+/ => illust_base62_id
|
||||
@artist_short_id = artist_short_id
|
||||
@illust_base62_id = illust_base62_id
|
||||
|
||||
# http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t
|
||||
# http://photo.weibo.com/5732523783/talbum/detail/photo_id/4029784374069389?prel=p6_3
|
||||
in "photo.weibo.com", /\d+/ => artist_short_id, _, _, _, /\d+/ => illust_long_id, *rest
|
||||
@artist_short_id = artist_short_id
|
||||
@illust_long_id = illust_long_id
|
||||
|
||||
# https://m.weibo.cn/detail/4506950043618873
|
||||
in "m.weibo.cn", "detail", /\d+/ => illust_long_id
|
||||
@illust_base62_id = illust_base62_id
|
||||
|
||||
# https://m.weibo.cn/status/J33G4tH1B
|
||||
in "m.weibo.cn", "status", /\w+/ => illust_base62_id
|
||||
@illust_base62_id = illust_base62_id
|
||||
|
||||
# https://www.weibo.com/u/5501756072
|
||||
# https://m.weibo.cn/profile/5501756072
|
||||
# https://m.weibo.cn/u/5501756072
|
||||
in _, ("u" | "profile"), /\d+/ => artist_short_id
|
||||
@artist_short_id = artist_short_id
|
||||
|
||||
# https://www.weibo.com/5501756072
|
||||
in _, /\d+/ => artist_short_id
|
||||
@artist_short_id = artist_short_id
|
||||
|
||||
in _, "p", /\d+/ => artist_long_id
|
||||
@artist_long_id = artist_long_id
|
||||
|
||||
else
|
||||
end
|
||||
end
|
||||
|
||||
def image_url?
|
||||
full_image_url.present?
|
||||
end
|
||||
|
||||
def profile_urls
|
||||
[profile_short_url, profile_long_url].compact
|
||||
end
|
||||
|
||||
def profile_short_url
|
||||
return if @artist_short_id.blank?
|
||||
"https://www.weibo.com/u/#{@artist_short_id}"
|
||||
end
|
||||
|
||||
def profile_long_url
|
||||
return if @artist_long_id.blank?
|
||||
"https://www.weibo.com/p/#{@artist_long_id}"
|
||||
end
|
||||
|
||||
def mobile_url
|
||||
if @illust_long_id.present?
|
||||
"https://m.weibo.cn/detail/#{@illust_long_id}"
|
||||
elsif @illust_base62_id.present?
|
||||
"https://m.weibo.cn/status/#{@illust_base62_id}"
|
||||
end
|
||||
end
|
||||
|
||||
def normalized_url
|
||||
if @artist_short_id.present? && @illust_base62_id.present?
|
||||
"https://www.weibo.com/#{@artist_short_id}/#{@illust_base62_id}"
|
||||
elsif mobile_url.present?
|
||||
mobile_url
|
||||
end
|
||||
end
|
||||
end
|
||||
@@ -1,91 +1,35 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Image URLS
|
||||
# * http://ww1.sinaimg.cn/large/69917555gw1f6ggdghk28j20c87lbhdt.jpg
|
||||
# * https://wx1.sinaimg.cn/large/002NQ2vhly1gqzqfk1agfj62981aw4qr02.jpg (more than 32 characters in hash)
|
||||
#
|
||||
# Image Samples
|
||||
# * http://ww4.sinaimg.cn/mw690/77a2d531gw1f4u411ws3aj20m816fagg.jpg
|
||||
# * https://wx4.sinaimg.cn/orj360/e3930166gy1g546bz86cij20u00u040y.jpg
|
||||
# * http://ww3.sinaimg.cn/mw1024/0065kjmOgw1fabcanrzx6j30f00lcjwv.jpg
|
||||
#
|
||||
# Page URLS
|
||||
# * http://weibo.com/3357910224/EEHA1AyJP
|
||||
# * https://www.weibo.com/5501756072/IF9fugHzj?from=page_1005055501756072_profile&wvr=6&mod=weibotime
|
||||
#
|
||||
# * http://photo.weibo.com/5732523783/talbum/detail/photo_id/4029784374069389?prel=p6_3
|
||||
# * http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t
|
||||
# * http://tw.weibo.com/1300957955/3786333853668537
|
||||
#
|
||||
# * https://m.weibo.cn/detail/4506950043618873
|
||||
# * https://m.weibo.cn/status/J33G4tH1B
|
||||
#
|
||||
# Video
|
||||
# * https://www.weibo.com/5501756072/IF9fugHzj
|
||||
#
|
||||
# Profile URLS
|
||||
# ### Short ID
|
||||
# * https://www.weibo.com/5501756072
|
||||
# * https://www.weibo.com/u/5501756072
|
||||
# * https://m.weibo.cn/profile/5501756072
|
||||
# * https://m.weibo.cn/u/5501756072
|
||||
# ### Long ID
|
||||
# * https://www.weibo.com/p/1005055501756072
|
||||
|
||||
# @see Source::URL::Weibo
|
||||
module Sources
|
||||
module Strategies
|
||||
class Weibo < Base
|
||||
PROFILE_URL_1 = %r{\Ahttps?://(?:(?:www|m)\.)?weibo\.c(?:om|n)/(?:(?:u|profile)/)?(?<artist_short_id>\d+)\z}i
|
||||
PROFILE_URL_2 = %r{\Ahttps?://photo\.weibo\.com/(?<artist_short_id>\d+)}i
|
||||
PROFILE_URL_3 = %r{\Ahttps?://(?:www\.)?weibo\.com/p/(?<artist_long_id>\d+)}i
|
||||
|
||||
PAGE_URL_1 = %r{\Ahttps?://(?:www\.)?weibo\.com/(?<artist_short_id>\d+)/(?<illust_base62_id>\w+)(?:\?.*)?\z}i
|
||||
PAGE_URL_2 = %r{#{PROFILE_URL_2}/(?:wbphotos/large/mid|talbum/detail/photo_id)/(?<illust_long_id>\d+)(?:/pid/(?<image_id>\w{32}))?}i
|
||||
PAGE_URL_3 = %r{\Ahttps?://m\.weibo\.cn/(?:detail/(?<illust_long_id>\d+)|status/(?<illust_base62_id>\w+))}i
|
||||
PAGE_URL_4 = %r{\Ahttps?://tw\.weibo\.com/(?:(?<artist_short_id>\d+)|\w+)/(?<illust_long_id>\d+)}i
|
||||
|
||||
IMAGE_URL = %r{\Ahttps?://\w+\.sinaimg\.cn/\w+/(?<image_id>\w+)\.}i
|
||||
|
||||
def domains
|
||||
["weibo.com", "weibo.cn", "weibocdn.com", "sinaimg.cn"]
|
||||
def match?
|
||||
Source::URL::Weibo === parsed_url
|
||||
end
|
||||
|
||||
def site_name
|
||||
"Weibo"
|
||||
parsed_url.site_name
|
||||
end
|
||||
|
||||
def image_urls
|
||||
urls = []
|
||||
|
||||
if url =~ IMAGE_URL
|
||||
urls << self.class.convert_image_to_large(url)
|
||||
if parsed_url.image_url?
|
||||
[parsed_url.full_image_url]
|
||||
elsif api_response.present?
|
||||
if api_response["pics"].present?
|
||||
urls += api_response["pics"].to_a.map { |pic| self.class.convert_image_to_large(pic["url"]) }
|
||||
api_response["pics"].pluck("url").map { |url| Source::URL.parse(url).full_image_url }
|
||||
elsif api_response.dig("page_info", "type") == "video"
|
||||
variants = api_response["page_info"]["media_info"].to_h.values + api_response["page_info"]["urls"].to_h.values
|
||||
urls << variants.max_by do |variant|
|
||||
largest_video = variants.max_by do |variant|
|
||||
if /template=(?<width>\d+)x(?<height>\d+)/ =~ variant.to_s
|
||||
width.to_i * height.to_i
|
||||
else
|
||||
0
|
||||
end
|
||||
end
|
||||
[largest_video]
|
||||
end
|
||||
else
|
||||
urls << url
|
||||
end
|
||||
|
||||
urls
|
||||
end
|
||||
|
||||
def image_url
|
||||
image_id = url[PAGE_URL_2, :image_id] if url =~ PAGE_URL_2
|
||||
|
||||
if image_id.present?
|
||||
image_urls.select { |i| i[IMAGE_URL, :image_id] == image_id }.compact.first
|
||||
else
|
||||
image_urls.first
|
||||
end
|
||||
end
|
||||
|
||||
@@ -94,47 +38,30 @@ module Sources
|
||||
end
|
||||
|
||||
def page_url
|
||||
if api_response.present?
|
||||
artist_id = api_response["user"]["id"]
|
||||
illust_id = api_response["bid"]
|
||||
"https://www.weibo.com/#{artist_id}/#{illust_id}"
|
||||
elsif url =~ IMAGE_URL
|
||||
self.class.convert_image_to_large(url)
|
||||
else
|
||||
url
|
||||
end
|
||||
return nil unless api_response.present?
|
||||
|
||||
artist_id = api_response["user"]["id"]
|
||||
illust_base62_id = api_response["bid"]
|
||||
"https://www.weibo.com/#{artist_id}/#{illust_base62_id}"
|
||||
end
|
||||
|
||||
def tags
|
||||
return [] if api_response.blank?
|
||||
|
||||
matches = api_response["text"]&.scan(/surl-text">#(.*?)#</).to_a.map { |m| m[0] }
|
||||
|
||||
matches.map do |match|
|
||||
[match, "https://s.weibo.com/weibo/#{match}"]
|
||||
end
|
||||
end
|
||||
|
||||
def profile_urls
|
||||
[profile_short_url, profile_long_url].compact
|
||||
(parsed_url.profile_urls + parsed_referer&.profile_urls.to_a).uniq
|
||||
end
|
||||
|
||||
def profile_url
|
||||
profile_urls.first
|
||||
end
|
||||
|
||||
def profile_short_url
|
||||
return if artist_short_id.blank?
|
||||
|
||||
"https://www.weibo.com/u/#{artist_short_id}"
|
||||
end
|
||||
|
||||
def profile_long_url
|
||||
return if artist_long_id.blank?
|
||||
|
||||
"https://www.weibo.com/p/#{artist_long_id}"
|
||||
end
|
||||
|
||||
def artist_name
|
||||
api_response&.dig("user", "screen_name")
|
||||
end
|
||||
@@ -163,54 +90,11 @@ module Sources
|
||||
end
|
||||
|
||||
def normalize_for_source
|
||||
return url if url =~ PAGE_URL_2
|
||||
artist_id = artist_short_id_from_url
|
||||
|
||||
if artist_id.present?
|
||||
if illust_base62_id.present?
|
||||
"https://www.weibo.com/#{artist_id}/#{illust_base62_id}"
|
||||
elsif illust_long_id.present?
|
||||
"https://photo.weibo.com/#{artist_id}/talbum/detail/photo_id/#{illust_long_id}"
|
||||
end
|
||||
elsif mobile_url.present?
|
||||
mobile_url
|
||||
end
|
||||
end
|
||||
|
||||
def self.convert_image_to_large(url)
|
||||
url.gsub(%r{.cn/\w+/(\w+)}, '.cn/large/\1')
|
||||
end
|
||||
|
||||
def illust_long_id
|
||||
urls.map { |x| x[PAGE_URL_2, :illust_long_id] || x[PAGE_URL_3, :illust_long_id] || x[PAGE_URL_4, :illust_long_id] }.compact.first
|
||||
end
|
||||
|
||||
def illust_base62_id
|
||||
urls.map { |x| x[PAGE_URL_1, :illust_base62_id] || x[PAGE_URL_3, :illust_base62_id] }.compact.first
|
||||
end
|
||||
|
||||
def artist_short_id_from_url
|
||||
urls.map { |x| x[PROFILE_URL_1, :artist_short_id] || x[PROFILE_URL_2, :artist_short_id] || x[PAGE_URL_1, :artist_short_id] || x[PAGE_URL_4, :artist_short_id] }.compact.first
|
||||
end
|
||||
|
||||
def artist_short_id
|
||||
artist_short_id_from_url || api_response&.dig("user", "id")
|
||||
end
|
||||
|
||||
def artist_long_id
|
||||
urls.map { |x| x[PROFILE_URL_3, :artist_long_id] }.compact.first
|
||||
end
|
||||
|
||||
def mobile_url
|
||||
if illust_long_id.present?
|
||||
"https://m.weibo.cn/detail/#{illust_long_id}"
|
||||
elsif illust_base62_id.present?
|
||||
"https://m.weibo.cn/status/#{illust_base62_id}"
|
||||
end
|
||||
parsed_url.normalized_url
|
||||
end
|
||||
|
||||
def api_response
|
||||
return {} if mobile_url.blank?
|
||||
return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank?
|
||||
|
||||
resp = http.cache(1.minute).get(mobile_url)
|
||||
json_string = resp.to_s[/var \$render_data = \[(.*)\]\[0\]/m, 1]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
require 'test_helper'
|
||||
require "test_helper"
|
||||
|
||||
module Sources
|
||||
class WeiboTest < ActiveSupport::TestCase
|
||||
@@ -40,7 +40,7 @@ module Sources
|
||||
should "get the tags" do
|
||||
tags = [
|
||||
%w[fgo https://s.weibo.com/weibo/fgo],
|
||||
%w[Alter组 https://s.weibo.com/weibo/Alter组]
|
||||
%w[Alter组 https://s.weibo.com/weibo/Alter组],
|
||||
]
|
||||
assert_equal(tags, @site.tags)
|
||||
end
|
||||
@@ -76,18 +76,10 @@ module Sources
|
||||
end
|
||||
end
|
||||
|
||||
context "An album url for a post with multiple pictures" do
|
||||
should "upload the right picture rather than just the first" do
|
||||
site = Sources::Strategies.find("http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t")
|
||||
|
||||
assert_equal("https://wx4.sinaimg.cn/large/7eb64558gy1fnbryb5nzoj20dw10419t.jpg", site.image_url)
|
||||
end
|
||||
end
|
||||
|
||||
context "An upload from the batch bookmarklet" do
|
||||
context "A multi-page upload" do
|
||||
should "set the right source" do
|
||||
url = "https://wx1.sinaimg.cn/large/7eb64558gy1fnbryriihwj20dw104wtu.jpg"
|
||||
ref = "http://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t"
|
||||
ref = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4194742441135220/pid/7eb64558gy1fnbryb5nzoj20dw10419t"
|
||||
site = Sources::Strategies.find(url, ref)
|
||||
|
||||
assert_equal("https://www.weibo.com/2125874520/FDKGo4Lk0", site.canonical_url)
|
||||
@@ -97,12 +89,12 @@ module Sources
|
||||
context "normalizing for source" do
|
||||
should "normalize correctly" do
|
||||
source1 = "https://www.weibo.com/3150932560/H4cFbeKKA?from=page_1005053150932560_profile&wvr=6&mod=weibotime"
|
||||
source2 = "http://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81"
|
||||
source2 = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81"
|
||||
source3 = "https://m.weibo.cn/status/4173757483008088?luicode=20000061&lfid=4170879204256635"
|
||||
source4 = "https://tw.weibo.com/SEINEN/4098035921690224"
|
||||
|
||||
assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Sources::Strategies.normalize_source(source1))
|
||||
assert_equal(source2, Sources::Strategies.normalize_source(source2))
|
||||
assert_equal("https://m.weibo.cn/detail/4242129997905387", Sources::Strategies.normalize_source(source2))
|
||||
assert_equal("https://m.weibo.cn/status/4173757483008088", Sources::Strategies.normalize_source(source3))
|
||||
assert_equal("https://m.weibo.cn/detail/4098035921690224", Sources::Strategies.normalize_source(source4))
|
||||
end
|
||||
|
||||
Reference in New Issue
Block a user