Fix placeholder names like weibo_1234, bilibili_1234, nijie1234, and nicoseiga1234 being suggested as Other Names when creating new artist entries. These are meant to be placeholders for the tag name, not used as other names.
168 lines
5.2 KiB
Ruby
168 lines
5.2 KiB
Ruby
# frozen_string_literal: true
|
|
|
|
# @see Source::URL::Bilibili
|
|
module Source
|
|
class Extractor
|
|
class Bilibili < Source::Extractor
|
|
def match?
|
|
Source::URL::Bilibili === parsed_url
|
|
end
|
|
|
|
def image_urls
|
|
if parsed_url&.full_image_url.present?
|
|
[parsed_url.full_image_url]
|
|
elsif data.present?
|
|
if t_work_id.present?
|
|
image_urls = data.dig("modules", "module_dynamic", "major", "draw", "items").to_a.pluck("src")
|
|
elsif h_work_id.present?
|
|
image_urls = data.dig("item", "pictures").to_a.pluck("img_src")
|
|
end
|
|
image_urls.to_a.compact.map { |u| Source::URL.parse(u).full_image_url || u }
|
|
elsif article_id.present?
|
|
page&.search("#article-content img").to_a.pluck("data-src").compact.map { |u| Source::URL.parse(URI.join("https://", u)).full_image_url || u }
|
|
else
|
|
[parsed_url.original_url]
|
|
end
|
|
end
|
|
|
|
def page_url
|
|
t_work_page || parsed_url.page_url || parsed_referer&.page_url
|
|
end
|
|
|
|
def t_work_page
|
|
return unless t_work_id.present?
|
|
"https://t.bilibili.com/#{data["id_str"]}"
|
|
end
|
|
|
|
def artist_commentary_title
|
|
if article_id.present?
|
|
page&.at(".article-container .title")&.text&.squish&.strip
|
|
end
|
|
end
|
|
|
|
def artist_commentary_desc
|
|
if t_work_id.present?
|
|
data.dig("modules", "module_dynamic", "desc", "rich_text_nodes").map do |text_node|
|
|
case text_node["type"]
|
|
when "RICH_TEXT_NODE_TYPE_BV"
|
|
"<a href='#{URI.join("https://", text_node["jump_url"])}'>#{text_node["text"]}</a>"
|
|
when "RICH_TEXT_NODE_TYPE_EMOJI"
|
|
" #{text_node.dig("emoji", "icon_url")} "
|
|
else # RICH_TEXT_NODE_TYPE_AT (mentions), RICH_TEXT_NODE_TYPE_TEXT (text), RICH_TEXT_NODE_TYPE_TOPIC (hashtags)
|
|
text_node["text"]
|
|
end
|
|
end.join
|
|
elsif h_work_id.present?
|
|
data.dig("item", "description")
|
|
elsif article_id.present?
|
|
page&.at("#article-content")&.to_html
|
|
end
|
|
end
|
|
|
|
def dtext_artist_commentary_desc
|
|
DText.from_html(artist_commentary_desc)
|
|
end
|
|
|
|
def tags
|
|
if t_work_id.present?
|
|
tag_list = data.dig("modules", "module_dynamic", "desc", "rich_text_nodes").to_a.select { |n| n["type"] == "RICH_TEXT_NODE_TYPE_TOPIC" }.map { |tag| tag["text"].gsub(/(^#|#$)/, "") }
|
|
elsif h_work_id.present?
|
|
tag_list = data.dig("item", "tags").to_a.pluck(:tag)
|
|
else # bilibili.com/read/:id posts have no tags that I could find
|
|
return []
|
|
end
|
|
|
|
tag_list.map { |tag| [tag, "https://t.bilibili.com/topic/name/#{tag}"] }
|
|
end
|
|
|
|
def artist_name
|
|
if t_work_id.present?
|
|
data.dig("modules", "module_author", "name")
|
|
elsif h_work_id.present?
|
|
data.dig("user", "name")
|
|
elsif article_id.present?
|
|
page&.at(".article-container .up-name")&.text&.squish&.strip
|
|
end
|
|
end
|
|
|
|
def tag_name
|
|
return unless artist_id.present?
|
|
"bilibili_#{artist_id}"
|
|
end
|
|
|
|
def other_names
|
|
[artist_name].compact
|
|
end
|
|
|
|
def artist_id
|
|
artist_id_from_data || parsed_url.artist_id || parsed_referer&.artist_id
|
|
end
|
|
|
|
def artist_id_from_data
|
|
if t_work_id.present?
|
|
data.dig("modules", "module_author", "mid")
|
|
elsif h_work_id.present?
|
|
data.dig("user", "uid")
|
|
elsif article_id.present?
|
|
artist_url = page&.at(".article-container .up-name")&.[]("href")
|
|
Source::URL.parse(URI.join("https://", artist_url))&.artist_id
|
|
end
|
|
end
|
|
|
|
def profile_url
|
|
return nil if artist_id.blank?
|
|
"https://space.bilibili.com/#{artist_id}"
|
|
end
|
|
|
|
def t_work_id
|
|
# for a repost this will be the ID of the repost, not the original one
|
|
parsed_url.t_work_id || parsed_referer&.t_work_id
|
|
end
|
|
|
|
def h_work_id
|
|
parsed_url.h_work_id || parsed_referer&.h_work_id
|
|
end
|
|
|
|
def article_id
|
|
parsed_url.article_id || parsed_referer&.article_id
|
|
end
|
|
|
|
def http
|
|
super.use(:spoof_referrer)
|
|
end
|
|
|
|
def page
|
|
return unless page_url.present?
|
|
response = http.cache(1.minute).get(page_url)
|
|
return response.parse unless response.status != 200
|
|
end
|
|
|
|
def get_json(url)
|
|
response = http.cache(1.minute).get(url)
|
|
return {} unless response.status == 200
|
|
JSON.parse(response).with_indifferent_access
|
|
rescue JSON::ParserError
|
|
{}
|
|
end
|
|
|
|
def data
|
|
if t_work_id.present?
|
|
data = get_json("https://api.bilibili.com/x/polymer/web-dynamic/v1/detail?timezone_offset=-60&id=#{t_work_id}")
|
|
if data.dig("data", "item", "orig", "id_str").present? # it means it's a repost
|
|
data.dig("data", "item", "orig")
|
|
else
|
|
data.dig("data", "item").to_h
|
|
end
|
|
elsif h_work_id.present?
|
|
data = get_json("https://api.vc.bilibili.com/link_draw/v1/doc/detail?doc_id=#{h_work_id}")
|
|
data["data"].to_h
|
|
else
|
|
{}
|
|
end
|
|
end
|
|
|
|
memoize :data, :page
|
|
end
|
|
end
|
|
end
|