sources: add 4chan.org upload support.
Add upload support for 4chan.org. You can upload either an individual post or an entire thread at once.
This commit is contained in:
@@ -34,7 +34,7 @@ module Danbooru
|
||||
# @return [Addressable::URI] The parsed and normalized URL.
|
||||
attr_reader :url
|
||||
|
||||
delegate :domain, :host, :port, :site, :path, :query, :password, to: :url
|
||||
delegate :domain, :host, :port, :site, :path, :query, :fragment, :password, to: :url
|
||||
|
||||
# Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
|
||||
#
|
||||
|
||||
@@ -58,6 +58,7 @@ module Source
|
||||
Source::Extractor::Reddit,
|
||||
Source::Extractor::Bilibili,
|
||||
Source::Extractor::Rule34DotUs,
|
||||
Source::Extractor::FourChan,
|
||||
]
|
||||
|
||||
# Should return true if the extractor is configured correctly. Return false
|
||||
@@ -299,7 +300,7 @@ module Source
|
||||
:dtext_title => dtext_artist_commentary_title,
|
||||
:dtext_description => dtext_artist_commentary_desc
|
||||
},
|
||||
:api_response => api_response.to_h
|
||||
:api_response => api_response
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
133
app/logical/source/extractor/four_chan.rb
Normal file
133
app/logical/source/extractor/four_chan.rb
Normal file
@@ -0,0 +1,133 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Source extractor for 4chan.org.
|
||||
#
|
||||
# TODO:
|
||||
#
|
||||
# * If given only an image URL, scrape the board catalog to find which thread it belongs to.
|
||||
# * If the thread is expired, scrape data from archive sites.
|
||||
# * If the image or video is a soundpost, remux the file to include the sound (https://github.com/rcc11/4chan-sounds-player#creating-sound-images)
|
||||
#
|
||||
# @see https://github.com/4chan/4chan-API
|
||||
# @see https://github.com/4chan/4chan-API/blob/master/pages/Threads.md
|
||||
module Source
  class Extractor
    # Extractor for 4chan posts, threads, and direct image links.
    #
    # Post data is read from the thread's JSON endpoint on a.4cdn.org (see {#api_url}). The board, thread, image
    # and post IDs are taken from the URL first and from the referer as a fallback, so a bare image URL can still
    # be tied to its post when the referer is the thread it was posted in.
    class FourChan < Source::Extractor
      # @return [Boolean] true if the URL was parsed as a Source::URL::FourChan
      def match?
        Source::URL::FourChan === parsed_url
      end

      # @return [Array<String>] the image URLs to upload; empty if none can be determined
      def image_urls
        # A full-size image URL is used as-is.
        if parsed_url.full_image_url.present?
          [parsed_url.full_image_url]
        # If this is a post URL, or an image URL for which we can find the post
        elsif post.present? && post["tim"].present? && post["ext"].present?
          ["https://i.4cdn.org/#{board}/#{post["tim"]}#{post["ext"]}"]
        # If this is a thread URL, take every image in the thread. Image URLs are excluded here: a thumbnail URL
        # with a thread referer whose post can't be found (or whose API request failed) must fall through to the
        # thumbnail branch below instead of returning the whole thread or nothing.
        elsif thread_id.present? && post_id_from_url.nil? && !parsed_url.image_url?
          api_response.filter_map do |thread_post|
            next unless thread_post["tim"].present? && thread_post["ext"].present?

            "https://i.4cdn.org/#{board}/#{thread_post["tim"]}#{thread_post["ext"]}"
          end
        # If this is a thumbnail image URL and we can't get the full image URL from the API
        elsif parsed_url.image_url?
          [url.to_s]
        else
          []
        end
      end

      # @return [String, nil] the canonical post URL if the post is known, else the thread URL, else nil
      def page_url
        if board.present? && thread_id.present? && post_id.present?
          "https://#{domain}/#{board}/thread/#{thread_id}#p#{post_id}"
        elsif board.present? && thread_id.present?
          "https://#{domain}/#{board}/thread/#{thread_id}"
        end
      end

      # @return [String, nil] the post header ("<name><trip> <date> No.<number>"), or nil if no post was found
      def artist_commentary_title
        if post.present?
          "#{post["name"]}#{post["trip"]} #{post["now"]} No.#{post["no"]}"
        end
      end

      # Builds the commentary as HTML: a link to the attached file with its size and dimensions (when the post
      # has a file), followed by the post's comment.
      #
      # @return [String, nil] the HTML commentary, or nil if the post has neither a file nor a comment
      def artist_commentary_desc
        commentary = String.new

        if post["filename"].present?
          commentary << "<a href=\"https://i.4cdn.org/#{board}/#{post["tim"]}#{post["ext"]}\">#{post["filename"]}#{post["ext"]}</a> (#{post["fsize"].to_fs(:human_size)}, #{post["w"]}x#{post["h"]})\n"
        end

        commentary << post["com"] if post["com"].present?

        commentary.presence
      end

      # Converts the HTML commentary to DText, rewriting relative quote links to absolute URLs first.
      #
      # @return [String] the commentary in DText format
      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc) do |element|
          next unless element.name == "a" && element["class"] == "quotelink"

          # A quotelink without an href is left untouched rather than raising on nil.
          href = element["href"].to_s

          # `<a href="#p1234" class="quotelink">>>1234</a>`
          if href.start_with?("#")
            element["href"] = "https://#{domain}/#{board}/thread/#{thread_id}#{href}"
          # `<a href="/vt/thread/1234#p5678" class="quotelink">>>5678</a>`
          elsif href.start_with?("/")
            element["href"] = "https://#{domain}#{href}"
          end
        end
      end

      # @return [String, nil] "boards.4chan.org" or "boards.4channel.org", chosen from the URL or else the
      #   referer; nil if neither is on one of those domains (e.g. a bare i.4cdn.org URL)
      def domain
        if parsed_url.domain in "4chan.org" | "4channel.org"
          "boards.#{parsed_url.domain}"
        elsif parsed_referer&.domain in "4chan.org" | "4channel.org"
          "boards.#{parsed_referer.domain}"
        end
      end

      # @return [String, nil] the board name from the URL, or else from the referer
      def board
        parsed_url.board || parsed_referer&.board
      end

      # @return [Integer, nil] the thread ID from the URL, or else from the referer
      def thread_id
        parsed_url.thread_id || parsed_referer&.thread_id
      end

      # @return [Integer, nil] the image ID from the URL, or else from the referer
      def image_id
        parsed_url.image_id || parsed_referer&.image_id
      end

      # @return [Integer, nil] the post ID from the URL or referer, or else from the post found via the API
      def post_id
        post_id_from_url || post_id_from_api
      end

      # @return [Integer, nil] the post ID given in the URL, or else in the referer
      def post_id_from_url
        parsed_url.post_id || parsed_referer&.post_id
      end

      # @return [Integer, nil] the number of the post found in the API response, if any
      def post_id_from_api
        post["no"]
      end

      # Finds the post in the thread whose image ID (`tim`) matches the image URL, or whose number (`no`) matches
      # the post ID given in the URL.
      #
      # @return [Hash] the post's API data, or an empty hash if no post matches
      memoize def post
        api_response.find do |candidate|
          (image_id.present? && candidate["tim"] == image_id) || candidate["no"] == post_id_from_url
        end.to_h
      end

      # Fetches the thread from the API. The request goes through `http.cache(1.minute)`.
      #
      # @return [Array<Hash>] the thread's posts; empty if the thread is unknown, the request did not return
      #   status 200, or the response has no "posts" entry
      memoize def api_response
        return [] unless api_url.present?

        response = http.cache(1.minute).get(api_url)
        return [] unless response.status == 200

        # Always hand callers an Array; they call `find`/`filter_map` on the result.
        response.parse["posts"] || []
      end

      # @return [String, nil] the thread's JSON API URL, or nil if the board or thread ID is unknown
      def api_url
        "https://a.4cdn.org/#{board}/thread/#{thread_id}.json" if board.present? && thread_id.present?
      end
    end
  end
end
|
||||
@@ -61,6 +61,7 @@ module Source
|
||||
Source::URL::Furaffinity,
|
||||
Source::URL::Bilibili,
|
||||
Source::URL::Rule34DotUs,
|
||||
Source::URL::FourChan,
|
||||
]
|
||||
|
||||
# Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.
|
||||
|
||||
51
app/logical/source/url/four_chan.rb
Normal file
51
app/logical/source/url/four_chan.rb
Normal file
@@ -0,0 +1,51 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# @see https://github.com/4chan/4chan-API
|
||||
# @see https://github.com/4chan/4chan-API/blob/master/pages/User_images_and_static_content.md
|
||||
class Source::URL::FourChan < Source::URL
|
||||
attr_reader :board, :thread_id, :post_id, :image_type, :image_id, :full_image_url
|
||||
|
||||
def self.match?(url)
|
||||
url.domain.in?(%w[4cdn.org 4chan.org 4channel.org])
|
||||
end
|
||||
|
||||
def site_name
|
||||
"4chan"
|
||||
end
|
||||
|
||||
def parse
|
||||
case [subdomain, domain, *path_segments]
|
||||
|
||||
# https://boards.4channel.org/vt/thread/37293562#p37294005
|
||||
in _, ("4channel.org" | "4chan.org"), board, "thread", /\A[0-9]+\z/ => thread_id
|
||||
@board = board
|
||||
@thread_id = thread_id.to_i
|
||||
@post_id = fragment.to_s[/^p([0-9]+)$/, 1]&.to_i
|
||||
|
||||
# https://i.4cdn.org/vt/1668729957824814.webm
|
||||
# https://i.4cdn.org/vt/1668729957824814s.jpg
|
||||
in "i", "4cdn.org", board, /\A([0-9]+)(s?)\./
|
||||
@board = board
|
||||
@image_id = $1.to_i
|
||||
@image_type = $2 == "s" ? :preview : :original
|
||||
@full_image_url = url.to_s if @image_type == :original
|
||||
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
def image_url?
|
||||
host == "i.4cdn.org"
|
||||
end
|
||||
|
||||
def page_url
|
||||
if thread_id.present?
|
||||
url.to_s
|
||||
end
|
||||
end
|
||||
|
||||
def api_url
|
||||
"https://a.4cdn.org/#{board}/thread/#{thread_id}.json" if board.present? && thread_id.present?
|
||||
end
|
||||
end
|
||||
@@ -9,8 +9,6 @@ class Source::URL::Null < Source::URL
|
||||
|
||||
def site_name
|
||||
case [subdomain, domain]
|
||||
in _, "4cdn.org"
|
||||
"4chan"
|
||||
in _, "myportfolio.com"
|
||||
"Adobe Portfolio"
|
||||
in _, "adobe.com" if host == "portfolio.adobe.com"
|
||||
|
||||
@@ -518,6 +518,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
|
||||
should_upload_successfully("https://rule34.xxx/index.php?page=post&s=view&id=6961597")
|
||||
should_upload_successfully("https://rule34.us/index.php?r=posts/view&id=6204967")
|
||||
|
||||
should_upload_successfully("https://boards.4channel.org/vt/thread/1#p1")
|
||||
|
||||
should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
|
||||
should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")
|
||||
should_upload_successfully("https://seiga.nicovideo.jp/image/source/9146749")
|
||||
|
||||
73
test/unit/sources/four_chan_test.rb
Normal file
73
test/unit/sources/four_chan_test.rb
Normal file
@@ -0,0 +1,73 @@
|
||||
require "test_helper"
|
||||
|
||||
module Sources
  # Checks the 4chan source extractor against post No.1 of thread 1 on /vt/, reached through its image URL,
  # thumbnail URL, post URL and thread URL.
  class FourChanTest < ActiveSupport::TestCase
    # URLs under test.
    THREAD_URL = "https://boards.4channel.org/vt/thread/1"
    POST_URL = "https://boards.4channel.org/vt/thread/1#p1"
    FULL_IMAGE_URL = "https://i.4cdn.org/vt/1611919211191.jpg"
    THUMBNAIL_URL = "https://i.4cdn.org/vt/1611919211191s.jpg"

    # Expected download sizes in bytes.
    FULL_IMAGE_SIZE = 145_602
    THUMBNAIL_SIZE = 7430

    # Expected commentary for post No.1.
    POST_TITLE = "Anonymous 01/29/21(Fri)06:20:11 No.1"
    POST_DTEXT = <<~EOS.chomp
      "vt.jpg":[https://i.4cdn.org/vt/1611919211191.jpg] (142 KB, 767x677)
      This board is for the discussion of Virtual YouTubers ("VTubers"), including those streaming in Japanese, English, and other languages. VTubers don't necessarily need to be on Youtube of course, they can be on Twitch, Niconico, Bilibili, or any other platform.

      Please note that discussion should pertain to a VTuber's streams and content, and should not pertain to their real lives, relationships, or appearances ("IRL").
    EOS

    context "A 4chan source extractor" do
      context "A 4chan direct image url without a referer" do
        strategy_should_work(
          FULL_IMAGE_URL,
          image_urls: [FULL_IMAGE_URL],
          page_url: nil,
          artist_commentary_title: nil,
          artist_commentary_desc: nil,
          dtext_artist_commentary_desc: "",
          download_size: FULL_IMAGE_SIZE
        )
      end

      context "A 4chan direct image url with a referer" do
        strategy_should_work(
          FULL_IMAGE_URL,
          referer: THREAD_URL,
          image_urls: [FULL_IMAGE_URL],
          page_url: POST_URL,
          artist_commentary_title: POST_TITLE,
          dtext_artist_commentary_desc: POST_DTEXT,
          download_size: FULL_IMAGE_SIZE
        )
      end

      context "A 4chan thumbnail image url without a referer" do
        strategy_should_work(
          THUMBNAIL_URL,
          image_urls: [THUMBNAIL_URL],
          page_url: nil,
          artist_commentary_title: nil,
          artist_commentary_desc: nil,
          dtext_artist_commentary_desc: "",
          download_size: THUMBNAIL_SIZE
        )
      end

      context "A 4chan post url" do
        strategy_should_work(
          POST_URL,
          image_urls: [FULL_IMAGE_URL],
          page_url: POST_URL,
          artist_commentary_title: POST_TITLE,
          dtext_artist_commentary_desc: POST_DTEXT,
          download_size: FULL_IMAGE_SIZE
        )
      end

      context "A 4chan thread url" do
        strategy_should_work(
          THREAD_URL,
          image_urls: [FULL_IMAGE_URL],
          page_url: THREAD_URL,
          download_size: FULL_IMAGE_SIZE
        )
      end
    end
  end
end
|
||||
Reference in New Issue
Block a user