sources: add 4chan.org upload support.

Add upload support for 4chan.org. You can upload either individual posts or entire threads at a time.
This commit is contained in:
evazion
2022-11-17 03:32:08 -06:00
parent 94824fb171
commit 902cd0bbdf
8 changed files with 263 additions and 4 deletions

View File

@@ -34,7 +34,7 @@ module Danbooru
# @return [Addressable:URI] The parsed and normalized URL.
attr_reader :url
delegate :domain, :host, :port, :site, :path, :query, :password, to: :url
delegate :domain, :host, :port, :site, :path, :query, :fragment, :password, to: :url
# Parse a string into a URL, or raise an exception if the string is not a valid HTTP or HTTPS URL.
#

View File

@@ -58,6 +58,7 @@ module Source
Source::Extractor::Reddit,
Source::Extractor::Bilibili,
Source::Extractor::Rule34DotUs,
Source::Extractor::FourChan,
]
# Should return true if the extractor is configured correctly. Return false
@@ -299,7 +300,7 @@ module Source
:dtext_title => dtext_artist_commentary_title,
:dtext_description => dtext_artist_commentary_desc
},
:api_response => api_response.to_h
:api_response => api_response
}
end

View File

@@ -0,0 +1,133 @@
# frozen_string_literal: true
# Source extractor for 4chan.org.
#
# TODO:
#
# * If given only an image URL, scrape the board catalog to find which thread it belongs to.
# * If the thread is expired, scrape data from archive sites.
# * If the image or video is a soundpost, remux the file to include the sound (https://github.com/rcc11/4chan-sounds-player#creating-sound-images)
#
# @see https://github.com/4chan/4chan-API
# @see https://github.com/4chan/4chan-API/blob/master/pages/Threads.md
module Source
  class Extractor
    # Extracts image URLs and commentary for 4chan posts and threads, using the
    # JSON thread API at a.4cdn.org. The board/thread/post may come from either
    # the URL itself or from the referer (when the URL is a bare image URL).
    class FourChan < Source::Extractor
      # @return [Boolean] true if the URL was parsed as a 4chan URL
      def match?
        Source::URL::FourChan === parsed_url
      end

      # @return [Array<String>] the image URLs to upload; empty if none can be determined
      def image_urls
        if parsed_url.full_image_url.present?
          [parsed_url.full_image_url]

        # If this is a post URL, or an image URL for which we can find the post
        elsif (post_image_url = image_url_for_post(post))
          [post_image_url]

        # If this is a thread URL
        elsif thread_id.present? && post_id_from_url.nil?
          api_response.filter_map { |thread_post| image_url_for_post(thread_post) }

        # If this is a thumbnail image URL and we can't get the full image URL from the API
        elsif parsed_url.image_url?
          [url.to_s]
        else
          []
        end
      end

      # @return [String, nil] the URL of the post (if known) or of the thread; nil if the thread is unknown
      def page_url
        if board.present? && thread_id.present? && post_id.present?
          "https://#{domain}/#{board}/thread/#{thread_id}#p#{post_id}"
        elsif board.present? && thread_id.present?
          "https://#{domain}/#{board}/thread/#{thread_id}"
        end
      end

      # @return [String, nil] "<name><trip> <timestamp> No.<post number>", or nil if no post was found
      def artist_commentary_title
        if post.present?
          "#{post["name"]}#{post["trip"]} #{post["now"]} No.#{post["no"]}"
        end
      end

      # @return [String, nil] HTML consisting of a link to the attached file (if any)
      #   followed by the post's comment; nil if both are absent
      def artist_commentary_desc
        commentary = String.new
        file_url = image_url_for_post(post)

        if post["filename"].present? && file_url.present?
          commentary << "<a href=\"#{file_url}\">#{post["filename"]}#{post["ext"]}</a> (#{post["fsize"].to_fs(:human_size)}, #{post["w"]}x#{post["h"]})\n"
        end

        if post["com"].present?
          commentary << post["com"]
        end

        commentary.presence
      end

      # Converts the commentary to DText, rewriting relative quotelinks into absolute URLs.
      def dtext_artist_commentary_desc
        DText.from_html(artist_commentary_desc) do |element|
          if element.name == "a" && element["class"] == "quotelink"
            # A quotelink without an href is left untouched instead of raising.
            href = element["href"].to_s

            # `<a href="#p1234" class="quotelink">&gt;&gt;1234</a>`
            if href.start_with?("#")
              element["href"] = "https://#{domain}/#{board}/thread/#{thread_id}#{href}"

            # `<a href="/vt/thread/1234#p5678" class="quotelink">&gt;&gt;5678</a>`
            elsif href.start_with?("/")
              element["href"] = "https://#{domain}#{href}"
            end
          end
        end
      end

      # @return [String, nil] the boards.* host matching the URL's (or else the referer's) domain
      def domain
        if parsed_url.domain in "4chan.org" | "4channel.org"
          "boards.#{parsed_url.domain}"
        elsif parsed_referer&.domain in "4chan.org" | "4channel.org"
          "boards.#{parsed_referer.domain}"
        end
      end

      def board
        parsed_url.board || parsed_referer&.board
      end

      def thread_id
        parsed_url.thread_id || parsed_referer&.thread_id
      end

      def image_id
        parsed_url.image_id || parsed_referer&.image_id
      end

      # The post ID from the URL or referer if given, otherwise the ID of the post found via the API.
      def post_id
        post_id_from_url || post_id_from_api
      end

      def post_id_from_url
        parsed_url.post_id || parsed_referer&.post_id
      end

      def post_id_from_api
        post["no"]
      end

      # Finds the post this upload refers to in the thread's API data.
      #
      # The post holding the image takes priority; the post ID from the URL or
      # referer is only a fallback. This way an image URL combined with a
      # referer pointing at a different post in the thread still resolves to
      # the post the image actually belongs to.
      #
      # @return [Hash] the post's API data, or an empty hash if no post matches
      memoize def post
        thread_posts = api_response
        wanted_post_id = post_id_from_url

        found = thread_posts.find { |thread_post| thread_post["tim"] == image_id } if image_id.present?
        found ||= thread_posts.find { |thread_post| thread_post["no"] == wanted_post_id } if wanted_post_id.present?
        found.to_h
      end

      # @return [Array<Hash>] the posts in the thread; empty if the thread is
      #   unknown, the request fails, or the response has no "posts" key
      memoize def api_response
        return [] unless api_url.present?

        response = http.cache(1.minute).get(api_url)
        return [] unless response.status == 200

        response.parse["posts"].to_a
      end

      def api_url
        "https://a.4cdn.org/#{board}/thread/#{thread_id}.json" if board.present? && thread_id.present?
      end

      private

      # @param post_data [Hash] a post from the thread API
      # @return [String, nil] the full-size file URL for the post, or nil if the post has no "tim" or "ext"
      def image_url_for_post(post_data)
        return nil unless post_data["tim"].present? && post_data["ext"].present?

        "https://i.4cdn.org/#{board}/#{post_data["tim"]}#{post_data["ext"]}"
      end
    end
  end
end

View File

@@ -61,6 +61,7 @@ module Source
Source::URL::Furaffinity,
Source::URL::Bilibili,
Source::URL::Rule34DotUs,
Source::URL::FourChan,
]
# Parse a URL into a subclass of Source::URL, or raise an exception if the URL is not a valid HTTP or HTTPS URL.

View File

@@ -0,0 +1,51 @@
# frozen_string_literal: true
# @see https://github.com/4chan/4chan-API
# @see https://github.com/4chan/4chan-API/blob/master/pages/User_images_and_static_content.md
# Parses 4chan thread/post URLs and i.4cdn.org image URLs.
class Source::URL::FourChan < Source::URL
  attr_reader :board, :thread_id, :post_id, :image_type, :image_id, :full_image_url

  # @param url the parsed URL to test
  # @return [Boolean] true if the URL's domain is one of the 4chan domains
  def self.match?(url)
    url.domain.in?(%w[4cdn.org 4chan.org 4channel.org])
  end

  def site_name
    "4chan"
  end

  # Sets the board, thread, post, and image attributes from the URL. URLs that
  # match neither pattern leave every attribute unset.
  def parse
    case [subdomain, domain, *path_segments]

    # https://boards.4channel.org/vt/thread/37293562#p37294005
    # https://boards.4channel.org/vt/thread/37293562/some-thread-slug#p37294005
    # The trailing splat accepts an optional semantic slug after the thread ID.
    in _, ("4channel.org" | "4chan.org"), board, "thread", /\A[0-9]+\z/ => thread_id, *_slug
      @board = board
      @thread_id = thread_id.to_i
      # \A and \z anchor the whole fragment; ^ and $ would only anchor a single line.
      @post_id = fragment.to_s[/\Ap([0-9]+)\z/, 1]&.to_i

    # https://i.4cdn.org/vt/1668729957824814.webm
    # https://i.4cdn.org/vt/1668729957824814s.jpg
    in "i", "4cdn.org", board, /\A([0-9]+)(s?)\./
      @board = board
      @image_id = Regexp.last_match(1).to_i
      # An "s" suffix after the numeric ID marks a thumbnail.
      @image_type = Regexp.last_match(2) == "s" ? :preview : :original
      @full_image_url = url.to_s if @image_type == :original

    else
      nil
    end
  end

  # @return [Boolean] true if the URL is hosted on i.4cdn.org
  def image_url?
    host == "i.4cdn.org"
  end

  # @return [String, nil] the URL itself if it is a thread or post URL, otherwise nil
  def page_url
    if thread_id.present?
      url.to_s
    end
  end

  # @return [String, nil] the JSON API URL for the thread, or nil if the board or thread is unknown
  def api_url
    "https://a.4cdn.org/#{board}/thread/#{thread_id}.json" if board.present? && thread_id.present?
  end
end

View File

@@ -9,8 +9,6 @@ class Source::URL::Null < Source::URL
def site_name
case [subdomain, domain]
in _, "4cdn.org"
"4chan"
in _, "myportfolio.com"
"Adobe Portfolio"
in _, "adobe.com" if host == "portfolio.adobe.com"

View File

@@ -518,6 +518,8 @@ class UploadsControllerTest < ActionDispatch::IntegrationTest
should_upload_successfully("https://rule34.xxx/index.php?page=post&s=view&id=6961597")
should_upload_successfully("https://rule34.us/index.php?r=posts/view&id=6204967")
should_upload_successfully("https://boards.4channel.org/vt/thread/1#p1")
should_upload_successfully("http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663")
should_upload_successfully("http://seiga.nicovideo.jp/seiga/im4937663")
should_upload_successfully("https://seiga.nicovideo.jp/image/source/9146749")

View File

@@ -0,0 +1,73 @@
require "test_helper"
# Tests for the 4chan source extractor. Each context passes a URL (and
# optionally a referer) together with the expected extractor outputs to
# `strategy_should_work`, a helper that is defined outside this file.
#
# NOTE(review): the expected sizes and commentary presumably mirror the live
# /vt/ thread 1 on 4chan -- confirm whether these tests depend on the network.
module Sources
class FourChanTest < ActiveSupport::TestCase
context "A 4chan source extractor" do
# Full-size image URL alone: the image URL is returned as-is, with no page URL
# and no commentary expected.
context "A 4chan direct image url without a referer" do
strategy_should_work(
"https://i.4cdn.org/vt/1611919211191.jpg",
image_urls: ["https://i.4cdn.org/vt/1611919211191.jpg"],
page_url: nil,
artist_commentary_title: nil,
artist_commentary_desc: nil,
dtext_artist_commentary_desc: "",
download_size: 145_602,
)
end
# Full-size image URL plus a thread referer: the post URL and commentary are
# expected to be filled in.
context "A 4chan direct image url with a referer" do
strategy_should_work(
"https://i.4cdn.org/vt/1611919211191.jpg",
referer: "https://boards.4channel.org/vt/thread/1",
image_urls: ["https://i.4cdn.org/vt/1611919211191.jpg"],
page_url: "https://boards.4channel.org/vt/thread/1#p1",
artist_commentary_title: "Anonymous 01/29/21(Fri)06:20:11 No.1",
dtext_artist_commentary_desc: <<~EOS.chomp,
"vt.jpg":[https://i.4cdn.org/vt/1611919211191.jpg] (142 KB, 767x677)
This board is for the discussion of Virtual YouTubers ("VTubers"), including those streaming in Japanese, English, and other languages. VTubers don't necessarily need to be on Youtube of course, they can be on Twitch, Niconico, Bilibili, or any other platform.
Please note that discussion should pertain to a VTuber's streams and content, and should not pertain to their real lives, relationships, or appearances ("IRL").
EOS
download_size: 145_602,
)
end
# Thumbnail URL (trailing "s" in the file name) alone: the thumbnail URL itself
# is the expected image URL.
context "A 4chan thumbnail image url without a referer" do
strategy_should_work(
"https://i.4cdn.org/vt/1611919211191s.jpg",
image_urls: ["https://i.4cdn.org/vt/1611919211191s.jpg"],
page_url: nil,
artist_commentary_title: nil,
artist_commentary_desc: nil,
dtext_artist_commentary_desc: "",
download_size: 7430,
)
end
# Post URL (thread URL with a #p<id> fragment): that post's image and
# commentary are expected.
context "A 4chan post url" do
strategy_should_work(
"https://boards.4channel.org/vt/thread/1#p1",
image_urls: ["https://i.4cdn.org/vt/1611919211191.jpg"],
page_url: "https://boards.4channel.org/vt/thread/1#p1",
artist_commentary_title: "Anonymous 01/29/21(Fri)06:20:11 No.1",
dtext_artist_commentary_desc: <<~EOS.chomp,
"vt.jpg":[https://i.4cdn.org/vt/1611919211191.jpg] (142 KB, 767x677)
This board is for the discussion of Virtual YouTubers ("VTubers"), including those streaming in Japanese, English, and other languages. VTubers don't necessarily need to be on Youtube of course, they can be on Twitch, Niconico, Bilibili, or any other platform.
Please note that discussion should pertain to a VTuber's streams and content, and should not pertain to their real lives, relationships, or appearances ("IRL").
EOS
download_size: 145_602,
)
end
# Thread URL without a post fragment: the page URL is expected to stay the
# thread URL, and the expected image list holds the thread's single image.
context "A 4chan thread url" do
strategy_should_work(
"https://boards.4channel.org/vt/thread/1",
image_urls: ["https://i.4cdn.org/vt/1611919211191.jpg"],
page_url: "https://boards.4channel.org/vt/thread/1",
download_size: 145_602,
)
end
end
end
end