sources: add 4chan.org upload support.
Add upload support for 4chan.org. You can upload either individual posts or entire threads at a time.
This commit is contained in:
133
app/logical/source/extractor/four_chan.rb
Normal file
133
app/logical/source/extractor/four_chan.rb
Normal file
@@ -0,0 +1,133 @@
|
||||
# frozen_string_literal: true
|
||||
|
||||
# Source extractor for 4chan.org.
|
||||
#
|
||||
# TODO:
|
||||
#
|
||||
# * If given only an image URL, scrape the board catalog to find which thread it belongs to.
|
||||
# * If the thread is expired, scrape data from archive sites.
|
||||
# * If the image or video is a soundpost, remux the file to include the sound (https://github.com/rcc11/4chan-sounds-player#creating-sound-images)
|
||||
#
|
||||
# @see https://github.com/4chan/4chan-API
|
||||
# @see https://github.com/4chan/4chan-API/blob/master/pages/Threads.md
|
||||
module Source
|
||||
class Extractor
|
||||
class FourChan < Source::Extractor
|
||||
# Whether this extractor applies: true when the parsed URL is a 4chan URL object.
def match?
  parsed_url.is_a?(Source::URL::FourChan)
end
|
||||
|
||||
# The list of full image URLs for the given URL.
#
# Resolution order:
# 1. The URL itself already yields a full image URL.
# 2. A single post could be found through the API and it has a file attached.
# 3. The URL is a thread URL (no post id): every file in the thread.
# 4. The URL is an image URL but the API gave us nothing better: the URL as-is.
#
# @return [Array<String>] the image URLs; empty when nothing could be found
def image_urls
  direct_url = parsed_url.full_image_url
  return [direct_url] if direct_url.present?

  if post.present? && post["tim"].present? && post["ext"].present?
    # A post URL, or an image URL for which the post was found.
    ["https://i.4cdn.org/#{board}/#{post["tim"]}#{post["ext"]}"]
  elsif thread_id.present? && post_id_from_url.nil?
    # A thread URL: collect the file of every post that has one.
    api_response.filter_map do |reply|
      next unless reply["tim"].present? && reply["ext"].present?

      "https://i.4cdn.org/#{board}/#{reply["tim"]}#{reply["ext"]}"
    end
  elsif parsed_url.image_url?
    # A thumbnail image URL whose full image URL couldn't be obtained from the API.
    [url.to_s]
  else
    []
  end
end
|
||||
|
||||
# The URL of the thread, with a `#p<post id>` anchor appended when a post id is known.
#
# @return [String, nil] nil unless both the board and the thread id are known
def page_url
  return unless board.present? && thread_id.present?

  thread_url = "https://#{domain}/#{board}/thread/#{thread_id}"
  post_id.present? ? "#{thread_url}#p#{post_id}" : thread_url
end
|
||||
|
||||
# A title built from the post's `name`, `trip`, `now` and `no` fields.
#
# @return [String, nil] nil when no post was found
def artist_commentary_title
  return unless post.present?

  name, trip, posted_at, number = post.values_at("name", "trip", "now", "no")
  "#{name}#{trip} #{posted_at} No.#{number}"
end
|
||||
|
||||
# The commentary as HTML: a link line describing the attached file (name, size, dimensions),
# followed by the post's comment (`com`). Either part is left out when the post lacks it.
#
# @return [String, nil] nil when there is neither a file nor a comment
def artist_commentary_desc
  file_line =
    if post["filename"].present?
      "<a href=\"https://i.4cdn.org/#{board}/#{post["tim"]}#{post["ext"]}\">#{post["filename"]}#{post["ext"]}</a> (#{post["fsize"].to_fs(:human_size)}, #{post["w"]}x#{post["h"]})\n"
    end

  [file_line, post["com"].presence].compact.join.presence
end
|
||||
|
||||
# Converts the HTML commentary to DText, turning the relative hrefs of 4chan quote links
# (`<a class="quotelink">`) into absolute URLs first.
#
# Handled href forms:
# * `#p1234`                 - a post in the current thread
# * `//host/path`            - protocol-relative; must be checked before the plain `/` case,
#                              otherwise the domain would be prepended to a URL that already
#                              carries its own host (NOTE(review): 4chan appears to emit these
#                              for board links such as `>>>/g/` - confirm)
# * `/vt/thread/1234#p5678`  - a path on the current site
#
# Any other href (absolute, or missing entirely) is left untouched.
def dtext_artist_commentary_desc
  DText.from_html(artist_commentary_desc) do |element|
    next unless element.name == "a" && element["class"] == "quotelink"

    # `to_s` so a quotelink without an href is skipped instead of raising on nil.
    href = element["href"].to_s

    if href.start_with?("#")
      # `<a href="#p1234" class="quotelink">>>1234</a>`
      element["href"] = "https://#{domain}/#{board}/thread/#{thread_id}#{href}"
    elsif href.start_with?("//")
      # `<a href="//boards.4chan.org/g/" class="quotelink">>>>/g/</a>`
      element["href"] = "https:#{href}"
    elsif href.start_with?("/")
      # `<a href="/vt/thread/1234#p5678" class="quotelink">>>5678</a>`
      element["href"] = "https://#{domain}#{href}"
    end
  end
end
|
||||
|
||||
# The `boards.` host to build page URLs with, based on whichever of the URL or the referer
# is on 4chan.org or 4channel.org (the URL takes precedence).
#
# @return [String, nil] nil when neither the URL nor the referer is on one of those domains
def domain
  url_domain = parsed_url.domain
  return "boards.#{url_domain}" if ["4chan.org", "4channel.org"].include?(url_domain)

  referer_domain = parsed_referer&.domain
  "boards.#{referer_domain}" if ["4chan.org", "4channel.org"].include?(referer_domain)
end
|
||||
|
||||
# The board taken from the URL, falling back to the referer when the URL doesn't have one.
def board = parsed_url.board || parsed_referer&.board
|
||||
|
||||
# The thread id taken from the URL, falling back to the referer when the URL doesn't have one.
def thread_id = parsed_url.thread_id || parsed_referer&.thread_id
|
||||
|
||||
# The image id taken from the URL, falling back to the referer when the URL doesn't have one.
# It is compared against a post's `tim` field when looking up the post.
def image_id = parsed_url.image_id || parsed_referer&.image_id
|
||||
|
||||
# The post id: the one given in the URL (or referer) if any, otherwise the one found through the API.
def post_id = post_id_from_url || post_id_from_api
|
||||
|
||||
# The post id taken from the URL, falling back to the referer when the URL doesn't have one.
def post_id_from_url = parsed_url.post_id || parsed_referer&.post_id
|
||||
|
||||
# The `no` field of the post found through the API; nil when no post was found.
def post_id_from_api = post["no"]
|
||||
|
||||
# The API data of the post being uploaded: the first post in the thread whose `tim` equals the
# image id (when an image id is known), or whose `no` equals the post id given in the URL.
#
# @return [Hash] the post's data; an empty hash when no post matches
memoize def post
  found = api_response.find do |candidate|
    same_image = image_id.present? && candidate["tim"] == image_id
    same_image || candidate["no"] == post_id_from_url
  end

  found || {}
end
|
||||
|
||||
# The list of posts in the thread, fetched from the JSON API (the HTTP response is cached for
# one minute).
#
# Always returns an array, because callers iterate over the result directly: an empty array is
# returned when there is no API URL, when the request doesn't return 200, or when the response
# body has no "posts" key.
#
# @return [Array<Hash>] the posts of the thread
memoize def api_response
  endpoint = api_url
  return [] unless endpoint.present?

  response = http.cache(1.minute).get(endpoint)
  return [] unless response.status == 200

  # Fall back to [] so a body without "posts" doesn't hand nil to callers expecting an array.
  response.parse["posts"] || []
end
|
||||
|
||||
# The JSON API URL of the thread.
#
# @return [String, nil] nil unless both the board and the thread id are known
def api_url
  return unless board.present? && thread_id.present?

  "https://a.4cdn.org/#{board}/thread/#{thread_id}.json"
end
|
||||
end
|
||||
end
|
||||
end
|
||||
Reference in New Issue
Block a user