sources: fix 4chan URL parsing.
Add support for is.4chan.org and is2.4chan.org image URLs and for various old URL formats (legacy `/src/` image URLs and `/res/` thread URLs).
This commit is contained in:
@@ -17,14 +17,30 @@ class Source::URL::FourChan < Source::URL
|
|||||||
case [subdomain, domain, *path_segments]
|
case [subdomain, domain, *path_segments]
|
||||||
|
|
||||||
# https://boards.4channel.org/vt/thread/37293562#p37294005
|
# https://boards.4channel.org/vt/thread/37293562#p37294005
|
||||||
in _, ("4channel.org" | "4chan.org"), board, "thread", /\A[0-9]+\z/ => thread_id
|
# http://boards.4chan.org/a/res/41938201
|
||||||
|
# http://zip.4chan.org/jp/res/3598845.html
|
||||||
|
in _, ("4channel.org" | "4chan.org"), board, ("thread" | "res"), /\A([0-9]+)(?:\.html)?\z/
|
||||||
@board = board
|
@board = board
|
||||||
@thread_id = thread_id.to_i
|
@thread_id = $1.to_i
|
||||||
@post_id = fragment.to_s[/^p([0-9]+)$/, 1]&.to_i
|
@post_id = fragment.to_s[/^p([0-9]+)$/, 1]&.to_i
|
||||||
|
|
||||||
# https://i.4cdn.org/vt/1668729957824814.webm
|
# https://i.4cdn.org/vt/1668729957824814.webm
|
||||||
# https://i.4cdn.org/vt/1668729957824814s.jpg
|
# https://i.4cdn.org/vt/1668729957824814s.jpg
|
||||||
in "i", "4cdn.org", board, /\A([0-9]+)(s?)\./
|
# https://is2.4chan.org/vg/1663135782567622.jpg
|
||||||
|
# http://is.4chan.org/vp/1483914199051.jpg
|
||||||
|
in ("i" | "is" | "is2"), _, board, /\A([0-9]+)(s?)\./
|
||||||
|
@board = board
|
||||||
|
@image_id = $1.to_i
|
||||||
|
@image_type = $2 == "s" ? :preview : :original
|
||||||
|
@full_image_url = url.to_s if @image_type == :original
|
||||||
|
|
||||||
|
# http://images.4chan.org/vg/src/1378607754334.jpg
|
||||||
|
# http://orz.4chan.org/e/src/1202811803217.png
|
||||||
|
# http://zip.4chan.org/a/src/1201922408724.jpg
|
||||||
|
# http://cgi.4chan.org/r/src/1210870653551.jpg
|
||||||
|
# http://cgi.4chan.org/f/src/0931.swf
|
||||||
|
# http://img.4chan.org/b/src/1226194386317.jpg
|
||||||
|
in _, "4chan.org", board, "src", /\A([0-9]+)(s?)\./
|
||||||
@board = board
|
@board = board
|
||||||
@image_id = $1.to_i
|
@image_id = $1.to_i
|
||||||
@image_type = $2 == "s" ? :preview : :original
|
@image_type = $2 == "s" ? :preview : :original
|
||||||
@@ -36,7 +52,7 @@ class Source::URL::FourChan < Source::URL
|
|||||||
end
|
end
|
||||||
|
|
||||||
def image_url?
|
def image_url?
|
||||||
host == "i.4cdn.org"
|
image_id.present?
|
||||||
end
|
end
|
||||||
|
|
||||||
def page_url
|
def page_url
|
||||||
|
|||||||
@@ -69,5 +69,21 @@ module Sources
|
|||||||
)
|
)
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
|
should "Parse 4chan URLs correctly" do
|
||||||
|
assert(Source::URL.image_url?("https://i.4cdn.org/vt/1668729957824814.webm"))
|
||||||
|
assert(Source::URL.image_url?("https://i.4cdn.org/vt/1668729957824814s.jpg"))
|
||||||
|
assert(Source::URL.image_url?("https://is2.4chan.org/vg/1663135782567622.jpg"))
|
||||||
|
assert(Source::URL.image_url?("http://is.4chan.org/vp/1483914199051.jpg"))
|
||||||
|
assert(Source::URL.image_url?("http://images.4chan.org/vg/src/1378607754334.jpg"))
|
||||||
|
assert(Source::URL.image_url?("http://orz.4chan.org/e/src/1202811803217.png"))
|
||||||
|
assert(Source::URL.image_url?("http://zip.4chan.org/a/src/1201922408724.jpg"))
|
||||||
|
assert(Source::URL.image_url?("http://cgi.4chan.org/r/src/1210870653551.jpg"))
|
||||||
|
assert(Source::URL.image_url?("http://img.4chan.org/b/src/1226194386317.jpg"))
|
||||||
|
|
||||||
|
assert(Source::URL.page_url?("https://boards.4channel.org/vt/thread/37293562#p37294005"))
|
||||||
|
assert(Source::URL.page_url?("http://boards.4chan.org/a/res/41938201"))
|
||||||
|
assert(Source::URL.page_url?("http://zip.4chan.org/jp/res/3598845.html"))
|
||||||
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user