sources: refactor normalize_for_source.

`normalize_for_source` was used to convert image URLs to page URLs when displaying sources
on the post show page. Move all the code for converting image URLs to page URLs from
`Sources::Strategies#normalize_for_source` to `Source::URL#page_url`.

Previously, we had to be very careful in source strategies not to make any network calls in
`normalize_for_source`, since it was used in the view for the post show page. Now all the
code for generating page URLs is isolated in Source::URL, which makes source strategies
simpler. It also makes it easier to check if a source is an image URL or page URL, and if
the image URL is convertible to a page URL, which will make autotagging bad_link or
bad_source feasible.

Finally, this generates better page URLs in a handful of cases:

* https://www.artstation.com/artwork/qPVGP instead of https://anubis1982918.artstation.com/projects/qPVGP
* https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6 instead of https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6
* http://gallery.minitokyo.net/view/365677 instead of http://gallery.minitokyo.net/download/365677
* https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png instead of https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png
* https://rule34.paheal.net/post/view/852405 instead of https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1
This commit is contained in:
evazion
2022-03-23 00:41:56 -05:00
parent 770f850c66
commit 3aa5cab2aa
59 changed files with 471 additions and 484 deletions

View File

@@ -16,7 +16,7 @@
# url = Source::URL.parse("https://twitter.com/yasunavert/status/1496123903290314755")
# url.site_name # => "Twitter"
# url.status_id # => "1496123903290314755"
# url.twitter_username # => "yasunavert"
# url.username # => "yasunavert"
#
# @see Danbooru::URL
module Source
@@ -53,7 +53,7 @@ module Source
# @return [Source::URL]
def self.parse!(url)
url = Danbooru::URL.new(url)
subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL
subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL::Null
subclass.new(url)
end
@@ -78,39 +78,30 @@ module Source
#
# @return [String]
def site_name
# XXX should go in dedicated subclasses.
case host
when /ask\.fm\z/i
"Ask.fm"
when /bcy\.net\z/i
"BCY"
when /booth\.pm\z/i
"Booth.pm"
when /circle\.ms\z/i
"Circle.ms"
when /dlsite\.(com|net)\z/i
"DLSite"
when /doujinshi\.mugimugi\.org\z/i
"Doujinshi.org"
when /fc2\.com\z/i
"FC2"
when /ko-fi\.com\z/i
"Ko-fi"
when /mixi\.jp\z/i
"Mixi.jp"
when /piapro\.jp\z/i
"Piapro.jp"
when /sakura\.ne\.jp\z/i
"Sakura.ne.jp"
else
if self.class == Source::URL
# "www.melonbooks.co.jp" => "Melonbooks"
parsed_domain.sld.titleize
else
# "Source::URL::NicoSeiga" => "Nico Seiga"
self.class.name.demodulize.titleize
end
end
# "Source::URL::NicoSeiga" => "Nico Seiga"
self.class.name.demodulize.titleize
end
# Convert an image URL to the URL of the page containing the image, or
# return nil if it's not possible to convert the current URL to a page URL.
#
# When viewing a post, the source will be shown as the page URL if it's
# possible to convert the source from an image URL to a page URL.
#
# This base implementation always returns nil. Site-specific subclasses
# override it to build the page URL from the parts they parsed out of the URL.
#
# Examples:
#
# * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
# => https://www.pixiv.net/artworks/46324488
#
# * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
# => https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896
#
# * https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
# => nil
#
# @return [String, nil]
def page_url
nil
end
# Convert the current URL into a profile URL, or return nil if it's not
@@ -134,6 +125,14 @@ module Source
nil
end
# Shortcut for parsing a URL and converting it to its page URL in one step.
#
# @param url [String] the URL to parse
# @return [String, nil] the page URL, or nil if the URL can't be parsed or
#   can't be converted to a page URL
def self.page_url(url)
  parsed = Source::URL.parse(url)
  parsed&.page_url
end
# Shortcut for parsing a URL and converting it to its profile URL in one step.
#
# @param url [String] the URL to parse
# @return [String, nil] the profile URL, or nil if the URL can't be parsed or
#   can't be converted to a profile URL
def self.profile_url(url)
  parsed = Source::URL.parse(url)
  parsed&.profile_url
end
protected def initialize(...)
super(...)
parse

View File

@@ -79,6 +79,10 @@ class Source::URL::ArtStation < Source::URL
end
end
# The artwork page for this URL, or nil when no work ID was parsed from it.
#
# @return [String, nil]
def page_url
  return unless work_id.present?

  "https://www.artstation.com/artwork/#{work_id}"
end
# The artist's ArtStation profile, or nil when no username was parsed.
#
# @return [String, nil]
def profile_url
  return unless username.present?

  "https://www.artstation.com/#{username}"
end

View File

@@ -78,6 +78,17 @@ class Source::URL::Fanbox < Source::URL
to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url?
end
# The Fanbox post page for this URL, or nil if it can't be determined.
#
# Post URLs need a work ID plus either the creator's username (new-style
# fanbox.cc URL) or their numeric user ID (old-style pixiv.net URL); the
# username form takes precedence when both are known.
#
# @return [String, nil]
def page_url
  if work_id.present?
    return "https://#{username}.fanbox.cc/posts/#{work_id}" if username.present?
    return "https://www.pixiv.net/fanbox/creator/#{user_id}/post/#{work_id}" if user_id.present?
  end

  # Use profile url as page url for cover images (XXX may cause problems with bad_source detection)
  "https://www.pixiv.net/fanbox/creator/#{user_id}" if user_id.present? && image_url?
end
def profile_url
if username.present?
"https://#{username}.fanbox.cc"

View File

@@ -55,6 +55,7 @@ class Source::URL::Fantia < Source::URL
# https://fantia.jp/fanclubs/64496
# https://fantia.jp/fanclubs/1654/posts
# https://job.fantia.jp/fanclubs/5734
in _, "fanclubs", /\d+/ => fanclub_id, *rest
@fanclub_id = fanclub_id

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
class Source::URL::Fc2 < Source::URL
attr_reader :username, :profile_url
attr_reader :username, :profile_url, :page_url
def self.match?(url)
url.domain.in?(%w[fc2.com fc2blog.net fc2blog.us])
@@ -48,6 +48,7 @@ class Source::URL::Fc2 < Source::URL
# http://blog.fc2.com/g/b/o/gbot/20071023195141.jpg
in (/^blog-imgs-\d+(-origin)?$/ | "blog"), "fc2", "com", /^\w$/, /^\w$/, /^\w$/, username, file
@username = username
@page_url = "http://#{username}.blog.fc2.com/img/#{file}"
@profile_url = "http://#{username}.blog.fc2.com"
# http://diary.fc2.com/user/yuuri/img/2005_12/26.jpg
@@ -55,6 +56,9 @@ class Source::URL::Fc2 < Source::URL
# http://diary.fc2.com/user/kazuharoom/img/2015_5/22.jpg
in /diary\d*$/, "fc2", "com", "user", username, "img", date, file
@username = username
@year, @month = date.split("_")
@day = filename
@page_url = "http://#{host}/cgi-sys/ed.cgi/#{username}?Y=#{@year}&M=#{@month}&D=#{@day}"
@profile_url = "http://diary.fc2.com/cgi-sys/ed.cgi/#{username}"
# http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom/?Y=2012&M=10&D=22

View File

@@ -6,6 +6,7 @@
# Unsupported patterns:
# * https://foundation.app/@ <- This seems to be a novelty account.
# * https://foundation.app/mochiiimo <- no @
# * https://foundation.app/collection/kgfgen
class Source::URL::Foundation < Source::URL
attr_reader :username, :token_id, :work_id, :hash

View File

@@ -63,6 +63,14 @@ class Source::URL::HentaiFoundry < Source::URL
end
end
# The picture page for this URL, or nil when no work ID was parsed.
# Uses the full /pictures/user/ form when the username is known, and the
# short /pic-ID form otherwise.
#
# @return [String, nil]
def page_url
  return unless work_id.present?

  if username.present?
    "https://www.hentai-foundry.com/pictures/user/#{username}/#{work_id}"
  else
    "https://www.hentai-foundry.com/pic-#{work_id}"
  end
end
# The artist's Hentai Foundry profile, or nil when no username was parsed.
#
# @return [String, nil]
def profile_url
  return unless username.present?

  "https://www.hentai-foundry.com/user/#{username}"
end

View File

@@ -34,6 +34,10 @@ class Source::URL::Instagram < Source::URL
end
end
# The Instagram post page for this URL, or nil when no work ID was parsed.
# The trailing slash is kept as part of the generated URL.
#
# @return [String, nil]
def page_url
  return unless work_id.present?

  "https://www.instagram.com/p/#{work_id}/"
end
def profile_url
# Instagram URLs canonically end with "/"
"https://www.instagram.com/#{username}/" if username.present?

View File

@@ -52,6 +52,10 @@ class Source::URL::Lofter < Source::URL
"#{site}#{path}" if image_url?
end
# The Lofter post page for this URL. Both the blog username and the work ID
# are required; returns nil if either is missing.
#
# @return [String, nil]
def page_url
  return unless username.present? && work_id.present?

  "https://#{username}.lofter.com/post/#{work_id}"
end
# The blog's Lofter profile, or nil when no username was parsed.
#
# @return [String, nil]
def profile_url
  return unless username.present?

  "https://#{username}.lofter.com"
end

View File

@@ -77,6 +77,14 @@ class Source::URL::Mastodon < Source::URL
full_image_url.present?
end
# The status page for this URL on the same host, or nil when no work ID was
# parsed. Uses the /@user/ID form when the username is known, and the generic
# /web/statuses/ID form otherwise.
#
# @return [String, nil]
def page_url
  return unless work_id.present?

  if username.present?
    "https://#{host}/@#{username}/#{work_id}"
  else
    "https://#{host}/web/statuses/#{work_id}"
  end
end
def profile_url
if username.present?
"https://#{host}/@#{username}"

View File

@@ -85,6 +85,14 @@ class Source::URL::Moebooru < Source::URL
end
end
# The post page for this URL on the same domain. Prefers the post ID; falls
# back to an MD5 lookup when only the file hash is known. Returns nil when
# neither was parsed.
#
# @return [String, nil]
def page_url
  return "https://#{domain}/post/show/#{work_id}" if work_id.present?

  "https://#{domain}/post/show?md5=#{md5}" if md5.present?
end
def self.full_image_url(site_name, md5, file_ext, post_id = nil)
case site_name
when "Yande.re"

View File

@@ -59,6 +59,12 @@ class Source::URL::Newgrounds < Source::URL
url.host == "art.ngfiles.com"
end
# The Newgrounds art page for this URL. Both the username and the work title
# are required; returns nil if either is missing.
#
# @return [String, nil]
def page_url
  return unless username.present? && work_title.present?

  "https://www.newgrounds.com/art/view/#{username}/#{work_title}"
end
# The artist's Newgrounds profile, or nil when no username was parsed.
#
# @return [String, nil]
def profile_url
  return unless username.present?

  "https://#{username}.newgrounds.com"
end

View File

@@ -94,6 +94,10 @@ class Source::URL::Nijie < Source::URL
to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url?
end
# The Nijie illustration page for this URL, or nil when no work ID was parsed.
#
# @return [String, nil]
def page_url
  return unless work_id.present?

  "https://nijie.info/view.php?id=#{work_id}"
end
# The artist's Nijie member page, or nil when no user ID was parsed.
#
# @return [String, nil]
def profile_url
  return unless user_id.present?

  "https://nijie.info/members.php?id=#{user_id}"
end

View File

@@ -0,0 +1,201 @@
# frozen_string_literal: true
# Catch-all URL class for sites that don't have a dedicated Source::URL
# subclass. It recognizes image URLs from a number of miscellaneous sites and
# converts them to the URL of the page containing the image.
#
# @see Source::URL
class Source::URL::Null < Source::URL
  # work_id is the site-specific post/image ID parsed from the URL (when the
  # URL contains one). page_url is the page the image belongs to, or nil when
  # the URL isn't one of the image URL formats recognized in #parse.
  attr_reader :work_id, :page_url

  # Matches every URL, so that this class can act as the fallback when no
  # site-specific class claims the URL.
  #
  # @param url [Danbooru::URL] the URL being classified (unused)
  # @return [Boolean] always true
  def self.match?(url)
    true
  end

  # The human-readable name of the site, used for sites without their own
  # subclass. A few sites are special-cased because their names can't be
  # derived from the domain; everything else is titleized from the domain.
  #
  # @return [String]
  def site_name
    case host
    when /ask\.fm\z/i
      "Ask.fm"
    when /bcy\.net\z/i
      "BCY"
    when /booth\.pm\z/i
      "Booth.pm"
    when /circle\.ms\z/i
      "Circle.ms"
    when /dlsite\.(com|net)\z/i
      "DLSite"
    when /doujinshi\.mugimugi\.org\z/i
      "Doujinshi.org"
    when /ko-fi\.com\z/i
      "Ko-fi"
    when /mixi\.jp\z/i
      "Mixi.jp"
    when /piapro\.jp\z/i
      "Piapro.jp"
    when /sakura\.ne\.jp\z/i
      "Sakura.ne.jp"
    else
      # "www.melonbooks.co.jp" => "Melonbooks"
      parsed_domain.sld.titleize
    end
  end

  # Match the URL against the known image URL formats and, on a match, record
  # the parsed IDs and the corresponding page URL. Patterns are tried in
  # order, so more specific patterns must come before more general ones for
  # the same site.
  #
  # Where a branch reads `$1`, it refers to the first capture group of the
  # last regex matched by that branch's pattern.
  def parse
    case [subdomain, domain, *path_segments]

    # http://nekomataya.net/diarypro/data/upfile/66-1.jpg
    # http://www117.sakura.ne.jp/~cat_rice/diarypro/data/upfile/31-1.jpg
    # http://webknight0.sakura.ne.jp/cgi-bin/diarypro/data/upfile/9-1.jpg
    in _, _, *subdirs, "diarypro", "data", "upfile", /^(\d+)-\d+\.(jpg|png|gif)$/ => file
      @work_id = $1
      @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/")

    # http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=723-4.jpg
    # http://www.danshaku.sakura.ne.jp/cgi-bin/diarypro/diary.cgi?mode=image&upfile=56-1.jpg
    # http://www.yanbow.com/~myanie/diarypro/diary.cgi?mode=image&upfile=279-1.jpg
    in _, _, *subdirs, "diarypro", "diary.cgi" if params[:mode] == "image" && params[:upfile].present?
      @work_id = params[:upfile][/^\d+/]
      @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/")

    # http://com2.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/018.jpg
    # http://sozai.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/009.jpg
    in _, "doujinantena.com", "contents_jpg", /^\h{32}$/ => md5, *rest
      @md5 = md5
      @page_url = "http://doujinantena.com/page.php?id=#{md5}"

    # https://e-shuushuu.net/images/2017-07-19-915628.jpeg
    in _, "e-shuushuu.net", "images", /^\d{4}-\d{2}-\d{2}-(\d+)\.(jpeg|jpg|png|gif)$/i
      @work_id = $1
      @page_url = "https://e-shuushuu.net/image/#{@work_id}"

    # https://scontent.fmnl9-2.fna.fbcdn.net/v/t1.6435-9/196345051_961754654392125_8855002558147907833_n.jpg?_nc_cat=103&ccb=1-5&_nc_sid=0debeb&_nc_ohc=EB1RGiEOtyEAX9XE7aL&_nc_ht=scontent.fmnl9-2.fna&oh=00_AT8NNz_keqQ6VJeC1UVSMULhjaP3iykm-ONSMR7IrtarUQ&oe=6257862E
    # https://scontent.fmnl8-2.fna.fbcdn.net/v/t1.6435-9/fr/cp0/e15/q65/80900683_480934615898749_6481759463945535488_n.jpg?_nc_cat=107&ccb=1-3&_nc_sid=8024bb&_nc_ohc=cCYFUzyHDmUAX-YHJIw&_nc_ht=scontent.fmnl8-2.fna&oh=e45c3837afcfefb6a4d93adfecef88c1&oe=60F6E392
    # https://scontent.fmnl13-1.fna.fbcdn.net/v/t31.18172-8/22861751_1362164640578443_432921612329393062_o.jpg
    # https://scontent-sin1-1.xx.fbcdn.net/hphotos-xlp1/t31.0-8/s960x960/12971037_586686358150819_495608200196301072_o.jpg
    in _, "fbcdn.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/
      @work_id = $1
      @page_url = "https://www.facebook.com/photo?fbid=#{@work_id}"

    # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xlp1/t31.0-8/s960x960/13173066_623015164516858_1844421675339995359_o.jpg
    # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xpf1/v/t1.0-9/s720x720/12032214_991569624217563_4908408819297057893_n.png?oh=efe6ea26aed89c8a12ddc1832b1f0157&oe=5667D5B1&__gda__=1453845772_c742c726735047f2feb836b845ff296f
    #
    # The first filename segment is a multi-digit number, so it must be matched
    # with `\d+` (as in the fbcdn.net pattern above), not a single `\d`.
    in /fbcdn/, "akamaihd.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/
      @work_id = $1
      @page_url = "https://www.facebook.com/photo.php?fbid=#{@work_id}"

    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
    in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/
      @md5 = $1
      @page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}"

    # https://a.hitomi.la/galleries/907838/1.png
    # https://0a.hitomi.la/galleries/1169701/23.png
    # https://aa.hitomi.la/galleries/990722/003_01_002.jpg
    # https://la.hitomi.la/galleries/1054851/001_main_image.jpg
    in _, "hitomi.la", "galleries", gallery_id, /^(\d+)\w*\.(jpg|png|gif)$/ => image_id
      @gallery_id = gallery_id
      # to_i strips leading zeros ("003" => 3) for the reader page anchor.
      @image_id = $1.to_i
      @page_url = "https://hitomi.la/reader/#{gallery_id}.html##{@image_id}"

    # https://aa.hitomi.la/galleries/883451/t_rena1g.png
    in _, "hitomi.la", "galleries", gallery_id, file
      @gallery_id = gallery_id
      @page_url = "https://hitomi.la/galleries/#{gallery_id}.html"

    # http://www.karabako.net/images/karabako_43878.jpg
    # http://www.karabako.net/imagesub/karabako_43222_215.jpg
    in _, "karabako.net", ("images" | "imagesub"), /^karabako_(\d+)/
      @work_id = $1
      @page_url = "http://www.karabako.net/post/view/#{@work_id}"

    # http://static.minitokyo.net/downloads/31/33/764181.jpg
    in _, "minitokyo.net", "downloads", /^\d{2}$/, /^\d{2}$/, file
      @work_id = filename
      @page_url = "http://gallery.minitokyo.net/view/#{@work_id}"

    # http://i.minus.com/j2LcOC52dGLtB.jpg
    # http://i5.minus.com/ik26grnRJAmYh.jpg
    in _, "minus.com", /^[ij]([a-zA-Z0-9]{12,})\.(jpg|png|gif)$/
      @work_id = $1
      @page_url = "http://minus.com/i/#{@work_id}"

    # http://jpg.nijigen-daiaru.com/7364/013.jpg
    in "jpg", "nijigen-daiaru.com", /^\d+$/ => work_id, file
      @work_id = work_id
      @page_url = "http://nijigen-daiaru.com/book.php?idb=#{@work_id}"

    # http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
    # http://kura3.photozou.jp/pub/741/2662741/photo/160341863_624.v1353780834.jpg
    in _, "photozou.jp", "pub", /^\d+$/, user_id, "photo", /^(\d+)/ => file
      @user_id = user_id
      @work_id = $1
      @page_url = "https://photozou.jp/photo/show/#{@user_id}/#{@work_id}"

    # https://tulip.paheal.net/_images/4f309b2b680da9c3444ed462bb172214/3910816%20-%20Dark_Magician_Girl%20MINK343%20Yu-Gi-Oh!.jpg
    # http://rule34-data-002.paheal.net/_images/2ab55f9291c8f2c68cdbeac998714028/2401510%20-%20Ash_Ketchum%20Lillie%20Porkyman.jpg
    # http://rule34-images.paheal.net/c4710f05e76bdee22fcd0d62bf1ac840/262685%20-%20mabinogi%20nao.jpg
    in _, "paheal.net", *subdirs, /^\h{32}$/ => md5, /^(\d+)/ => file
      @md5 = md5
      @work_id = $1
      @page_url = "https://rule34.paheal.net/post/view/#{@work_id}"

    # https://api-cdn-mp4.rule34.xxx/images/4330/2f85040320f64c0e42128a8b8f6071ce.mp4
    # https://ny5webm.rule34.xxx//images/4653/3c63956b940d0ff565faa8c7555b4686.mp4?5303486
    # https://img.rule34.xxx//images/4977/7d76919c2f713c580f69fe129d2d1a44.jpeg?5668795
    # http://rule34.xxx//images/993/5625625970c9ce8c5121fde518c2c4840801cd29.jpg?992983
    # http://img3.rule34.xxx/img/rule34//images/1180/76c6497b5138c4122710c2d05458e729a8d34f7b.png?1190815
    # http://aimg.rule34.xxx//samples/1267/sample_d628f215f27815dc9c1d365a199ee68e807efac1.jpg?1309664
    in _, "rule34.xxx", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpg|jpeg|png|gif|webm|mp4)$/
      @md5 = $1
      @page_url = "https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}"

    # https://cs.sankakucomplex.com/data/68/6c/686ceee03af38fe4ceb45bf1c50947e0.jpg?e=1591893718&m=fLlJfTrK_j2Rnc0uIHNC3w
    # https://v.sankakucomplex.com/data/24/ff/24ff5da1fd7ed051b083b36e4e51de8e.mp4?e=1644999580&m=-OtZg2QdtKbibMte8vlsdw&expires=1644999580&token=0YUdUKKwTmvpozhG1WW_nRvSUQw3WJd574andQv-KYY
    # https://cs.sankakucomplex.com/data/sample/2a/45/sample-2a45c67281b0fcfd26208063f81a3114.jpg?e=1590609355&m=cexHhVyJguoZqPB3z3N7aA
    # http://c3.sankakucomplex.com/data/sample/8a/44/preview8a44211650e818ef07e5d00284c20a14.jpg
    in _, "sankakucomplex.com", "data", *subdirs, /^(?:preview|sample-)?(\h{32})\.(jpg|jpeg|gif|png|webm|mp4)$/
      @md5 = $1
      @page_url = "https://chan.sankakucomplex.com/post/show?md5=#{@md5}"

    # http://shimmie.katawa-shoujo.com/image/3657.jpg
    in "shimmie", "katawa-shoujo.com", "image", file
      @work_id = filename
      @page_url = "https://shimmie.katawa-shoujo.com/post/view/#{@work_id}"

    # http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
    # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
    # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
    # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
    in ("img" | "ecdnimg"), "toranoana.jp", *subdirs, /^\d{2}$/, /^\d{4}$/, /^\d{2}$/, /^\d{2}$/, /^(\d{12})-\d+p\.jpg$/ => file
      @work_id = $1
      @page_url = "https://ec.toranoana.jp/tora_r/ec/item/#{@work_id}"

    # http://p.twpl.jp/show/orig/DTaCZ
    # http://p.twpl.jp/show/large/5zack
    # http://p.twipple.jp/show/orig/vXqaU
    in _, ("twpl.jp" | "twipple.jp"), "show", ("large" | "orig"), work_id
      @work_id = work_id
      @page_url = "http://p.twipple.jp/#{work_id}"

    # https://vignette.wikia.nocookie.net/queensblade/images/3/33/WGAIRI1.jpg/
    # https://vignette1.wikia.nocookie.net/valkyriecrusade/images/b/bf/Joan_Of_Arc_H.png/revision/latest?cb=20170801081004
    # https://static.wikia.nocookie.net/valkyriecrusade/images/3/3f/Joan_Of_Arc.png/revision/latest/scale-to-width-down/270?cb=20170801081000
    in _, "nocookie.net", wiki, "images", /^\h$/, /^\h\h$/, file, *rest
      @wiki = wiki
      @file = file
      @page_url = "https://#{wiki}.fandom.com/wiki/File:#{file}"

    # https://static.zerochan.net/Fullmetal.Alchemist.full.2831797.png
    # https://s1.zerochan.net/Cocoa.Cookie.600.2957938.jpg
    # http://static.zerochan.net/full/24/13/90674.jpg
    in _, "zerochan.net", *subdirs, /(\d+)\.(jpg|png|gif)$/
      @work_id = $1
      @page_url = "https://www.zerochan.net/#{@work_id}#full"

    # http://www.zerochan.net/full/1567893
    in _, "zerochan.net", "full", /^\d+$/ => work_id
      @work_id = work_id
      @page_url = "https://www.zerochan.net/#{@work_id}#full"

    else
      # Unrecognized URL: leave @work_id and @page_url nil. The empty `else` is
      # required; without it `case/in` raises NoMatchingPatternError.
    end
  end
end

View File

@@ -52,6 +52,10 @@ class Source::URL::Plurk < Source::URL
host == "images.plurk.com"
end
# The Plurk post page for this URL, or nil when no work ID was parsed.
#
# @return [String, nil]
def page_url
  return unless work_id.present?

  "https://www.plurk.com/p/#{work_id}"
end
# The user's Plurk profile, or nil when no username was parsed.
#
# @return [String, nil]
def profile_url
  return unless username.present?

  "https://www.plurk.com/#{username}"
end

View File

@@ -43,6 +43,10 @@ class Source::URL::Skeb < Source::URL
end
end
# The Skeb work page for this URL. Both the username and the work ID are
# required; returns nil if either is missing.
#
# @return [String, nil]
def page_url
  return unless username.present? && work_id.present?

  "https://skeb.jp/@#{username}/works/#{work_id}"
end
# The creator's Skeb profile, or nil when no username was parsed.
#
# @return [String, nil]
def profile_url
  return unless username.present?

  "https://skeb.jp/@#{username}"
end

View File

@@ -85,8 +85,7 @@ class Source::URL::Tumblr < Source::URL
end
def page_url
return nil unless @blog_name.present? && @work_id.present?
"https://#{@blog_name}.tumblr.com/post/#{@work_id}"
"https://#{blog_name}.tumblr.com/post/#{work_id}" if blog_name.present? && work_id.present?
end
def profile_url

View File

@@ -72,8 +72,7 @@ class Source::URL::TwitPic < Source::URL
end
def page_url
return nil unless base36_id.present?
"https://twitpic.com/#{base36_id}"
"https://twitpic.com/#{base36_id}" if base36_id.present?
end
def profile_url

View File

@@ -26,7 +26,7 @@ class Source::URL::Twitter < Source::URL
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
RESERVED_USERNAMES = %w[home i intent search]
attr_reader :status_id, :twitter_username, :user_id
attr_reader :status_id, :username, :user_id
def self.match?(url)
return false if Source::URL::TwitPic.match?(url) # TwitPic uses https://o.twimg.com/ URLs
@@ -45,12 +45,12 @@ class Source::URL::Twitter < Source::URL
# https://twitter.com/Kekeflipnote/status/1496555599718498319/video/1
# https://twitter.com/sato_1_11/status/1496489742791475201/photo/2
in "twitter.com", username, "status", status_id, *rest
@twitter_username = username
@username = username
@status_id = status_id
# https://twitter.com/motty08111213
in "twitter.com", username, *rest
@twitter_username = username unless username.in?(RESERVED_USERNAMES)
@username = username unless username.in?(RESERVED_USERNAMES)
# https://twitter.com/intent/user?user_id=1485229827984531457
in "twitter.com", "intent", "user" if params[:user_id].present?
@@ -58,7 +58,7 @@ class Source::URL::Twitter < Source::URL
# https://twitter.com/intent/user?screen_name=ryuudog_NFT
in "twitter.com", "intent", "user" if params[:screen_name].present?
@twitter_username = params[:screen_name]
@username = params[:screen_name]
# https://twitter.com/i/user/889592953
in "twitter.com", "i", "user", user_id
@@ -101,9 +101,17 @@ class Source::URL::Twitter < Source::URL
"#{site}/#{@file_path}:orig"
end
# The tweet page for this URL, or nil when no status ID was parsed. Uses the
# /username/status/ID form when the username is known, and the
# username-independent /i/web/status/ID form otherwise.
#
# @return [String, nil]
def page_url
  return unless status_id.present?

  if username.present?
    "https://twitter.com/#{username}/status/#{status_id}"
  else
    "https://twitter.com/i/web/status/#{status_id}"
  end
end
def profile_url
if twitter_username.present?
"https://twitter.com/#{twitter_username}"
if username.present?
"https://twitter.com/#{username}"
elsif user_id.present?
# "https://twitter.com/i/user/#{user_id}
"https://twitter.com/intent/user?user_id=#{user_id}"

View File

@@ -104,7 +104,7 @@ class Source::URL::Weibo < Source::URL
end
end
def normalized_url
def page_url
if @artist_short_id.present? && @illust_base62_id.present?
"https://www.weibo.com/#{@artist_short_id}/#{@illust_base62_id}"
elsif mobile_url.present?

View File

@@ -23,7 +23,6 @@ module Sources
Strategies::Foundation,
Strategies::Plurk,
Strategies::Tinami,
Strategies::TwitPic,
Strategies::Fantia,
]
end
@@ -36,9 +35,5 @@ module Sources
def self.canonical(url, referer)
find(url, referer).canonical_url
end
def self.normalize_source(url)
find(url).normalize_for_source || url
end
end
end

View File

@@ -52,16 +52,6 @@ module Sources::Strategies
end
end
def normalize_for_source
return if project_id.blank?
if artist_name_from_url.present?
"https://#{artist_name_from_url}.artstation.com/projects/#{project_id}"
else
"https://www.artstation.com/artwork/#{project_id}"
end
end
def image_urls_from_api
api_response[:assets].to_a.map do |asset|
if asset[:asset_type] == "image"

View File

@@ -138,12 +138,6 @@ module Sources
end
memoize :http_downloader
# Given a post/image url, this is the normalized url that will be displayed in a post's page in its stead.
# This function should never make any network call, even indirectly. Return nil to never normalize.
def normalize_for_source
nil
end
def artists
ArtistFinder.find_artists(profile_url)
end

View File

@@ -65,10 +65,6 @@ module Sources
end
end
def normalize_for_source
page_url_from_image_url
end
def profile_url
return nil if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}"

View File

@@ -34,19 +34,6 @@ module Sources
end
end
def normalize_for_source
if illust_id.present?
if artist_name_from_url.present?
"https://#{artist_name_from_url}.fanbox.cc/posts/#{illust_id}"
elsif artist_id_from_url.present?
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}/post/#{illust_id}"
end
elsif artist_id_from_url.present?
# Cover images
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}"
end
end
def profile_url
return if artist_name.blank?

View File

@@ -128,10 +128,6 @@ module Sources::Strategies
DText.from_html(artist_commentary_desc)
end
def normalize_for_source
page_url
end
def work_type
parsed_url.work_type || parsed_referer&.work_type
end

View File

@@ -76,10 +76,6 @@ module Sources
DText.from_html(artist_commentary_desc)
end
def normalize_for_source
page_url
end
def api_response
return {} if page.nil?

View File

@@ -68,10 +68,6 @@ module Sources
DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n")
end
def normalize_for_source
page_url
end
def illust_id
parsed_url.work_id || parsed_referer&.work_id
end

View File

@@ -47,10 +47,6 @@ module Sources
page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html
end
def normalize_for_source
page_url
end
def illust_id
parsed_url.work_id || parsed_referer&.work_id
end

View File

@@ -79,10 +79,6 @@ module Sources::Strategies
api_response.tags
end
def normalize_for_source
page_url
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
if element.name == "a"

View File

@@ -25,17 +25,6 @@ module Sources
image_urls.first
end
def normalize_for_source
id = post_id_from_url
md5 = post_md5_from_url
if id.present?
"https://#{domain}/post/show/#{id}"
elsif md5.present?
"https://#{domain}/post?tags=md5:#{md5}"
end
end
def tags
api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]

View File

@@ -84,10 +84,6 @@ module Sources
end
end
def normalize_for_source
page_url
end
def user_name
parsed_url.username || parsed_referer&.username
end

View File

@@ -66,10 +66,6 @@ module Sources
end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp
end
def normalize_for_source
page_url
end
def tag_name
return if api_client&.user_id.blank?
"nicoseiga#{api_client.user_id}"

View File

@@ -109,12 +109,6 @@ module Sources
artist_id_from_url || artist_id_from_page
end
def normalize_for_source
return if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"
end
def doujin?
page&.at("#dojin_left").present?
end

View File

@@ -18,112 +18,6 @@ module Sources
def artists
ArtistFinder.find_artists(url)
end
def normalize_for_source
case url
when %r{\Ahttp://www\.karabako\.net/images(?:ub)?/karabako_(\d+)(?:_\d+)?\.}i
"http://www.karabako.net/post/view/#{$1}"
# XXX http://twipple.jp is defunct
# http://p.twpl.jp/show/orig/myRVs
when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i
"http://p.twipple.jp/#{$1}"
when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i
username = $1
filename = $2
"http://#{username}.blog.fc2.com/img/#{filename}/"
when %r{\Ahttps?://diary(\d)?\.fc2\.com/user/([^/]+)/img/(\d+)_(\d+)/(\d+)\.}i
server_id = $1
username = $2
year = $3
month = $4
day = $5
"http://diary#{server_id}.fc2.com/cgi-sys/ed.cgi/#{username}?Y=#{year}&M=#{month}&D=#{day}"
when %r{\Ahttps?://(?:fbcdn-)?s(?:content|photos)-[^/]+\.(?:fbcdn|akamaihd)\.net/hphotos-.+/\d+_(\d+)_(?:\d+_){1,3}[no]\.}i
"https://www.facebook.com/photo.php?fbid=#{$1}"
when %r{\Ahttps?://c(?:s|han|[1-4])\.sankakucomplex\.com/data(?:/sample)?/(?:[a-f0-9]{2}/){2}(?:sample-|preview)?([a-f0-9]{32})}i
"https://chan.sankakucomplex.com/en/post/show?md5=#{$1}"
when %r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?|png))?\z}i
"https://www.zerochan.net/#{$1}#full"
when %r{\Ahttps?://static[1-6]?\.minitokyo\.net/(?:downloads|view)/(?:\d{2}/){2}(\d+)}i
"http://gallery.minitokyo.net/download/#{$1}"
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
when %r{\Ahttps?://(?:\w+\.)?gelbooru\.com//?(?:images|samples)/(?:\d+|\h\h/\h\h)/(?:sample_)?(?<md5>\h{32})\.}i
"https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{$~[:md5]}"
when %r{\Ahttps?://(?:slot\d*\.)?im(?:g|ages)\d*\.wikia\.(?:nocookie\.net|com)/(?:_{2}cb\d{14}/)?([^/]+)(?:/[a-z]{2})?/images/(?:(?:thumb|archive)?/)?[a-f0-9]/[a-f0-9]{2}/(?:\d{14}(?:!|%21))?([^/]+)}i
subdomain = $1
filename = $2
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
when %r{\Ahttps?://vignette(?:\d*)\.wikia\.nocookie\.net/([^/]+)/images/[a-f0-9]/[a-f0-9]{2}/([^/]+)}i
subdomain = $1
filename = $2
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
when %r{\Ahttps?://e-shuushuu.net/images/\d{4}-(?:\d{2}-){2}(\d+)}i
"https://e-shuushuu.net/image/#{$1}"
when %r{\Ahttps?://jpg\.nijigen-daiaru\.com/(\d+)}i
"http://nijigen-daiaru.com/book.php?idb=#{$1}"
when %r{\Ahttps?://sozai\.doujinantena\.com/contents_jpg/([a-f0-9]{32})/}i
"http://doujinantena.com/page.php?id=#{$1}"
when %r{\Ahttps?://rule34-(?:data-\d{3}|images)\.paheal\.net/(?:_images/)?([a-f0-9]{32})}i
"https://rule34.paheal.net/post/list/md5:#{$1}/1"
when %r{\Ahttps?://shimmie\.katawa-shoujo\.com/image/(\d+)}i
"https://shimmie.katawa-shoujo.com/post/view/#{$1}"
when %r{\Ahttps://(?:(?:\w+\.)?rule34\.xxx|img\.booru\.org/(?:rule34|r34))(?:/(?:img/rule34|r34))?/{1,2}images/\d+/([a-f0-9]{32})\.}i
"https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}"
when %r{(\Ahttps?://.+)/diarypro/d(?:ata/upfile/|iary\.cgi\?mode=image&upfile=)(\d+)}i
base_url = $1
entry_no = $2
"#{base_url}/diarypro/diary.cgi?no=#{entry_no}"
# XXX site is defunct
when %r{\Ahttps?://i(?:\d)?\.minus\.com/(?:i|j)([^\.]{12,})}i
"http://minus.com/i/#{$1}"
# http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
# http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg
when %r{\Ahttps?://\w+\.photozou\.jp/pub/\d+/(?<artist_id>\d+)/photo/(?<photo_id>\d+)_.*$}i
"https://photozou.jp/photo/show/#{$~[:artist_id]}/#{$~[:photo_id]}"
# http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
# http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
# http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
# https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i
"https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/"
# https://a.hitomi.la/galleries/907838/1.png
# https://0a.hitomi.la/galleries/1169701/23.png
# https://aa.hitomi.la/galleries/990722/003_01_002.jpg
# https://la.hitomi.la/galleries/1054851/001_main_image.jpg
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/(?<image_id>\d+)\w*\.[a-z]+\z}i
"https://hitomi.la/reader/#{$~[:gallery_id]}.html##{$~[:image_id].to_i}"
# https://aa.hitomi.la/galleries/883451/t_rena1g.png
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/\w*\.[a-z]+\z}i
"https://hitomi.la/galleries/#{$~[:gallery_id]}.html"
else
nil
end
end
end
end
end

View File

@@ -96,11 +96,6 @@ module Sources
api_illust[:description]
end
def normalize_for_source
return nil if illust_id.blank?
"https://www.pixiv.net/artworks/#{illust_id}"
end
def tag_name
moniker
end

View File

@@ -110,10 +110,6 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def normalize_for_source
page_url
end
memoize :page, :page_json, :api_replies
end
end

View File

@@ -48,10 +48,6 @@ module Sources
"https://skeb.jp/@#{artist_name}/works/#{illust_id}"
end
def normalize_for_source
page_url
end
def api_url
return nil unless artist_name.present? && illust_id.present?
"https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"

View File

@@ -83,10 +83,6 @@ module Sources::Strategies
super(tag)
end
def normalize_for_source
parsed_url.page_url
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip
end

View File

@@ -1,14 +0,0 @@
# frozen_string_literal: true
# @see Source::URL::TwitPic
module Sources::Strategies
class TwitPic < Base
def match?
Source::URL::TwitPic === parsed_url
end
def normalize_for_source
parsed_url.page_url || url
end
end
end

View File

@@ -93,14 +93,6 @@ module Sources::Strategies
api_response[:full_text].to_s
end
def normalize_for_source
if tag_name_from_url.present? && status_id.present?
"https://twitter.com/#{tag_name_from_url}/status/#{status_id}"
elsif status_id.present?
"https://twitter.com/i/web/status/#{status_id}"
end
end
def tags
api_response.dig(:entities, :hashtags).to_a.map do |hashtag|
[hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"]
@@ -150,7 +142,7 @@ module Sources::Strategies
end
def tag_name_from_url
parsed_url.twitter_username || parsed_referer&.twitter_username
parsed_url.username || parsed_referer&.username
end
memoize :api_response

View File

@@ -87,10 +87,6 @@ module Sources
end
end
def normalize_for_source
parsed_url.normalized_url
end
def api_response
return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank?

View File

@@ -307,15 +307,16 @@ class Post < ApplicationRecord
end
end
def parsed_source
Source::URL.parse(source) if web_source?
end
def normalized_source
return source unless web_source?
Sources::Strategies.normalize_source(source)
parsed_source&.page_url || source
end
def source_domain
return "" unless web_source?
Danbooru::URL.parse(normalized_source)&.domain.to_s
parsed_source&.domain.to_s
end
end