sources: refactor normalize_for_source.

`normalize_for_source` was used to convert image URLs to page URLs when displaying sources
on the post show page. Move all the code for converting image URLs to page URLs from
`Sources::Strategies#normalize_for_source` to `Source::URL#page_url`.

Before we had to be very careful in source strategies not to make any network calls in
`normalize_for_source`, since it was used in the view for the post show page. Now all the
code for generating page URLs is isolated in Source::URL, which makes source strategies
simpler. It also makes it easier to check if a source is an image URL or page URL, and if
the image URL is convertible to a page URL, which will make autotagging bad_link or
bad_source feasible.

Finally, this fixes it to generate better page URLs in a handful of cases:

* https://www.artstation.com/artwork/qPVGP instead of https://anubis1982918.artstation.com/projects/qPVGP
* https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6 instead of https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6
* http://gallery.minitokyo.net/view/365677 instead of http://gallery.minitokyo.net/download/365677
* https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png instead of https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png
* https://rule34.paheal.net/post/view/852405 instead of https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1
This commit is contained in:
evazion
2022-03-23 00:41:56 -05:00
parent 770f850c66
commit 3aa5cab2aa
59 changed files with 471 additions and 484 deletions

View File

@@ -16,7 +16,7 @@
# url = Source::URL.parse("https://twitter.com/yasunavert/status/1496123903290314755")
# url.site_name # => "Twitter"
# url.status_id # => "1496123903290314755"
# url.twitter_username # => "yasunavert"
# url.username # => "yasunavert"
#
# @see Danbooru::URL
module Source
@@ -53,7 +53,7 @@ module Source
# @return [Source::URL]
def self.parse!(url)
url = Danbooru::URL.new(url)
subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL
subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL::Null
subclass.new(url)
end
@@ -78,39 +78,30 @@ module Source
#
# @return [String]
def site_name
# XXX should go in dedicated subclasses.
case host
when /ask\.fm\z/i
"Ask.fm"
when /bcy\.net\z/i
"BCY"
when /booth\.pm\z/i
"Booth.pm"
when /circle\.ms\z/i
"Circle.ms"
when /dlsite\.(com|net)\z/i
"DLSite"
when /doujinshi\.mugimugi\.org\z/i
"Doujinshi.org"
when /fc2\.com\z/i
"FC2"
when /ko-fi\.com\z/i
"Ko-fi"
when /mixi\.jp\z/i
"Mixi.jp"
when /piapro\.jp\z/i
"Piapro.jp"
when /sakura\.ne\.jp\z/i
"Sakura.ne.jp"
else
if self.class == Source::URL
# "www.melonbooks.co.jp" => "Melonbooks"
parsed_domain.sld.titleize
else
# "Source::URL::NicoSeiga" => "Nico Seiga"
self.class.name.demodulize.titleize
end
end
# "Source::URL::NicoSeiga" => "Nico Seiga"
self.class.name.demodulize.titleize
end
# Convert an image URL to the URL of the page containing the image, or
# return nil if it's not possible to convert the current URL to a page URL.
#
# When viewing a post, the source will be shown as the page URL if it's
# possible to convert the source from an image URL to a page URL.
#
# This base implementation always returns nil. Site-specific subclasses
# define their own `page_url` when they can build one from the parsed URL.
#
# Examples:
#
# * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
# => https://www.pixiv.net/artworks/46324488
#
# * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
# => https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896
#
# * https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
# => nil
#
# @return [String, nil]
def page_url
nil
end
# Convert the current URL into a profile URL, or return nil if it's not
@@ -134,6 +125,14 @@ module Source
nil
end
# Parse the given URL and return its page URL.
#
# @param url [String] the URL to convert
# @return [String, nil] the page URL, or nil if parsing yields nil or the
#   parsed URL has no page URL
def self.page_url(url)
  parsed = Source::URL.parse(url)
  parsed&.page_url
end
# Parse the given URL and return its profile URL.
#
# @param url [String] the URL to convert
# @return [String, nil] the profile URL, or nil if parsing yields nil or the
#   parsed URL has no profile URL
def self.profile_url(url)
  parsed = Source::URL.parse(url)
  parsed&.profile_url
end
protected def initialize(...)
super(...)
parse

View File

@@ -79,6 +79,10 @@ class Source::URL::ArtStation < Source::URL
end
end
# Canonical ArtStation artwork page for this URL.
#
# @return [String, nil] nil when no work ID was parsed from the URL
def page_url
  return nil unless work_id.present?

  "https://www.artstation.com/artwork/#{work_id}"
end
# ArtStation profile page for the artist named in this URL.
#
# @return [String, nil] nil when no username was parsed from the URL
def profile_url
  return nil unless username.present?

  "https://www.artstation.com/#{username}"
end

View File

@@ -78,6 +78,17 @@ class Source::URL::Fanbox < Source::URL
to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url?
end
# Page URL for a Fanbox post, preferring the username-based form over the
# legacy pixiv.net creator-ID form.
#
# @return [String, nil] nil when the URL lacks the IDs needed to build a page URL
def page_url
  return "https://#{username}.fanbox.cc/posts/#{work_id}" if username.present? && work_id.present?
  return "https://www.pixiv.net/fanbox/creator/#{user_id}/post/#{work_id}" if user_id.present? && work_id.present?
  return nil unless user_id.present? && image_url?

  # Cover images have no post of their own, so fall back to the profile URL
  # (XXX may cause problems with bad_source detection).
  "https://www.pixiv.net/fanbox/creator/#{user_id}"
end
def profile_url
if username.present?
"https://#{username}.fanbox.cc"

View File

@@ -55,6 +55,7 @@ class Source::URL::Fantia < Source::URL
# https://fantia.jp/fanclubs/64496
# https://fantia.jp/fanclubs/1654/posts
# https://job.fantia.jp/fanclubs/5734
in _, "fanclubs", /\d+/ => fanclub_id, *rest
@fanclub_id = fanclub_id

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true
class Source::URL::Fc2 < Source::URL
attr_reader :username, :profile_url
attr_reader :username, :profile_url, :page_url
def self.match?(url)
url.domain.in?(%w[fc2.com fc2blog.net fc2blog.us])
@@ -48,6 +48,7 @@ class Source::URL::Fc2 < Source::URL
# http://blog.fc2.com/g/b/o/gbot/20071023195141.jpg
in (/^blog-imgs-\d+(-origin)?$/ | "blog"), "fc2", "com", /^\w$/, /^\w$/, /^\w$/, username, file
@username = username
@page_url = "http://#{username}.blog.fc2.com/img/#{file}"
@profile_url = "http://#{username}.blog.fc2.com"
# http://diary.fc2.com/user/yuuri/img/2005_12/26.jpg
@@ -55,6 +56,9 @@ class Source::URL::Fc2 < Source::URL
# http://diary.fc2.com/user/kazuharoom/img/2015_5/22.jpg
in /diary\d*$/, "fc2", "com", "user", username, "img", date, file
@username = username
@year, @month = date.split("_")
@day = filename
@page_url = "http://#{host}/cgi-sys/ed.cgi/#{username}?Y=#{@year}&M=#{@month}&D=#{@day}"
@profile_url = "http://diary.fc2.com/cgi-sys/ed.cgi/#{username}"
# http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom/?Y=2012&M=10&D=22

View File

@@ -6,6 +6,7 @@
# Unsupported patterns:
# * https://foundation.app/@ <- This seems to be a novelty account.
# * https://foundation.app/mochiiimo <- no @
# * https://foundation.app/collection/kgfgen
class Source::URL::Foundation < Source::URL
attr_reader :username, :token_id, :work_id, :hash

View File

@@ -63,6 +63,14 @@ class Source::URL::HentaiFoundry < Source::URL
end
end
# Page URL for a Hentai Foundry picture. Uses the full user/picture form
# when the username is known, otherwise the short `pic-<id>` form.
#
# @return [String, nil] nil when no work ID was parsed from the URL
def page_url
  return nil unless work_id.present?

  if username.present?
    "https://www.hentai-foundry.com/pictures/user/#{username}/#{work_id}"
  else
    "https://www.hentai-foundry.com/pic-#{work_id}"
  end
end
# Hentai Foundry profile page for the artist named in this URL.
#
# @return [String, nil] nil when no username was parsed from the URL
def profile_url
  return nil unless username.present?

  "https://www.hentai-foundry.com/user/#{username}"
end

View File

@@ -34,6 +34,10 @@ class Source::URL::Instagram < Source::URL
end
end
# Instagram post page for this URL (written with a trailing "/").
#
# @return [String, nil] nil when no work ID was parsed from the URL
def page_url
  return nil unless work_id.present?

  "https://www.instagram.com/p/#{work_id}/"
end
def profile_url
# Instagram URLs canonically end with "/"
"https://www.instagram.com/#{username}/" if username.present?

View File

@@ -52,6 +52,10 @@ class Source::URL::Lofter < Source::URL
"#{site}#{path}" if image_url?
end
# Lofter post page, which needs both the blog username and the post ID.
#
# @return [String, nil] nil when either part is missing
def page_url
  return nil unless username.present? && work_id.present?

  "https://#{username}.lofter.com/post/#{work_id}"
end
# Lofter blog home page for the user named in this URL.
#
# @return [String, nil] nil when no username was parsed from the URL
def profile_url
  return nil unless username.present?

  "https://#{username}.lofter.com"
end

View File

@@ -77,6 +77,14 @@ class Source::URL::Mastodon < Source::URL
full_image_url.present?
end
# Status page on the same Mastodon host as this URL. Uses the
# `/@user/<id>` form when the username is known, otherwise the
# username-less `/web/statuses/<id>` form.
#
# @return [String, nil] nil when no work ID was parsed from the URL
def page_url
  return nil unless work_id.present?

  if username.present?
    "https://#{host}/@#{username}/#{work_id}"
  else
    "https://#{host}/web/statuses/#{work_id}"
  end
end
def profile_url
if username.present?
"https://#{host}/@#{username}"

View File

@@ -85,6 +85,14 @@ class Source::URL::Moebooru < Source::URL
end
end
# Post page on this Moebooru site. Prefers a lookup by post ID and falls
# back to a lookup by MD5.
#
# @return [String, nil] nil when neither a post ID nor an MD5 is known
def page_url
  return "https://#{domain}/post/show/#{work_id}" if work_id.present?
  return "https://#{domain}/post/show?md5=#{md5}" if md5.present?

  nil
end
def self.full_image_url(site_name, md5, file_ext, post_id = nil)
case site_name
when "Yande.re"

View File

@@ -59,6 +59,12 @@ class Source::URL::Newgrounds < Source::URL
url.host == "art.ngfiles.com"
end
# Newgrounds art page, which needs both the username and the work title.
#
# @return [String, nil] nil when either part is missing
def page_url
  return nil unless username.present? && work_title.present?

  "https://www.newgrounds.com/art/view/#{username}/#{work_title}"
end
# Newgrounds profile page (a per-user subdomain) for this URL's artist.
#
# @return [String, nil] nil when no username was parsed from the URL
def profile_url
  return nil unless username.present?

  "https://#{username}.newgrounds.com"
end

View File

@@ -94,6 +94,10 @@ class Source::URL::Nijie < Source::URL
to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url?
end
# Nijie illustration page for this URL.
#
# @return [String, nil] nil when no work ID was parsed from the URL
def page_url
  return nil unless work_id.present?

  "https://nijie.info/view.php?id=#{work_id}"
end
# Nijie member page for the user ID found in this URL.
#
# @return [String, nil] nil when no user ID was parsed from the URL
def profile_url
  return nil unless user_id.present?

  "https://nijie.info/members.php?id=#{user_id}"
end

View File

@@ -0,0 +1,201 @@
# frozen_string_literal: true
# Catch-all URL class. `match?` accepts every URL, so this class handles
# sites that don't warrant a dedicated Source::URL subclass. For a handful of
# such sites it still knows how to turn an image URL into the URL of the page
# that contains the image.
class Source::URL::Null < Source::URL
  # @return [String, nil] site-specific ID extracted from the URL by `parse`, if any
  # @return [String, nil] page URL derived from the image URL by `parse`, if any
  attr_reader :work_id, :page_url

  # Accepts any URL.
  #
  # @param url [Danbooru::URL] the URL being classified (unused)
  # @return [Boolean] always true
  def self.match?(url)
    true
  end

  # Human-readable name of the site. Hosts whose name can't be derived from
  # the domain by titleizing are special-cased.
  #
  # @return [String]
  def site_name
    case host
    when /ask\.fm\z/i
      "Ask.fm"
    when /bcy\.net\z/i
      "BCY"
    when /booth\.pm\z/i
      "Booth.pm"
    when /circle\.ms\z/i
      "Circle.ms"
    when /dlsite\.(com|net)\z/i
      "DLSite"
    when /doujinshi\.mugimugi\.org\z/i
      "Doujinshi.org"
    when /ko-fi\.com\z/i
      "Ko-fi"
    when /mixi\.jp\z/i
      "Mixi.jp"
    when /piapro\.jp\z/i
      "Piapro.jp"
    when /sakura\.ne\.jp\z/i
      "Sakura.ne.jp"
    else
      # "www.melonbooks.co.jp" => "Melonbooks"
      parsed_domain.sld.titleize
    end
  end

  # Pattern-match the URL against known image URL layouts and, on a match,
  # record the extracted IDs and the corresponding page URL in instance
  # variables. URLs that match nothing leave `work_id` and `page_url` nil.
  #
  # `$1` below refers to the first capture group of the last regexp in the
  # matched pattern.
  def parse
    case [subdomain, domain, *path_segments]

    # http://nekomataya.net/diarypro/data/upfile/66-1.jpg
    # http://www117.sakura.ne.jp/~cat_rice/diarypro/data/upfile/31-1.jpg
    # http://webknight0.sakura.ne.jp/cgi-bin/diarypro/data/upfile/9-1.jpg
    in _, _, *subdirs, "diarypro", "data", "upfile", /^(\d+)-\d+\.(jpg|png|gif)$/ => file
      @work_id = $1
      @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/")

    # http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=723-4.jpg
    # http://www.danshaku.sakura.ne.jp/cgi-bin/diarypro/diary.cgi?mode=image&upfile=56-1.jpg
    # http://www.yanbow.com/~myanie/diarypro/diary.cgi?mode=image&upfile=279-1.jpg
    in _, _, *subdirs, "diarypro", "diary.cgi" if params[:mode] == "image" && params[:upfile].present?
      @work_id = params[:upfile][/^\d+/]
      @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/")

    # http://com2.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/018.jpg
    # http://sozai.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/009.jpg
    in _, "doujinantena.com", "contents_jpg", /^\h{32}$/ => md5, *rest
      @md5 = md5
      @page_url = "http://doujinantena.com/page.php?id=#{md5}"

    # https://e-shuushuu.net/images/2017-07-19-915628.jpeg
    in _, "e-shuushuu.net", "images", /^\d{4}-\d{2}-\d{2}-(\d+)\.(jpeg|jpg|png|gif)$/i
      @work_id = $1
      @page_url = "https://e-shuushuu.net/image/#{@work_id}"

    # https://scontent.fmnl9-2.fna.fbcdn.net/v/t1.6435-9/196345051_961754654392125_8855002558147907833_n.jpg?_nc_cat=103&ccb=1-5&_nc_sid=0debeb&_nc_ohc=EB1RGiEOtyEAX9XE7aL&_nc_ht=scontent.fmnl9-2.fna&oh=00_AT8NNz_keqQ6VJeC1UVSMULhjaP3iykm-ONSMR7IrtarUQ&oe=6257862E
    # https://scontent.fmnl8-2.fna.fbcdn.net/v/t1.6435-9/fr/cp0/e15/q65/80900683_480934615898749_6481759463945535488_n.jpg?_nc_cat=107&ccb=1-3&_nc_sid=8024bb&_nc_ohc=cCYFUzyHDmUAX-YHJIw&_nc_ht=scontent.fmnl8-2.fna&oh=e45c3837afcfefb6a4d93adfecef88c1&oe=60F6E392
    # https://scontent.fmnl13-1.fna.fbcdn.net/v/t31.18172-8/22861751_1362164640578443_432921612329393062_o.jpg
    # https://scontent-sin1-1.xx.fbcdn.net/hphotos-xlp1/t31.0-8/s960x960/12971037_586686358150819_495608200196301072_o.jpg
    in _, "fbcdn.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/
      @work_id = $1
      @page_url = "https://www.facebook.com/photo?fbid=#{@work_id}"

    # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xlp1/t31.0-8/s960x960/13173066_623015164516858_1844421675339995359_o.jpg
    # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xpf1/v/t1.0-9/s720x720/12032214_991569624217563_4908408819297057893_n.png?oh=efe6ea26aed89c8a12ddc1832b1f0157&oe=5667D5B1&__gda__=1453845772_c742c726735047f2feb836b845ff296f
    #
    # The first filename component is a multi-digit number, so it must be
    # matched with `\d+_`; a single `\d_` never matches these filenames.
    in /fbcdn/, "akamaihd.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/
      @work_id = $1
      @page_url = "https://www.facebook.com/photo.php?fbid=#{work_id}"

    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
    in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/
      @md5 = $1
      @page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}"

    # https://a.hitomi.la/galleries/907838/1.png
    # https://0a.hitomi.la/galleries/1169701/23.png
    # https://aa.hitomi.la/galleries/990722/003_01_002.jpg
    # https://la.hitomi.la/galleries/1054851/001_main_image.jpg
    in _, "hitomi.la", "galleries", gallery_id, /^(\d+)\w*\.(jpg|png|gif)$/ => image_id
      @gallery_id = gallery_id
      # to_i drops leading zeros ("003" => 3) for the reader page anchor.
      @image_id = $1.to_i
      @page_url = "https://hitomi.la/reader/#{gallery_id}.html##{@image_id}"

    # https://aa.hitomi.la/galleries/883451/t_rena1g.png
    in _, "hitomi.la", "galleries", gallery_id, file
      @gallery_id = gallery_id
      @page_url = "https://hitomi.la/galleries/#{gallery_id}.html"

    # http://www.karabako.net/images/karabako_43878.jpg
    # http://www.karabako.net/imagesub/karabako_43222_215.jpg
    in _, "karabako.net", ("images" | "imagesub"), /^karabako_(\d+)/
      @work_id = $1
      @page_url = "http://www.karabako.net/post/view/#{work_id}"

    # http://static.minitokyo.net/downloads/31/33/764181.jpg
    in _, "minitokyo.net", "downloads", /^\d{2}$/, /^\d{2}$/, file
      @work_id = filename
      @page_url = "http://gallery.minitokyo.net/view/#{@work_id}"

    # http://i.minus.com/j2LcOC52dGLtB.jpg
    # http://i5.minus.com/ik26grnRJAmYh.jpg
    in _, "minus.com", /^[ij]([a-zA-Z0-9]{12,})\.(jpg|png|gif)$/
      @work_id = $1
      @page_url = "http://minus.com/i/#{@work_id}"

    # http://jpg.nijigen-daiaru.com/7364/013.jpg
    in "jpg", "nijigen-daiaru.com", /^\d+$/ => work_id, file
      @work_id = work_id
      @page_url = "http://nijigen-daiaru.com/book.php?idb=#{@work_id}"

    # http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
    # http://kura3.photozou.jp/pub/741/2662741/photo/160341863_624.v1353780834.jpg
    in _, "photozou.jp", "pub", /^\d+$/, user_id, "photo", /^(\d+)/ => file
      @user_id = user_id
      @work_id = $1
      @page_url = "https://photozou.jp/photo/show/#{@user_id}/#{@work_id}"

    # https://tulip.paheal.net/_images/4f309b2b680da9c3444ed462bb172214/3910816%20-%20Dark_Magician_Girl%20MINK343%20Yu-Gi-Oh!.jpg
    # http://rule34-data-002.paheal.net/_images/2ab55f9291c8f2c68cdbeac998714028/2401510%20-%20Ash_Ketchum%20Lillie%20Porkyman.jpg
    # http://rule34-images.paheal.net/c4710f05e76bdee22fcd0d62bf1ac840/262685%20-%20mabinogi%20nao.jpg
    in _, "paheal.net", *subdirs, /^\h{32}$/ => md5, /^(\d+)/ => file
      @md5 = md5
      @work_id = $1
      @page_url = "https://rule34.paheal.net/post/view/#{@work_id}"

    # https://api-cdn-mp4.rule34.xxx/images/4330/2f85040320f64c0e42128a8b8f6071ce.mp4
    # https://ny5webm.rule34.xxx//images/4653/3c63956b940d0ff565faa8c7555b4686.mp4?5303486
    # https://img.rule34.xxx//images/4977/7d76919c2f713c580f69fe129d2d1a44.jpeg?5668795
    # http://rule34.xxx//images/993/5625625970c9ce8c5121fde518c2c4840801cd29.jpg?992983
    # http://img3.rule34.xxx/img/rule34//images/1180/76c6497b5138c4122710c2d05458e729a8d34f7b.png?1190815
    # http://aimg.rule34.xxx//samples/1267/sample_d628f215f27815dc9c1d365a199ee68e807efac1.jpg?1309664
    in _, "rule34.xxx", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpg|jpeg|png|gif|webm|mp4)$/
      @md5 = $1
      @page_url = "https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}"

    # https://cs.sankakucomplex.com/data/68/6c/686ceee03af38fe4ceb45bf1c50947e0.jpg?e=1591893718&m=fLlJfTrK_j2Rnc0uIHNC3w
    # https://v.sankakucomplex.com/data/24/ff/24ff5da1fd7ed051b083b36e4e51de8e.mp4?e=1644999580&m=-OtZg2QdtKbibMte8vlsdw&expires=1644999580&token=0YUdUKKwTmvpozhG1WW_nRvSUQw3WJd574andQv-KYY
    # https://cs.sankakucomplex.com/data/sample/2a/45/sample-2a45c67281b0fcfd26208063f81a3114.jpg?e=1590609355&m=cexHhVyJguoZqPB3z3N7aA
    # http://c3.sankakucomplex.com/data/sample/8a/44/preview8a44211650e818ef07e5d00284c20a14.jpg
    in _, "sankakucomplex.com", "data", *subdirs, /^(?:preview|sample-)?(\h{32})\.(jpg|jpeg|gif|png|webm|mp4)$/
      @md5 = $1
      @page_url = "https://chan.sankakucomplex.com/post/show?md5=#{@md5}"

    # http://shimmie.katawa-shoujo.com/image/3657.jpg
    in "shimmie", "katawa-shoujo.com", "image", file
      @work_id = filename
      @page_url = "https://shimmie.katawa-shoujo.com/post/view/#{@work_id}"

    # http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
    # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
    # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
    # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
    in ("img" | "ecdnimg"), "toranoana.jp", *subdirs, /^\d{2}$/, /^\d{4}$/, /^\d{2}$/, /^\d{2}$/, /^(\d{12})-\d+p\.jpg$/ => file
      @work_id = $1
      @page_url = "https://ec.toranoana.jp/tora_r/ec/item/#{@work_id}"

    # http://p.twpl.jp/show/orig/DTaCZ
    # http://p.twpl.jp/show/large/5zack
    # http://p.twipple.jp/show/orig/vXqaU
    in _, ("twpl.jp" | "twipple.jp"), "show", ("large" | "orig"), work_id
      @work_id = work_id
      @page_url = "http://p.twipple.jp/#{work_id}"

    # https://vignette.wikia.nocookie.net/queensblade/images/3/33/WGAIRI1.jpg/
    # https://vignette1.wikia.nocookie.net/valkyriecrusade/images/b/bf/Joan_Of_Arc_H.png/revision/latest?cb=20170801081004
    # https://static.wikia.nocookie.net/valkyriecrusade/images/3/3f/Joan_Of_Arc.png/revision/latest/scale-to-width-down/270?cb=20170801081000
    in _, "nocookie.net", wiki, "images", /^\h$/, /^\h\h$/, file, *rest
      @wiki = wiki
      @file = file
      @page_url = "https://#{wiki}.fandom.com/wiki/File:#{file}"

    # https://static.zerochan.net/Fullmetal.Alchemist.full.2831797.png
    # https://s1.zerochan.net/Cocoa.Cookie.600.2957938.jpg
    # http://static.zerochan.net/full/24/13/90674.jpg
    in _, "zerochan.net", *subdirs, /(\d+)\.(jpg|png|gif)$/
      @work_id = $1
      @page_url = "https://www.zerochan.net/#{@work_id}#full"

    # http://www.zerochan.net/full/1567893
    in _, "zerochan.net", "full", /^\d+$/ => work_id
      @work_id = work_id
      @page_url = "https://www.zerochan.net/#{@work_id}#full"

    else
      # Unrecognized URL: nothing to extract. The empty `else` is required,
      # because `case/in` raises NoMatchingPatternError when no branch matches.
    end
  end
end

View File

@@ -52,6 +52,10 @@ class Source::URL::Plurk < Source::URL
host == "images.plurk.com"
end
# Plurk post page for this URL.
#
# @return [String, nil] nil when no work ID was parsed from the URL
def page_url
  return nil unless work_id.present?

  "https://www.plurk.com/p/#{work_id}"
end
# Plurk profile page for the user named in this URL.
#
# @return [String, nil] nil when no username was parsed from the URL
def profile_url
  return nil unless username.present?

  "https://www.plurk.com/#{username}"
end

View File

@@ -43,6 +43,10 @@ class Source::URL::Skeb < Source::URL
end
end
# Skeb work page, which needs both the artist's username and the work ID.
#
# @return [String, nil] nil when either part is missing
def page_url
  return nil unless username.present? && work_id.present?

  "https://skeb.jp/@#{username}/works/#{work_id}"
end
# Skeb profile page for the artist named in this URL.
#
# @return [String, nil] nil when no username was parsed from the URL
def profile_url
  return nil unless username.present?

  "https://skeb.jp/@#{username}"
end

View File

@@ -85,8 +85,7 @@ class Source::URL::Tumblr < Source::URL
end
def page_url
return nil unless @blog_name.present? && @work_id.present?
"https://#{@blog_name}.tumblr.com/post/#{@work_id}"
"https://#{blog_name}.tumblr.com/post/#{work_id}" if blog_name.present? && work_id.present?
end
def profile_url

View File

@@ -72,8 +72,7 @@ class Source::URL::TwitPic < Source::URL
end
def page_url
return nil unless base36_id.present?
"https://twitpic.com/#{base36_id}"
"https://twitpic.com/#{base36_id}" if base36_id.present?
end
def profile_url

View File

@@ -26,7 +26,7 @@ class Source::URL::Twitter < Source::URL
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
RESERVED_USERNAMES = %w[home i intent search]
attr_reader :status_id, :twitter_username, :user_id
attr_reader :status_id, :username, :user_id
def self.match?(url)
return false if Source::URL::TwitPic.match?(url) # TwitPic uses https://o.twimg.com/ URLs
@@ -45,12 +45,12 @@ class Source::URL::Twitter < Source::URL
# https://twitter.com/Kekeflipnote/status/1496555599718498319/video/1
# https://twitter.com/sato_1_11/status/1496489742791475201/photo/2
in "twitter.com", username, "status", status_id, *rest
@twitter_username = username
@username = username
@status_id = status_id
# https://twitter.com/motty08111213
in "twitter.com", username, *rest
@twitter_username = username unless username.in?(RESERVED_USERNAMES)
@username = username unless username.in?(RESERVED_USERNAMES)
# https://twitter.com/intent/user?user_id=1485229827984531457
in "twitter.com", "intent", "user" if params[:user_id].present?
@@ -58,7 +58,7 @@ class Source::URL::Twitter < Source::URL
# https://twitter.com/intent/user?screen_name=ryuudog_NFT
in "twitter.com", "intent", "user" if params[:screen_name].present?
@twitter_username = params[:screen_name]
@username = params[:screen_name]
# https://twitter.com/i/user/889592953
in "twitter.com", "i", "user", user_id
@@ -101,9 +101,17 @@ class Source::URL::Twitter < Source::URL
"#{site}/#{@file_path}:orig"
end
# Tweet page for this URL. Uses the `/<username>/status/<id>` form when the
# username is known, otherwise the username-less `/i/web/status/<id>` form.
#
# @return [String, nil] nil when no status ID was parsed from the URL
def page_url
  return nil unless status_id.present?

  if username.present?
    "https://twitter.com/#{username}/status/#{status_id}"
  else
    "https://twitter.com/i/web/status/#{status_id}"
  end
end
def profile_url
if twitter_username.present?
"https://twitter.com/#{twitter_username}"
if username.present?
"https://twitter.com/#{username}"
elsif user_id.present?
# "https://twitter.com/i/user/#{user_id}
"https://twitter.com/intent/user?user_id=#{user_id}"

View File

@@ -104,7 +104,7 @@ class Source::URL::Weibo < Source::URL
end
end
def normalized_url
def page_url
if @artist_short_id.present? && @illust_base62_id.present?
"https://www.weibo.com/#{@artist_short_id}/#{@illust_base62_id}"
elsif mobile_url.present?

View File

@@ -23,7 +23,6 @@ module Sources
Strategies::Foundation,
Strategies::Plurk,
Strategies::Tinami,
Strategies::TwitPic,
Strategies::Fantia,
]
end
@@ -36,9 +35,5 @@ module Sources
def self.canonical(url, referer)
find(url, referer).canonical_url
end
def self.normalize_source(url)
find(url).normalize_for_source || url
end
end
end

View File

@@ -52,16 +52,6 @@ module Sources::Strategies
end
end
def normalize_for_source
return if project_id.blank?
if artist_name_from_url.present?
"https://#{artist_name_from_url}.artstation.com/projects/#{project_id}"
else
"https://www.artstation.com/artwork/#{project_id}"
end
end
def image_urls_from_api
api_response[:assets].to_a.map do |asset|
if asset[:asset_type] == "image"

View File

@@ -138,12 +138,6 @@ module Sources
end
memoize :http_downloader
# Given a post/image url, this is the normalized url that will be displayed in a post's page in its stead.
# This function should never make any network call, even indirectly. Return nil to never normalize.
def normalize_for_source
nil
end
def artists
ArtistFinder.find_artists(profile_url)
end

View File

@@ -65,10 +65,6 @@ module Sources
end
end
def normalize_for_source
page_url_from_image_url
end
def profile_url
return nil if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}"

View File

@@ -34,19 +34,6 @@ module Sources
end
end
def normalize_for_source
if illust_id.present?
if artist_name_from_url.present?
"https://#{artist_name_from_url}.fanbox.cc/posts/#{illust_id}"
elsif artist_id_from_url.present?
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}/post/#{illust_id}"
end
elsif artist_id_from_url.present?
# Cover images
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}"
end
end
def profile_url
return if artist_name.blank?

View File

@@ -128,10 +128,6 @@ module Sources::Strategies
DText.from_html(artist_commentary_desc)
end
def normalize_for_source
page_url
end
def work_type
parsed_url.work_type || parsed_referer&.work_type
end

View File

@@ -76,10 +76,6 @@ module Sources
DText.from_html(artist_commentary_desc)
end
def normalize_for_source
page_url
end
def api_response
return {} if page.nil?

View File

@@ -68,10 +68,6 @@ module Sources
DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n")
end
def normalize_for_source
page_url
end
def illust_id
parsed_url.work_id || parsed_referer&.work_id
end

View File

@@ -47,10 +47,6 @@ module Sources
page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html
end
def normalize_for_source
page_url
end
def illust_id
parsed_url.work_id || parsed_referer&.work_id
end

View File

@@ -79,10 +79,6 @@ module Sources::Strategies
api_response.tags
end
def normalize_for_source
page_url
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element|
if element.name == "a"

View File

@@ -25,17 +25,6 @@ module Sources
image_urls.first
end
def normalize_for_source
id = post_id_from_url
md5 = post_md5_from_url
if id.present?
"https://#{domain}/post/show/#{id}"
elsif md5.present?
"https://#{domain}/post?tags=md5:#{md5}"
end
end
def tags
api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]

View File

@@ -84,10 +84,6 @@ module Sources
end
end
def normalize_for_source
page_url
end
def user_name
parsed_url.username || parsed_referer&.username
end

View File

@@ -66,10 +66,6 @@ module Sources
end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp
end
def normalize_for_source
page_url
end
def tag_name
return if api_client&.user_id.blank?
"nicoseiga#{api_client.user_id}"

View File

@@ -109,12 +109,6 @@ module Sources
artist_id_from_url || artist_id_from_page
end
def normalize_for_source
return if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"
end
def doujin?
page&.at("#dojin_left").present?
end

View File

@@ -18,112 +18,6 @@ module Sources
def artists
ArtistFinder.find_artists(url)
end
def normalize_for_source
case url
when %r{\Ahttp://www\.karabako\.net/images(?:ub)?/karabako_(\d+)(?:_\d+)?\.}i
"http://www.karabako.net/post/view/#{$1}"
# XXX http://twipple.jp is defunct
# http://p.twpl.jp/show/orig/myRVs
when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i
"http://p.twipple.jp/#{$1}"
when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i
username = $1
filename = $2
"http://#{username}.blog.fc2.com/img/#{filename}/"
when %r{\Ahttps?://diary(\d)?\.fc2\.com/user/([^/]+)/img/(\d+)_(\d+)/(\d+)\.}i
server_id = $1
username = $2
year = $3
month = $4
day = $5
"http://diary#{server_id}.fc2.com/cgi-sys/ed.cgi/#{username}?Y=#{year}&M=#{month}&D=#{day}"
when %r{\Ahttps?://(?:fbcdn-)?s(?:content|photos)-[^/]+\.(?:fbcdn|akamaihd)\.net/hphotos-.+/\d+_(\d+)_(?:\d+_){1,3}[no]\.}i
"https://www.facebook.com/photo.php?fbid=#{$1}"
when %r{\Ahttps?://c(?:s|han|[1-4])\.sankakucomplex\.com/data(?:/sample)?/(?:[a-f0-9]{2}/){2}(?:sample-|preview)?([a-f0-9]{32})}i
"https://chan.sankakucomplex.com/en/post/show?md5=#{$1}"
when %r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?|png))?\z}i
"https://www.zerochan.net/#{$1}#full"
when %r{\Ahttps?://static[1-6]?\.minitokyo\.net/(?:downloads|view)/(?:\d{2}/){2}(\d+)}i
"http://gallery.minitokyo.net/download/#{$1}"
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
when %r{\Ahttps?://(?:\w+\.)?gelbooru\.com//?(?:images|samples)/(?:\d+|\h\h/\h\h)/(?:sample_)?(?<md5>\h{32})\.}i
"https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{$~[:md5]}"
when %r{\Ahttps?://(?:slot\d*\.)?im(?:g|ages)\d*\.wikia\.(?:nocookie\.net|com)/(?:_{2}cb\d{14}/)?([^/]+)(?:/[a-z]{2})?/images/(?:(?:thumb|archive)?/)?[a-f0-9]/[a-f0-9]{2}/(?:\d{14}(?:!|%21))?([^/]+)}i
subdomain = $1
filename = $2
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
when %r{\Ahttps?://vignette(?:\d*)\.wikia\.nocookie\.net/([^/]+)/images/[a-f0-9]/[a-f0-9]{2}/([^/]+)}i
subdomain = $1
filename = $2
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
when %r{\Ahttps?://e-shuushuu.net/images/\d{4}-(?:\d{2}-){2}(\d+)}i
"https://e-shuushuu.net/image/#{$1}"
when %r{\Ahttps?://jpg\.nijigen-daiaru\.com/(\d+)}i
"http://nijigen-daiaru.com/book.php?idb=#{$1}"
when %r{\Ahttps?://sozai\.doujinantena\.com/contents_jpg/([a-f0-9]{32})/}i
"http://doujinantena.com/page.php?id=#{$1}"
when %r{\Ahttps?://rule34-(?:data-\d{3}|images)\.paheal\.net/(?:_images/)?([a-f0-9]{32})}i
"https://rule34.paheal.net/post/list/md5:#{$1}/1"
when %r{\Ahttps?://shimmie\.katawa-shoujo\.com/image/(\d+)}i
"https://shimmie.katawa-shoujo.com/post/view/#{$1}"
when %r{\Ahttps://(?:(?:\w+\.)?rule34\.xxx|img\.booru\.org/(?:rule34|r34))(?:/(?:img/rule34|r34))?/{1,2}images/\d+/([a-f0-9]{32})\.}i
"https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}"
when %r{(\Ahttps?://.+)/diarypro/d(?:ata/upfile/|iary\.cgi\?mode=image&upfile=)(\d+)}i
base_url = $1
entry_no = $2
"#{base_url}/diarypro/diary.cgi?no=#{entry_no}"
# XXX site is defunct
when %r{\Ahttps?://i(?:\d)?\.minus\.com/(?:i|j)([^\.]{12,})}i
"http://minus.com/i/#{$1}"
# http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
# http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg
when %r{\Ahttps?://\w+\.photozou\.jp/pub/\d+/(?<artist_id>\d+)/photo/(?<photo_id>\d+)_.*$}i
"https://photozou.jp/photo/show/#{$~[:artist_id]}/#{$~[:photo_id]}"
# http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
# http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
# http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
# https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i
"https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/"
# https://a.hitomi.la/galleries/907838/1.png
# https://0a.hitomi.la/galleries/1169701/23.png
# https://aa.hitomi.la/galleries/990722/003_01_002.jpg
# https://la.hitomi.la/galleries/1054851/001_main_image.jpg
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/(?<image_id>\d+)\w*\.[a-z]+\z}i
"https://hitomi.la/reader/#{$~[:gallery_id]}.html##{$~[:image_id].to_i}"
# https://aa.hitomi.la/galleries/883451/t_rena1g.png
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/\w*\.[a-z]+\z}i
"https://hitomi.la/galleries/#{$~[:gallery_id]}.html"
else
nil
end
end
end
end
end

View File

@@ -96,11 +96,6 @@ module Sources
api_illust[:description]
end
def normalize_for_source
return nil if illust_id.blank?
"https://www.pixiv.net/artworks/#{illust_id}"
end
def tag_name
moniker
end

View File

@@ -110,10 +110,6 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end
def normalize_for_source
page_url
end
memoize :page, :page_json, :api_replies
end
end

View File

@@ -48,10 +48,6 @@ module Sources
"https://skeb.jp/@#{artist_name}/works/#{illust_id}"
end
def normalize_for_source
page_url
end
def api_url
return nil unless artist_name.present? && illust_id.present?
"https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"

View File

@@ -83,10 +83,6 @@ module Sources::Strategies
super(tag)
end
def normalize_for_source
parsed_url.page_url
end
def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip
end

View File

@@ -1,14 +0,0 @@
# frozen_string_literal: true
# @see Source::URL::TwitPic
module Sources::Strategies
class TwitPic < Base
def match?
Source::URL::TwitPic === parsed_url
end
def normalize_for_source
parsed_url.page_url || url
end
end
end

View File

@@ -93,14 +93,6 @@ module Sources::Strategies
api_response[:full_text].to_s
end
def normalize_for_source
if tag_name_from_url.present? && status_id.present?
"https://twitter.com/#{tag_name_from_url}/status/#{status_id}"
elsif status_id.present?
"https://twitter.com/i/web/status/#{status_id}"
end
end
def tags
api_response.dig(:entities, :hashtags).to_a.map do |hashtag|
[hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"]
@@ -150,7 +142,7 @@ module Sources::Strategies
end
# Extracts the account name from the URL being processed, falling back to the
# referer URL when the main URL carries no username.
#
# Uses the `username` accessor only; the stale call to the former
# `twitter_username` accessor was dropped because its result was discarded and
# it fails on URL objects that no longer define that method.
#
# @return [String, nil] the username, or nil when neither URL has one
def tag_name_from_url
  parsed_url.username || parsed_referer&.username
end
memoize :api_response

View File

@@ -87,10 +87,6 @@ module Sources
end
end
# Returns the normalized source for this strategy: the `normalized_url` of the
# already-parsed URL object (`parsed_url`).
def normalize_for_source
parsed_url.normalized_url
end
def api_response
return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank?

View File

@@ -307,15 +307,16 @@ class Post < ApplicationRecord
end
end
# Parses the post's source into a Source::URL.
#
# @return the result of `Source::URL.parse(source)`, or nil when the source
#   is not a web source
def parsed_source
  return nil unless web_source?

  Source::URL.parse(source)
end
# Returns the source to display for this post: the page URL derived from the
# parsed source when one can be generated, otherwise the raw source.
#
# The leftover `Sources::Strategies.normalize_source(source)` call was removed:
# its result was discarded, so it only added strategy work on every call.
#
# @return [String] the page URL, or `source` itself for non-web sources,
#   unparseable sources, and URLs with no known page URL
def normalized_source
  return source unless web_source?

  parsed_source&.page_url || source
end
# Returns the domain of the post's source URL.
#
# The leftover `Danbooru::URL.parse(normalized_source)` statement was removed:
# its result was discarded, so it only re-normalized and re-parsed the source
# for nothing. The domain now comes straight from `parsed_source`.
#
# @return [String] the domain, or "" for non-web or unparseable sources
def source_domain
  return "" unless web_source?

  # `nil.to_s` is "", so an unparseable source also yields an empty string.
  parsed_source&.domain.to_s
end
end

View File

@@ -194,22 +194,13 @@ module Sources
assert_equal("sa-dui", site.artist_name)
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://www.artstation.com/artwork/ghost-in-the-shell-fandom"
source2 = "https://anubis1982918.artstation.com/projects/qPVGP/"
source3 = "https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal("https://anubis1982918.artstation.com/projects/qPVGP", Sources::Strategies.normalize_source(source2))
assert_equal("https://dudeunderscore.artstation.com/projects/NoNmD", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"
bad_source2 = "https://www.artstation.com"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
context "generating page urls" do
should "work" do
assert_equal("https://www.artstation.com/artwork/ghost-in-the-shell-fandom", Source::URL.page_url("https://www.artstation.com/artwork/ghost-in-the-shell-fandom"))
assert_equal("https://www.artstation.com/artwork/qPVGP", Source::URL.page_url("https://anubis1982918.artstation.com/projects/qPVGP/"))
assert_equal("https://www.artstation.com/artwork/NoNmD", Source::URL.page_url("https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041"))
assert_nil(Source::URL.page_url("http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"))
assert_nil(Source::URL.page_url("https://www.artstation.com"))
end
end
end

View File

@@ -377,8 +377,8 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls " do
should "work" do
source1 = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg"
source2 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg"
source3 = "http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png"
@@ -386,21 +386,18 @@ module Sources
source5 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc"
source6 = "https://fav.me/dbc3a48"
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.deviantart.com/deviation/417560500", Sources::Strategies.normalize_source(source4))
assert_equal("https://www.deviantart.com/deviation/599977532", Sources::Strategies.normalize_source(source5))
assert_equal("https://www.deviantart.com/deviation/685436408", Sources::Strategies.normalize_source(source6))
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Source::URL.page_url(source1))
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Source::URL.page_url(source2))
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Source::URL.page_url(source3))
assert_equal("https://www.deviantart.com/deviation/417560500", Source::URL.page_url(source4))
assert_equal("https://www.deviantart.com/deviation/599977532", Source::URL.page_url(source5))
assert_equal("https://www.deviantart.com/deviation/685436408", Source::URL.page_url(source6))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg"
bad_source2 = "http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg"
bad_source3 = "https://deviantart.net"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
should "handle inconvertible urls" do
assert_nil(Source::URL.page_url("http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg"))
assert_nil(Source::URL.page_url("http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg"))
assert_nil(Source::URL.page_url("https://deviantart.net"))
end
end
end

View File

@@ -135,17 +135,15 @@ module Sources
end
end
context "normalizing for source" do
should "normalize cover images to the profile link" do
context "generating page urls" do
should "convert cover images to the profile url" do
cover = "https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg"
assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Sources::Strategies.normalize_source(cover))
assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Source::URL.page_url(cover))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg"
bad_source2 = "https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
should "handle inconvertible urls" do
assert_nil(Source::URL.page_url("https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg"))
assert_nil(Source::URL.page_url("https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png"))
end
end
end

View File

@@ -82,20 +82,16 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg"
source2 = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg"
source3 = "http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver."
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://pictures.hentai-foundry.com/a/AnimeFlux"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source1))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source2))
assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Source::URL.page_url(source3))
assert_nil(Source::URL.page_url("https://pictures.hentai-foundry.com/a/AnimeFlux"))
end
end

View File

@@ -122,25 +122,17 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://pawoo.net/@evazion/19451018/"
source2 = "https://pawoo.net/web/statuses/19451018/favorites"
source3 = "https://baraag.net/@bardbot/105732813175612920/"
assert_equal("https://pawoo.net/@evazion/19451018", Sources::Strategies.normalize_source(source1))
assert_equal("https://pawoo.net/web/statuses/19451018", Sources::Strategies.normalize_source(source2))
assert_equal("https://baraag.net/@bardbot/105732813175612920", Sources::Strategies.normalize_source(source3))
context "generating page urls" do
should "work" do
assert_equal("https://pawoo.net/@evazion/19451018", Source::URL.page_url("https://pawoo.net/@evazion/19451018/"))
assert_equal("https://pawoo.net/web/statuses/19451018", Source::URL.page_url("https://pawoo.net/web/statuses/19451018/favorites"))
assert_equal("https://baraag.net/@bardbot/105732813175612920", Source::URL.page_url("https://baraag.net/@bardbot/105732813175612920/"))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png"
bad_source2 = "https://pawoo.net/@evazion/media"
bad_source3 = "https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
should "handle inconvertible urls" do
assert_nil(Source::URL.page_url("https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png"))
assert_nil(Source::URL.page_url("https://pawoo.net/@evazion/media"))
assert_nil(Source::URL.page_url("https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png"))
end
end

View File

@@ -111,29 +111,29 @@ module Sources
end
end
context "normalizing for source" do
should "normalize yande.re sources correctly" do
context "generating page urls" do
should "generate yande.re urls correctly" do
source1 = "https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png"
source2 = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg"
source3 = "https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg"
source4 = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png"
source5 = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg"
assert_equal("https://yande.re/post/show/377828", Sources::Strategies.normalize_source(source1))
assert_equal("https://yande.re/post/show/349790", Sources::Strategies.normalize_source(source2))
assert_equal("https://yande.re/post/show/469784", Sources::Strategies.normalize_source(source3))
assert_equal("https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6", Sources::Strategies.normalize_source(source4))
assert_equal("https://yande.re/post?tags=md5:22577d2344fe694cf47f80563031b3cd", Sources::Strategies.normalize_source(source5))
assert_equal("https://yande.re/post/show/377828", Source::URL.page_url(source1))
assert_equal("https://yande.re/post/show/349790", Source::URL.page_url(source2))
assert_equal("https://yande.re/post/show/469784", Source::URL.page_url(source3))
assert_equal("https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6", Source::URL.page_url(source4))
assert_equal("https://yande.re/post/show?md5=22577d2344fe694cf47f80563031b3cd", Source::URL.page_url(source5))
end
should "normalize konachan.com sources correctly" do
should "generate konachan.com urls correctly" do
source1 = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg"
source2 = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg"
source3 = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg"
assert_equal("https://konachan.com/post/show/270807", Sources::Strategies.normalize_source(source1))
assert_equal("https://konachan.com/post/show/270803", Sources::Strategies.normalize_source(source2))
assert_equal("https://konachan.com/post?tags=md5:99a3c4f10c327d54486259a74173fc0b", Sources::Strategies.normalize_source(source3))
assert_equal("https://konachan.com/post/show/270807", Source::URL.page_url(source1))
assert_equal("https://konachan.com/post/show/270803", Source::URL.page_url(source2))
assert_equal("https://konachan.com/post/show?md5=99a3c4f10c327d54486259a74173fc0b", Source::URL.page_url(source3))
end
end
end

View File

@@ -98,16 +98,10 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
source = "https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181"
assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Sources::Strategies.normalize_source(source))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
context "generating page urls" do
should "work" do
assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Source::URL.page_url("https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181"))
assert_nil(Source::URL.page_url("https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"))
end
end
end

View File

@@ -159,22 +159,18 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf"
source2 = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893"
source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"
source4 = "http://seiga.nicovideo.jp/image/source?id=3312222"
assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Sources::Strategies.normalize_source(source1))
assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Sources::Strategies.normalize_source(source2))
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Sources::Strategies.normalize_source(source3))
assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://seiga.nicovideo.jp"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Source::URL.page_url(source1))
assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Source::URL.page_url(source2))
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Source::URL.page_url(source3))
assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://seiga.nicovideo.jp"))
end
end

View File

@@ -317,23 +317,23 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png"
source2 = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png"
assert_equal("https://nijie.info/view.php?id=218856", Sources::Strategies.normalize_source(source1))
assert_equal("https://nijie.info/view.php?id=287736", Sources::Strategies.normalize_source(source2))
assert_equal("https://nijie.info/view.php?id=218856", Source::URL.page_url(source1))
assert_equal("https://nijie.info/view.php?id=287736", Source::URL.page_url(source2))
end
should "avoid normalizing unnormalizable urls" do
should "handle inconvertible urls" do
bad_source1 = "https://pic01.nijie.info/nijie_picture/20120211210359.jpg"
bad_source2 = "https://pic04.nijie.info/omata/4829_20161128012012.png"
bad_source3 = "https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
assert_nil(Source::URL.page_url(bad_source1))
assert_nil(Source::URL.page_url(bad_source2))
assert_nil(Source::URL.page_url(bad_source3))
end
end

View File

@@ -29,30 +29,30 @@ module Sources
context "normalizing for source" do
should "normalize karabako links" do
source = "http://www.karabako.net/images/karabako_38835.jpg"
assert_equal("http://www.karabako.net/post/view/38835", Sources::Strategies.normalize_source(source))
assert_equal("http://www.karabako.net/post/view/38835", Source::URL.page_url(source))
end
should "normalize twipple links" do
source = "http://p.twpl.jp/show/orig/mI2c3"
assert_equal("http://p.twipple.jp/mI2c3", Sources::Strategies.normalize_source(source))
assert_equal("http://p.twipple.jp/mI2c3", Source::URL.page_url(source))
end
should "normalize fc2 links" do
source1 = "https://blog-imgs-41.fc2.com/t/u/y/tuyadasi/file.png"
source2 = "http://diary.fc2.com/user/kazuharoom/img/2020_1/29.jpg"
assert_equal("http://tuyadasi.blog.fc2.com/img/file.png/", Sources::Strategies.normalize_source(source1))
assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Sources::Strategies.normalize_source(source2))
assert_equal("http://tuyadasi.blog.fc2.com/img/file.png", Source::URL.page_url(source1))
assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Source::URL.page_url(source2))
end
should "normalize facebook links" do
source = "https://scontent-sin1-1.xx.fbcdn.net/hphotos-xtp1/t31.0-8/11254493_576443445841777_7716273903390212288_o.jpg"
assert_equal("https://www.facebook.com/photo.php?fbid=576443445841777", Sources::Strategies.normalize_source(source))
assert_equal("https://www.facebook.com/photo?fbid=576443445841777", Source::URL.page_url(source))
end
should "normalize sankaku links" do
source = "http://cs.sankakucomplex.com/data/sample/c2/d7/sample-c2d7270b84ac81326384d4eadd4d4746.jpg?2738848"
assert_equal("https://chan.sankakucomplex.com/en/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Sources::Strategies.normalize_source(source))
assert_equal("https://chan.sankakucomplex.com/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Source::URL.page_url(source))
end
should "normalize zerochan links" do
@@ -60,17 +60,17 @@ module Sources
source2 = "https://s4.zerochan.net/Victorique.de.Blois.full.411536.jpg"
source3 = "http://www.zerochan.net/full/1567893"
assert_equal("https://www.zerochan.net/183273#full", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.zerochan.net/411536#full", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.zerochan.net/1567893#full", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.zerochan.net/183273#full", Source::URL.page_url(source1))
assert_equal("https://www.zerochan.net/411536#full", Source::URL.page_url(source2))
assert_equal("https://www.zerochan.net/1567893#full", Source::URL.page_url(source3))
end
should "normalize minitokyo links" do
source1 = "http://static.minitokyo.net/downloads/27/13/365677.jpg?433592448,Minitokyo.Eien.no.Aselia.Scans_365677.jpg"
source2 = "http://static.minitokyo.net/downloads/14/33/199164.jpg?928244019"
assert_equal("http://gallery.minitokyo.net/download/365677", Sources::Strategies.normalize_source(source1))
assert_equal("http://gallery.minitokyo.net/download/199164", Sources::Strategies.normalize_source(source2))
assert_equal("http://gallery.minitokyo.net/view/365677", Source::URL.page_url(source1))
assert_equal("http://gallery.minitokyo.net/view/199164", Source::URL.page_url(source2))
end
should "normalize gelbooru links" do
@@ -78,87 +78,83 @@ module Sources
source2 = "http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png"
source3 = "https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg"
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Sources::Strategies.normalize_source(source1))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Sources::Strategies.normalize_source(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Sources::Strategies.normalize_source(source3))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Source::URL.page_url(source1))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Source::URL.page_url(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Source::URL.page_url(source3))
end
should "normalize wikia links" do
source = "https://vignette.wikia.nocookie.net/valkyriecrusade/images/c/c5/Crimson_Hatsune_H.png/revision/latest?cb=20180702031954"
assert_equal("https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png", Sources::Strategies.normalize_source(source))
assert_equal("https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png", Source::URL.page_url(source))
end
should "normalize e-shuushuu links" do
source = "http://e-shuushuu.net/images/2014-07-22-662472.png"
assert_equal("https://e-shuushuu.net/image/662472", Sources::Strategies.normalize_source(source))
assert_equal("https://e-shuushuu.net/image/662472", Source::URL.page_url(source))
end
should "normalize nijigen-daiaru links" do
source = "http://jpg.nijigen-daiaru.com/19909/029.jpg"
assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Sources::Strategies.normalize_source(source))
assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Source::URL.page_url(source))
end
should "normalize doujinantena links" do
source = "http://sozai.doujinantena.com/contents_jpg/d6c39f09d435e32c221e4ef866eceba4/015.jpg"
assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Sources::Strategies.normalize_source(source))
assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Source::URL.page_url(source))
end
should "normalize paheal.net links" do
source = "http://rule34-data-010.paheal.net/_images/854806addcd3b1246424e7cea49afe31/852405%20-%20Darkstalkers%20Felicia.jpg"
assert_equal("https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1", Sources::Strategies.normalize_source(source))
assert_equal("https://rule34.paheal.net/post/view/852405", Source::URL.page_url(source))
end
should "normalize shimmie.katawa-shoujo.com links" do
source = "http://shimmie.katawa-shoujo.com/image/2740.png"
assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Sources::Strategies.normalize_source(source))
assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Source::URL.page_url(source))
end
should "normalize rule34.xxx links" do
source = "https://us.rule34.xxx//images/1802/0adc8fa0604dc445b4b47e6f4c436a08.jpeg?1949807"
assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Sources::Strategies.normalize_source(source))
assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Source::URL.page_url(source))
end
should "normalize diarypro links" do
source1 = "http://nekomataya.net/diarypro/data/upfile/216-1.jpg"
source2 = "http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=716-3.jpg"
assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Sources::Strategies.normalize_source(source1))
assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Sources::Strategies.normalize_source(source2))
assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Source::URL.page_url(source1))
assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Source::URL.page_url(source2))
end
should "normalize minus.com links" do
source = "http://i1.minus.com/ibb0DuE2Ds0yE6.jpg"
assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Sources::Strategies.normalize_source(source))
assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Source::URL.page_url(source))
end
should "normalize photozou links" do
source1 = "http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg"
source2 = "http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg"
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Sources::Strategies.normalize_source(source1))
assert_equal("https://photozou.jp/photo/show/1986212/118493247", Sources::Strategies.normalize_source(source2))
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Source::URL.page_url(source1))
assert_equal("https://photozou.jp/photo/show/1986212/118493247", Source::URL.page_url(source2))
end
should "normalize toranoana links" do
source1 = "http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg"
source2 = "https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg"
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695/", Sources::Strategies.normalize_source(source1))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417/", Sources::Strategies.normalize_source(source2))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695", Source::URL.page_url(source1))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417", Source::URL.page_url(source2))
end
should "normalize hitomi.la links" do
source1 = "https://aa.hitomi.la/galleries/883451/t_rena1g.png"
source2 = "https://la.hitomi.la/galleries/1054851/001_main_image.jpg"
assert_equal("https://hitomi.la/galleries/883451.html", Sources::Strategies.normalize_source(source1))
assert_equal("https://hitomi.la/reader/1054851.html#1", Sources::Strategies.normalize_source(source2))
assert_equal("https://hitomi.la/galleries/883451.html", Source::URL.page_url(source1))
assert_equal("https://hitomi.la/reader/1054851.html#1", Source::URL.page_url(source2))
end
should "leave unknown sources as they are" do
source1 = "https://google.com"
source2 = "a bad non-http source"
source3 = "https://example.com/Folder/中央大学.html"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2))
assert_equal(source3, Sources::Strategies.normalize_source(source3))
assert_nil(Source::URL.page_url("https://google.com"))
assert_nil(Source::URL.page_url("a bad non-http source"))
assert_nil(Source::URL.page_url("https://example.com/Folder/中央大学.html"))
end
end
end

View File

@@ -348,19 +348,19 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "http://i2.pixiv.net/img12/img/zenze/39749565.png"
source2 = "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg"
source3 = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
source4 = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png"
source5 = "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip"
assert_equal("https://www.pixiv.net/artworks/39749565", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.pixiv.net/artworks/39735353", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source4))
assert_equal("https://www.pixiv.net/artworks/44524589", Sources::Strategies.normalize_source(source5))
assert_equal("https://www.pixiv.net/artworks/39749565", Source::URL.page_url(source1))
assert_equal("https://www.pixiv.net/artworks/39735353", Source::URL.page_url(source2))
assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source3))
assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source4))
assert_equal("https://www.pixiv.net/artworks/44524589", Source::URL.page_url(source5))
end
end
end

View File

@@ -99,10 +99,10 @@ module Sources
end
end
context "normalizing for source" do
should "avoid normalizing unnormalizable urls" do
context "generating page urls" do
should "handle inconvertible urls" do
bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_nil(Source::URL.page_url(bad_source))
end
end
end

View File

@@ -228,22 +228,18 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://octrain1020.tumblr.com/post/190713122589"
source2 = "https://octrain1020.tumblr.com/image/190713122589"
source3 = "https://octrain1020.tumblr.com/image/190713122589#asd"
source4 = "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source1, Sources::Strategies.normalize_source(source2))
assert_equal(source1, Sources::Strategies.normalize_source(source3))
assert_equal("https://superboin.tumblr.com/post/141169066579", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://octrain1020.tumblr.com/"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_equal(source1, Source::URL.page_url(source1))
assert_equal(source1, Source::URL.page_url(source2))
assert_equal(source1, Source::URL.page_url(source3))
assert_equal("https://superboin.tumblr.com/post/141169066579", Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://octrain1020.tumblr.com/"))
end
end
end

View File

@@ -2,20 +2,11 @@ require 'test_helper'
module Sources
class TwitPicTest < ActiveSupport::TestCase
context "normalizing for source" do
should "normalize d3j5vwomefv46c.cloudfront.net links" do
source = "http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"
assert_equal("https://twitpic.com/dks0tb", Sources::Strategies.normalize_source(source))
end
should "normalize dn3pm25xmtlyu.cloudfront.net links" do
source = "https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA"
assert_equal("https://twitpic.com/dvitq3", Sources::Strategies.normalize_source(source))
end
should "normalize o.twimg.com links" do
source = "https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"
assert_equal("https://twitpic.com/dtnuru", Sources::Strategies.normalize_source(source))
context "generating page urls" do
should "work" do
assert_equal("https://twitpic.com/dks0tb", Source::URL.page_url("http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"))
assert_equal("https://twitpic.com/dvitq3", Source::URL.page_url("https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA"))
assert_equal("https://twitpic.com/dtnuru", Source::URL.page_url("https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"))
end
end
end

View File

@@ -291,18 +291,18 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://twitter.com/i/web/status/1261877313349640194"
source2 = "https://twitter.com/BOW999/status/1261877313349640194"
source3 = "https://twitter.com/BOW999/status/1261877313349640194/photo/1"
source4 = "https://twitter.com/BOW999/status/1261877313349640194?s=19"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2))
assert_equal(source2, Sources::Strategies.normalize_source(source3))
assert_equal(source2, Sources::Strategies.normalize_source(source4))
assert_equal("https://www.twitter.com/irt_5433", Sources::Strategies.normalize_source("https://www.twitter.com/irt_5433"))
assert_equal(source1, Source::URL.page_url(source1))
assert_equal(source2, Source::URL.page_url(source2))
assert_equal(source2, Source::URL.page_url(source3))
assert_equal(source2, Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://www.twitter.com/irt_5433"))
end
end
end

View File

@@ -113,25 +113,19 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://www.weibo.com/3150932560/H4cFbeKKA?from=page_1005053150932560_profile&wvr=6&mod=weibotime"
source2 = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81"
source3 = "https://m.weibo.cn/status/4173757483008088?luicode=20000061&lfid=4170879204256635"
source4 = "https://tw.weibo.com/SEINEN/4098035921690224"
assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Sources::Strategies.normalize_source(source1))
assert_equal("https://m.weibo.cn/detail/4242129997905387", Sources::Strategies.normalize_source(source2))
assert_equal("https://m.weibo.cn/status/4173757483008088", Sources::Strategies.normalize_source(source3))
assert_equal("https://m.weibo.cn/detail/4098035921690224", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://weibo.com/u/"
bad_source2 = "https://www.weibo.com/4ubergine/photos"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Source::URL.page_url(source1))
assert_equal("https://m.weibo.cn/detail/4242129997905387", Source::URL.page_url(source2))
assert_equal("https://m.weibo.cn/status/4173757483008088", Source::URL.page_url(source3))
assert_equal("https://m.weibo.cn/detail/4098035921690224", Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://weibo.com/u/"))
assert_nil(Source::URL.page_url("https://www.weibo.com/4ubergine/photos"))
end
end
end