sources: refactor normalize_for_source.

`normalize_for_source` was used to convert image URLs to page URLs when displaying sources
on the post show page. Move all the code for converting image URLs to page URLs from
`Sources::Strategies#normalize_for_source` to `Source::URL#page_url`.

Before, we had to be very careful in source strategies not to make any network calls in
`normalize_for_source`, since it was used in the view for the post show page. Now all the
code for generating page URLs is isolated in Source::URL, which makes source strategies
simpler. It also makes it easier to check if a source is an image URL or page URL, and if
the image URL is convertible to a page URL, which will make autotagging bad_link or
bad_source feasible.

Finally, this also generates better page URLs in a handful of cases:

* https://www.artstation.com/artwork/qPVGP instead of https://anubis1982918.artstation.com/projects/qPVGP
* https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6 instead of https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6
* http://gallery.minitokyo.net/view/365677 instead of http://gallery.minitokyo.net/download/365677
* https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png instead of https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png
* https://rule34.paheal.net/post/view/852405 instead of https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1
This commit is contained in:
evazion
2022-03-23 00:41:56 -05:00
parent 770f850c66
commit 3aa5cab2aa
59 changed files with 471 additions and 484 deletions

View File

@@ -16,7 +16,7 @@
# url = Source::URL.parse("https://twitter.com/yasunavert/status/1496123903290314755") # url = Source::URL.parse("https://twitter.com/yasunavert/status/1496123903290314755")
# url.site_name # => "Twitter" # url.site_name # => "Twitter"
# url.status_id # => "1496123903290314755" # url.status_id # => "1496123903290314755"
# url.twitter_username # => "yasunavert" # url.username # => "yasunavert"
# #
# @see Danbooru::URL # @see Danbooru::URL
module Source module Source
@@ -53,7 +53,7 @@ module Source
# @return [Source::URL] # @return [Source::URL]
def self.parse!(url) def self.parse!(url)
url = Danbooru::URL.new(url) url = Danbooru::URL.new(url)
subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL::Null
subclass.new(url) subclass.new(url)
end end
@@ -78,39 +78,30 @@ module Source
# #
# @return [String] # @return [String]
def site_name def site_name
# XXX should go in dedicated subclasses. # "Source::URL::NicoSeiga" => "Nico Seiga"
case host self.class.name.demodulize.titleize
when /ask\.fm\z/i end
"Ask.fm"
when /bcy\.net\z/i # Convert an image URL to the URL of the page containing the image, or
"BCY" # return nil if it's not possible to convert the current URL to a page URL.
when /booth\.pm\z/i #
"Booth.pm" # When viewing a post, the source will be shown as the page URL if it's
when /circle\.ms\z/i # possible to convert the source from an image URL to a page URL.
"Circle.ms" #
when /dlsite\.(com|net)\z/i # Examples:
"DLSite" #
when /doujinshi\.mugimugi\.org\z/i # * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png
"Doujinshi.org" # => https://www.pixiv.net/artworks/46324488
when /fc2\.com\z/i #
"FC2" # * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg
when /ko-fi\.com\z/i # => https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896
"Ko-fi" #
when /mixi\.jp\z/i # * https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig
"Mixi.jp" # => nil
when /piapro\.jp\z/i #
"Piapro.jp" # @return [String, nil]
when /sakura\.ne\.jp\z/i def page_url
"Sakura.ne.jp" nil
else
if self.class == Source::URL
# "www.melonbooks.co.jp" => "Melonbooks"
parsed_domain.sld.titleize
else
# "Source::URL::NicoSeiga" => "Nico Seiga"
self.class.name.demodulize.titleize
end
end
end end
# Convert the current URL into a profile URL, or return nil if it's not # Convert the current URL into a profile URL, or return nil if it's not
@@ -134,6 +125,14 @@ module Source
nil nil
end end
# Parse the given URL and return the page URL of the parsed result.
#
# @param url the URL to hand to Source::URL.parse
# @return the parsed URL's `page_url`, or nil when parsing returned nil
def self.page_url(url)
  parsed = Source::URL.parse(url)
  return nil if parsed.nil?

  parsed.page_url
end
# Parse the given URL and return the profile URL of the parsed result.
#
# @param url the URL to hand to Source::URL.parse
# @return the parsed URL's `profile_url`, or nil when parsing returned nil
def self.profile_url(url)
  parsed = Source::URL.parse(url)
  return nil if parsed.nil?

  parsed.profile_url
end
protected def initialize(...) protected def initialize(...)
super(...) super(...)
parse parse

View File

@@ -79,6 +79,10 @@ class Source::URL::ArtStation < Source::URL
end end
end end
# Build the ArtStation artwork page URL from the parsed work ID.
#
# @return [String, nil] the artwork page URL, or nil when no work ID is present
def page_url
  return unless work_id.present?

  "https://www.artstation.com/artwork/#{work_id}"
end
def profile_url def profile_url
"https://www.artstation.com/#{username}" if username.present? "https://www.artstation.com/#{username}" if username.present?
end end

View File

@@ -78,6 +78,17 @@ class Source::URL::Fanbox < Source::URL
to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url? to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url?
end end
# Build the Fanbox page URL for this URL.
#
# Prefers the username-based post URL, then the user-ID-based post URL. For
# image URLs that carry only a user ID, the creator's profile URL is returned.
#
# @return [String, nil] the page URL, or nil when none can be built
def page_url
  has_work = work_id.present?
  return "https://#{username}.fanbox.cc/posts/#{work_id}" if username.present? && has_work
  return nil unless user_id.present?

  if has_work
    "https://www.pixiv.net/fanbox/creator/#{user_id}/post/#{work_id}"
  elsif image_url?
    # Use profile url as page url for cover images (XXX may cause problems with bad_source detection)
    "https://www.pixiv.net/fanbox/creator/#{user_id}"
  end
end
def profile_url def profile_url
if username.present? if username.present?
"https://#{username}.fanbox.cc" "https://#{username}.fanbox.cc"

View File

@@ -55,6 +55,7 @@ class Source::URL::Fantia < Source::URL
# https://fantia.jp/fanclubs/64496 # https://fantia.jp/fanclubs/64496
# https://fantia.jp/fanclubs/1654/posts # https://fantia.jp/fanclubs/1654/posts
# https://job.fantia.jp/fanclubs/5734
in _, "fanclubs", /\d+/ => fanclub_id, *rest in _, "fanclubs", /\d+/ => fanclub_id, *rest
@fanclub_id = fanclub_id @fanclub_id = fanclub_id

View File

@@ -1,7 +1,7 @@
# frozen_string_literal: true # frozen_string_literal: true
class Source::URL::Fc2 < Source::URL class Source::URL::Fc2 < Source::URL
attr_reader :username, :profile_url attr_reader :username, :profile_url, :page_url
def self.match?(url) def self.match?(url)
url.domain.in?(%w[fc2.com fc2blog.net fc2blog.us]) url.domain.in?(%w[fc2.com fc2blog.net fc2blog.us])
@@ -48,6 +48,7 @@ class Source::URL::Fc2 < Source::URL
# http://blog.fc2.com/g/b/o/gbot/20071023195141.jpg # http://blog.fc2.com/g/b/o/gbot/20071023195141.jpg
in (/^blog-imgs-\d+(-origin)?$/ | "blog"), "fc2", "com", /^\w$/, /^\w$/, /^\w$/, username, file in (/^blog-imgs-\d+(-origin)?$/ | "blog"), "fc2", "com", /^\w$/, /^\w$/, /^\w$/, username, file
@username = username @username = username
@page_url = "http://#{username}.blog.fc2.com/img/#{file}"
@profile_url = "http://#{username}.blog.fc2.com" @profile_url = "http://#{username}.blog.fc2.com"
# http://diary.fc2.com/user/yuuri/img/2005_12/26.jpg # http://diary.fc2.com/user/yuuri/img/2005_12/26.jpg
@@ -55,6 +56,9 @@ class Source::URL::Fc2 < Source::URL
# http://diary.fc2.com/user/kazuharoom/img/2015_5/22.jpg # http://diary.fc2.com/user/kazuharoom/img/2015_5/22.jpg
in /diary\d*$/, "fc2", "com", "user", username, "img", date, file in /diary\d*$/, "fc2", "com", "user", username, "img", date, file
@username = username @username = username
@year, @month = date.split("_")
@day = filename
@page_url = "http://#{host}/cgi-sys/ed.cgi/#{username}?Y=#{@year}&M=#{@month}&D=#{@day}"
@profile_url = "http://diary.fc2.com/cgi-sys/ed.cgi/#{username}" @profile_url = "http://diary.fc2.com/cgi-sys/ed.cgi/#{username}"
# http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom/?Y=2012&M=10&D=22 # http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom/?Y=2012&M=10&D=22

View File

@@ -6,6 +6,7 @@
# Unsupported patterns: # Unsupported patterns:
# * https://foundation.app/@ <- This seems to be a novelty account. # * https://foundation.app/@ <- This seems to be a novelty account.
# * https://foundation.app/mochiiimo <- no @ # * https://foundation.app/mochiiimo <- no @
# * https://foundation.app/collection/kgfgen
class Source::URL::Foundation < Source::URL class Source::URL::Foundation < Source::URL
attr_reader :username, :token_id, :work_id, :hash attr_reader :username, :token_id, :work_id, :hash

View File

@@ -63,6 +63,14 @@ class Source::URL::HentaiFoundry < Source::URL
end end
end end
# Build the Hentai Foundry picture page URL.
#
# Uses the full /pictures/user/ form when the username is known, and the short
# /pic-<id> form when only the work ID is known.
#
# @return [String, nil] the page URL, or nil when no work ID is present
def page_url
  return unless work_id.present?

  if username.present?
    "https://www.hentai-foundry.com/pictures/user/#{username}/#{work_id}"
  else
    "https://www.hentai-foundry.com/pic-#{work_id}"
  end
end
def profile_url def profile_url
"https://www.hentai-foundry.com/user/#{username}" if username.present? "https://www.hentai-foundry.com/user/#{username}" if username.present?
end end

View File

@@ -34,6 +34,10 @@ class Source::URL::Instagram < Source::URL
end end
end end
# Build the Instagram post page URL from the parsed work ID.
#
# @return [String, nil] the post URL (with trailing slash), or nil when no work ID is present
def page_url
  return unless work_id.present?

  "https://www.instagram.com/p/#{work_id}/"
end
def profile_url def profile_url
# Instagram URLs canonically end with "/" # Instagram URLs canonically end with "/"
"https://www.instagram.com/#{username}/" if username.present? "https://www.instagram.com/#{username}/" if username.present?

View File

@@ -52,6 +52,10 @@ class Source::URL::Lofter < Source::URL
"#{site}#{path}" if image_url? "#{site}#{path}" if image_url?
end end
# Build the Lofter post page URL; both the username and the work ID are required.
#
# @return [String, nil] the post URL, or nil when either part is missing
def page_url
  return unless username.present? && work_id.present?

  "https://#{username}.lofter.com/post/#{work_id}"
end
def profile_url def profile_url
"https://#{username}.lofter.com" if username.present? "https://#{username}.lofter.com" if username.present?
end end

View File

@@ -77,6 +77,14 @@ class Source::URL::Mastodon < Source::URL
full_image_url.present? full_image_url.present?
end end
# Build the status page URL on the current host.
#
# Uses the /@username/<id> form when the username is known, and the
# /web/statuses/<id> form when only the work ID is known.
#
# @return [String, nil] the page URL, or nil when no work ID is present
def page_url
  return unless work_id.present?

  if username.present?
    "https://#{host}/@#{username}/#{work_id}"
  else
    "https://#{host}/web/statuses/#{work_id}"
  end
end
def profile_url def profile_url
if username.present? if username.present?
"https://#{host}/@#{username}" "https://#{host}/@#{username}"

View File

@@ -85,6 +85,14 @@ class Source::URL::Moebooru < Source::URL
end end
end end
# Build the post page URL on the current domain.
#
# Prefers the post ID; falls back to an md5 lookup URL when only the md5 is known.
#
# @return [String, nil] the page URL, or nil when neither value is present
def page_url
  return "https://#{domain}/post/show/#{work_id}" if work_id.present?
  return "https://#{domain}/post/show?md5=#{md5}" if md5.present?

  nil
end
def self.full_image_url(site_name, md5, file_ext, post_id = nil) def self.full_image_url(site_name, md5, file_ext, post_id = nil)
case site_name case site_name
when "Yande.re" when "Yande.re"

View File

@@ -59,6 +59,12 @@ class Source::URL::Newgrounds < Source::URL
url.host == "art.ngfiles.com" url.host == "art.ngfiles.com"
end end
# Build the Newgrounds art page URL; both the username and the work title are required.
#
# @return [String, nil] the art page URL, or nil when either part is missing
def page_url
  return unless username.present? && work_title.present?

  "https://www.newgrounds.com/art/view/#{username}/#{work_title}"
end
def profile_url def profile_url
"https://#{username}.newgrounds.com" if username.present? "https://#{username}.newgrounds.com" if username.present?
end end

View File

@@ -94,6 +94,10 @@ class Source::URL::Nijie < Source::URL
to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url? to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url?
end end
# Build the Nijie illustration page URL from the parsed work ID.
#
# @return [String, nil] the view.php URL, or nil when no work ID is present
def page_url
  return unless work_id.present?

  "https://nijie.info/view.php?id=#{work_id}"
end
def profile_url def profile_url
"https://nijie.info/members.php?id=#{user_id}" if user_id.present? "https://nijie.info/members.php?id=#{user_id}" if user_id.present?
end end

View File

@@ -0,0 +1,201 @@
# frozen_string_literal: true
# Catch-all URL parser for sites that have no dedicated Source::URL subclass.
#
# `match?` accepts every URL, so this class can stand in for any site. `parse`
# recognizes image URLs from a number of miscellaneous sites and records the
# page URL each image belongs to. For any URL it doesn't recognize, `page_url`
# and `work_id` stay nil.
class Source::URL::Null < Source::URL
  attr_reader :work_id, :page_url

  # @param url the URL being tested (ignored)
  # @return [Boolean] always true
  def self.match?(url)
    true
  end

  # A display name for the site. A few hosts have hand-written names; everything
  # else falls back to the titleized second-level domain.
  #
  # @return [String]
  def site_name
    case host
    when /ask\.fm\z/i
      "Ask.fm"
    when /bcy\.net\z/i
      "BCY"
    when /booth\.pm\z/i
      "Booth.pm"
    when /circle\.ms\z/i
      "Circle.ms"
    when /dlsite\.(com|net)\z/i
      "DLSite"
    when /doujinshi\.mugimugi\.org\z/i
      "Doujinshi.org"
    when /ko-fi\.com\z/i
      "Ko-fi"
    when /mixi\.jp\z/i
      "Mixi.jp"
    when /piapro\.jp\z/i
      "Piapro.jp"
    when /sakura\.ne\.jp\z/i
      "Sakura.ne.jp"
    else
      # "www.melonbooks.co.jp" => "Melonbooks"
      parsed_domain.sld.titleize
    end
  end

  # Match the URL against the known image URL formats and set @page_url (plus
  # @work_id, @md5, etc. where available) for the first format that matches.
  #
  # Regexp patterns inside `in` set `$1`, so each branch reads its capture
  # immediately, before any other match can overwrite it.
  def parse
    case [subdomain, domain, *path_segments]

    # http://nekomataya.net/diarypro/data/upfile/66-1.jpg
    # http://www117.sakura.ne.jp/~cat_rice/diarypro/data/upfile/31-1.jpg
    # http://webknight0.sakura.ne.jp/cgi-bin/diarypro/data/upfile/9-1.jpg
    in _, _, *subdirs, "diarypro", "data", "upfile", /^(\d+)-\d+\.(jpg|png|gif)$/ => file
      @work_id = $1
      @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/")

    # http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=723-4.jpg
    # http://www.danshaku.sakura.ne.jp/cgi-bin/diarypro/diary.cgi?mode=image&upfile=56-1.jpg
    # http://www.yanbow.com/~myanie/diarypro/diary.cgi?mode=image&upfile=279-1.jpg
    in _, _, *subdirs, "diarypro", "diary.cgi" if params[:mode] == "image" && params[:upfile].present?
      @work_id = params[:upfile][/^\d+/]
      @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/")

    # http://com2.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/018.jpg
    # http://sozai.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/009.jpg
    in _, "doujinantena.com", "contents_jpg", /^\h{32}$/ => md5, *rest
      @md5 = md5
      @page_url = "http://doujinantena.com/page.php?id=#{md5}"

    # https://e-shuushuu.net/images/2017-07-19-915628.jpeg
    in _, "e-shuushuu.net", "images", /^\d{4}-\d{2}-\d{2}-(\d+)\.(jpeg|jpg|png|gif)$/i
      @work_id = $1
      @page_url = "https://e-shuushuu.net/image/#{@work_id}"

    # https://scontent.fmnl9-2.fna.fbcdn.net/v/t1.6435-9/196345051_961754654392125_8855002558147907833_n.jpg?_nc_cat=103&ccb=1-5&_nc_sid=0debeb&_nc_ohc=EB1RGiEOtyEAX9XE7aL&_nc_ht=scontent.fmnl9-2.fna&oh=00_AT8NNz_keqQ6VJeC1UVSMULhjaP3iykm-ONSMR7IrtarUQ&oe=6257862E
    # https://scontent.fmnl8-2.fna.fbcdn.net/v/t1.6435-9/fr/cp0/e15/q65/80900683_480934615898749_6481759463945535488_n.jpg?_nc_cat=107&ccb=1-3&_nc_sid=8024bb&_nc_ohc=cCYFUzyHDmUAX-YHJIw&_nc_ht=scontent.fmnl8-2.fna&oh=e45c3837afcfefb6a4d93adfecef88c1&oe=60F6E392
    # https://scontent.fmnl13-1.fna.fbcdn.net/v/t31.18172-8/22861751_1362164640578443_432921612329393062_o.jpg
    # https://scontent-sin1-1.xx.fbcdn.net/hphotos-xlp1/t31.0-8/s960x960/12971037_586686358150819_495608200196301072_o.jpg
    in _, "fbcdn.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/
      @work_id = $1
      @page_url = "https://www.facebook.com/photo?fbid=#{@work_id}"

    # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xlp1/t31.0-8/s960x960/13173066_623015164516858_1844421675339995359_o.jpg
    # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xpf1/v/t1.0-9/s720x720/12032214_991569624217563_4908408819297057893_n.png?oh=efe6ea26aed89c8a12ddc1832b1f0157&oe=5667D5B1&__gda__=1453845772_c742c726735047f2feb836b845ff296f
    #
    # The leading group is multi-digit (e.g. "13173066"), so it must be `\d+`, same as the fbcdn.net pattern above.
    in /fbcdn/, "akamaihd.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/
      @work_id = $1
      @page_url = "https://www.facebook.com/photo.php?fbid=#{@work_id}"

    # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
    # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
    # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
    # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4
    # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm
    in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/
      @md5 = $1
      @page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}"

    # https://a.hitomi.la/galleries/907838/1.png
    # https://0a.hitomi.la/galleries/1169701/23.png
    # https://aa.hitomi.la/galleries/990722/003_01_002.jpg
    # https://la.hitomi.la/galleries/1054851/001_main_image.jpg
    in _, "hitomi.la", "galleries", gallery_id, /^(\d+)\w*\.(jpg|png|gif)$/ => image_id
      @gallery_id = gallery_id
      @image_id = $1.to_i
      @page_url = "https://hitomi.la/reader/#{gallery_id}.html##{@image_id}"

    # https://aa.hitomi.la/galleries/883451/t_rena1g.png
    in _, "hitomi.la", "galleries", gallery_id, file
      @gallery_id = gallery_id
      @page_url = "https://hitomi.la/galleries/#{gallery_id}.html"

    # http://www.karabako.net/images/karabako_43878.jpg
    # http://www.karabako.net/imagesub/karabako_43222_215.jpg
    in _, "karabako.net", ("images" | "imagesub"), /^karabako_(\d+)/
      @work_id = $1
      @page_url = "http://www.karabako.net/post/view/#{@work_id}"

    # http://static.minitokyo.net/downloads/31/33/764181.jpg
    in _, "minitokyo.net", "downloads", /^\d{2}$/, /^\d{2}$/, file
      @work_id = filename
      @page_url = "http://gallery.minitokyo.net/view/#{@work_id}"

    # http://i.minus.com/j2LcOC52dGLtB.jpg
    # http://i5.minus.com/ik26grnRJAmYh.jpg
    in _, "minus.com", /^[ij]([a-zA-Z0-9]{12,})\.(jpg|png|gif)$/
      @work_id = $1
      @page_url = "http://minus.com/i/#{@work_id}"

    # http://jpg.nijigen-daiaru.com/7364/013.jpg
    in "jpg", "nijigen-daiaru.com", /^\d+$/ => work_id, file
      @work_id = work_id
      @page_url = "http://nijigen-daiaru.com/book.php?idb=#{@work_id}"

    # http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
    # http://kura3.photozou.jp/pub/741/2662741/photo/160341863_624.v1353780834.jpg
    in _, "photozou.jp", "pub", /^\d+$/, user_id, "photo", /^(\d+)/ => file
      @user_id = user_id
      @work_id = $1
      @page_url = "https://photozou.jp/photo/show/#{@user_id}/#{@work_id}"

    # https://tulip.paheal.net/_images/4f309b2b680da9c3444ed462bb172214/3910816%20-%20Dark_Magician_Girl%20MINK343%20Yu-Gi-Oh!.jpg
    # http://rule34-data-002.paheal.net/_images/2ab55f9291c8f2c68cdbeac998714028/2401510%20-%20Ash_Ketchum%20Lillie%20Porkyman.jpg
    # http://rule34-images.paheal.net/c4710f05e76bdee22fcd0d62bf1ac840/262685%20-%20mabinogi%20nao.jpg
    in _, "paheal.net", *subdirs, /^\h{32}$/ => md5, /^(\d+)/ => file
      @md5 = md5
      @work_id = $1
      @page_url = "https://rule34.paheal.net/post/view/#{@work_id}"

    # https://api-cdn-mp4.rule34.xxx/images/4330/2f85040320f64c0e42128a8b8f6071ce.mp4
    # https://ny5webm.rule34.xxx//images/4653/3c63956b940d0ff565faa8c7555b4686.mp4?5303486
    # https://img.rule34.xxx//images/4977/7d76919c2f713c580f69fe129d2d1a44.jpeg?5668795
    # http://rule34.xxx//images/993/5625625970c9ce8c5121fde518c2c4840801cd29.jpg?992983
    # http://img3.rule34.xxx/img/rule34//images/1180/76c6497b5138c4122710c2d05458e729a8d34f7b.png?1190815
    # http://aimg.rule34.xxx//samples/1267/sample_d628f215f27815dc9c1d365a199ee68e807efac1.jpg?1309664
    in _, "rule34.xxx", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpg|jpeg|png|gif|webm|mp4)$/
      @md5 = $1
      @page_url = "https://rule34.xxx/index.php?page=post&s=list&md5=#{@md5}"

    # https://cs.sankakucomplex.com/data/68/6c/686ceee03af38fe4ceb45bf1c50947e0.jpg?e=1591893718&m=fLlJfTrK_j2Rnc0uIHNC3w
    # https://v.sankakucomplex.com/data/24/ff/24ff5da1fd7ed051b083b36e4e51de8e.mp4?e=1644999580&m=-OtZg2QdtKbibMte8vlsdw&expires=1644999580&token=0YUdUKKwTmvpozhG1WW_nRvSUQw3WJd574andQv-KYY
    # https://cs.sankakucomplex.com/data/sample/2a/45/sample-2a45c67281b0fcfd26208063f81a3114.jpg?e=1590609355&m=cexHhVyJguoZqPB3z3N7aA
    # http://c3.sankakucomplex.com/data/sample/8a/44/preview8a44211650e818ef07e5d00284c20a14.jpg
    in _, "sankakucomplex.com", "data", *subdirs, /^(?:preview|sample-)?(\h{32})\.(jpg|jpeg|gif|png|webm|mp4)$/
      @md5 = $1
      @page_url = "https://chan.sankakucomplex.com/post/show?md5=#{@md5}"

    # http://shimmie.katawa-shoujo.com/image/3657.jpg
    in "shimmie", "katawa-shoujo.com", "image", file
      @work_id = filename
      @page_url = "https://shimmie.katawa-shoujo.com/post/view/#{@work_id}"

    # http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
    # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
    # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
    # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
    in ("img" | "ecdnimg"), "toranoana.jp", *subdirs, /^\d{2}$/, /^\d{4}$/, /^\d{2}$/, /^\d{2}$/, /^(\d{12})-\d+p\.jpg$/ => file
      @work_id = $1
      @page_url = "https://ec.toranoana.jp/tora_r/ec/item/#{@work_id}"

    # http://p.twpl.jp/show/orig/DTaCZ
    # http://p.twpl.jp/show/large/5zack
    # http://p.twipple.jp/show/orig/vXqaU
    in _, ("twpl.jp" | "twipple.jp"), "show", ("large" | "orig"), work_id
      @work_id = work_id
      @page_url = "http://p.twipple.jp/#{work_id}"

    # https://vignette.wikia.nocookie.net/queensblade/images/3/33/WGAIRI1.jpg/
    # https://vignette1.wikia.nocookie.net/valkyriecrusade/images/b/bf/Joan_Of_Arc_H.png/revision/latest?cb=20170801081004
    # https://static.wikia.nocookie.net/valkyriecrusade/images/3/3f/Joan_Of_Arc.png/revision/latest/scale-to-width-down/270?cb=20170801081000
    in _, "nocookie.net", wiki, "images", /^\h$/, /^\h\h$/, file, *rest
      @wiki = wiki
      @file = file
      @page_url = "https://#{wiki}.fandom.com/wiki/File:#{file}"

    # https://static.zerochan.net/Fullmetal.Alchemist.full.2831797.png
    # https://s1.zerochan.net/Cocoa.Cookie.600.2957938.jpg
    # http://static.zerochan.net/full/24/13/90674.jpg
    in _, "zerochan.net", *subdirs, /(\d+)\.(jpg|png|gif)$/
      @work_id = $1
      @page_url = "https://www.zerochan.net/#{@work_id}#full"

    # http://www.zerochan.net/full/1567893
    in _, "zerochan.net", "full", /^\d+$/ => work_id
      @work_id = work_id
      @page_url = "https://www.zerochan.net/#{@work_id}#full"

    else
      # Unrecognized URL: leave everything nil. The empty `else` keeps `case/in`
      # from raising NoMatchingPatternError.
    end
  end
end

View File

@@ -52,6 +52,10 @@ class Source::URL::Plurk < Source::URL
host == "images.plurk.com" host == "images.plurk.com"
end end
# Build the Plurk post page URL from the parsed work ID.
#
# @return [String, nil] the post URL, or nil when no work ID is present
def page_url
  return unless work_id.present?

  "https://www.plurk.com/p/#{work_id}"
end
def profile_url def profile_url
"https://www.plurk.com/#{username}" if username.present? "https://www.plurk.com/#{username}" if username.present?
end end

View File

@@ -43,6 +43,10 @@ class Source::URL::Skeb < Source::URL
end end
end end
# Build the Skeb work page URL; both the username and the work ID are required.
#
# @return [String, nil] the work URL, or nil when either part is missing
def page_url
  return unless username.present? && work_id.present?

  "https://skeb.jp/@#{username}/works/#{work_id}"
end
def profile_url def profile_url
"https://skeb.jp/@#{username}" if username.present? "https://skeb.jp/@#{username}" if username.present?
end end

View File

@@ -85,8 +85,7 @@ class Source::URL::Tumblr < Source::URL
end end
def page_url def page_url
return nil unless @blog_name.present? && @work_id.present? "https://#{blog_name}.tumblr.com/post/#{work_id}" if blog_name.present? && work_id.present?
"https://#{@blog_name}.tumblr.com/post/#{@work_id}"
end end
def profile_url def profile_url

View File

@@ -72,8 +72,7 @@ class Source::URL::TwitPic < Source::URL
end end
def page_url def page_url
return nil unless base36_id.present? "https://twitpic.com/#{base36_id}" if base36_id.present?
"https://twitpic.com/#{base36_id}"
end end
def profile_url def profile_url

View File

@@ -26,7 +26,7 @@ class Source::URL::Twitter < Source::URL
# https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration # https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration
RESERVED_USERNAMES = %w[home i intent search] RESERVED_USERNAMES = %w[home i intent search]
attr_reader :status_id, :twitter_username, :user_id attr_reader :status_id, :username, :user_id
def self.match?(url) def self.match?(url)
return false if Source::URL::TwitPic.match?(url) # TwitPic uses https://o.twimg.com/ URLs return false if Source::URL::TwitPic.match?(url) # TwitPic uses https://o.twimg.com/ URLs
@@ -45,12 +45,12 @@ class Source::URL::Twitter < Source::URL
# https://twitter.com/Kekeflipnote/status/1496555599718498319/video/1 # https://twitter.com/Kekeflipnote/status/1496555599718498319/video/1
# https://twitter.com/sato_1_11/status/1496489742791475201/photo/2 # https://twitter.com/sato_1_11/status/1496489742791475201/photo/2
in "twitter.com", username, "status", status_id, *rest in "twitter.com", username, "status", status_id, *rest
@twitter_username = username @username = username
@status_id = status_id @status_id = status_id
# https://twitter.com/motty08111213 # https://twitter.com/motty08111213
in "twitter.com", username, *rest in "twitter.com", username, *rest
@twitter_username = username unless username.in?(RESERVED_USERNAMES) @username = username unless username.in?(RESERVED_USERNAMES)
# https://twitter.com/intent/user?user_id=1485229827984531457 # https://twitter.com/intent/user?user_id=1485229827984531457
in "twitter.com", "intent", "user" if params[:user_id].present? in "twitter.com", "intent", "user" if params[:user_id].present?
@@ -58,7 +58,7 @@ class Source::URL::Twitter < Source::URL
# https://twitter.com/intent/user?screen_name=ryuudog_NFT # https://twitter.com/intent/user?screen_name=ryuudog_NFT
in "twitter.com", "intent", "user" if params[:screen_name].present? in "twitter.com", "intent", "user" if params[:screen_name].present?
@twitter_username = params[:screen_name] @username = params[:screen_name]
# https://twitter.com/i/user/889592953 # https://twitter.com/i/user/889592953
in "twitter.com", "i", "user", user_id in "twitter.com", "i", "user", user_id
@@ -101,9 +101,17 @@ class Source::URL::Twitter < Source::URL
"#{site}/#{@file_path}:orig" "#{site}/#{@file_path}:orig"
end end
# Build the tweet page URL.
#
# Uses the /<username>/status/<id> form when the username is known, and the
# /i/web/status/<id> form when only the status ID is known.
#
# @return [String, nil] the tweet URL, or nil when no status ID is present
def page_url
  return unless status_id.present?

  if username.present?
    "https://twitter.com/#{username}/status/#{status_id}"
  else
    "https://twitter.com/i/web/status/#{status_id}"
  end
end
def profile_url def profile_url
if twitter_username.present? if username.present?
"https://twitter.com/#{twitter_username}" "https://twitter.com/#{username}"
elsif user_id.present? elsif user_id.present?
# "https://twitter.com/i/user/#{user_id} # "https://twitter.com/i/user/#{user_id}
"https://twitter.com/intent/user?user_id=#{user_id}" "https://twitter.com/intent/user?user_id=#{user_id}"

View File

@@ -104,7 +104,7 @@ class Source::URL::Weibo < Source::URL
end end
end end
def normalized_url def page_url
if @artist_short_id.present? && @illust_base62_id.present? if @artist_short_id.present? && @illust_base62_id.present?
"https://www.weibo.com/#{@artist_short_id}/#{@illust_base62_id}" "https://www.weibo.com/#{@artist_short_id}/#{@illust_base62_id}"
elsif mobile_url.present? elsif mobile_url.present?

View File

@@ -23,7 +23,6 @@ module Sources
Strategies::Foundation, Strategies::Foundation,
Strategies::Plurk, Strategies::Plurk,
Strategies::Tinami, Strategies::Tinami,
Strategies::TwitPic,
Strategies::Fantia, Strategies::Fantia,
] ]
end end
@@ -36,9 +35,5 @@ module Sources
def self.canonical(url, referer) def self.canonical(url, referer)
find(url, referer).canonical_url find(url, referer).canonical_url
end end
def self.normalize_source(url)
find(url).normalize_for_source || url
end
end end
end end

View File

@@ -52,16 +52,6 @@ module Sources::Strategies
end end
end end
def normalize_for_source
return if project_id.blank?
if artist_name_from_url.present?
"https://#{artist_name_from_url}.artstation.com/projects/#{project_id}"
else
"https://www.artstation.com/artwork/#{project_id}"
end
end
def image_urls_from_api def image_urls_from_api
api_response[:assets].to_a.map do |asset| api_response[:assets].to_a.map do |asset|
if asset[:asset_type] == "image" if asset[:asset_type] == "image"

View File

@@ -138,12 +138,6 @@ module Sources
end end
memoize :http_downloader memoize :http_downloader
# Given a post/image url, this is the normalized url that will be displayed in a post's page in its stead.
# This function should never make any network call, even indirectly. Return nil to never normalize.
def normalize_for_source
nil
end
def artists def artists
ArtistFinder.find_artists(profile_url) ArtistFinder.find_artists(profile_url)
end end

View File

@@ -65,10 +65,6 @@ module Sources
end end
end end
def normalize_for_source
page_url_from_image_url
end
def profile_url def profile_url
return nil if artist_name.blank? return nil if artist_name.blank?
"https://www.deviantart.com/#{artist_name.downcase}" "https://www.deviantart.com/#{artist_name.downcase}"

View File

@@ -34,19 +34,6 @@ module Sources
end end
end end
def normalize_for_source
if illust_id.present?
if artist_name_from_url.present?
"https://#{artist_name_from_url}.fanbox.cc/posts/#{illust_id}"
elsif artist_id_from_url.present?
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}/post/#{illust_id}"
end
elsif artist_id_from_url.present?
# Cover images
"https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}"
end
end
def profile_url def profile_url
return if artist_name.blank? return if artist_name.blank?

View File

@@ -128,10 +128,6 @@ module Sources::Strategies
DText.from_html(artist_commentary_desc) DText.from_html(artist_commentary_desc)
end end
def normalize_for_source
page_url
end
def work_type def work_type
parsed_url.work_type || parsed_referer&.work_type parsed_url.work_type || parsed_referer&.work_type
end end

View File

@@ -76,10 +76,6 @@ module Sources
DText.from_html(artist_commentary_desc) DText.from_html(artist_commentary_desc)
end end
def normalize_for_source
page_url
end
def api_response def api_response
return {} if page.nil? return {} if page.nil?

View File

@@ -68,10 +68,6 @@ module Sources
DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n") DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n")
end end
def normalize_for_source
page_url
end
def illust_id def illust_id
parsed_url.work_id || parsed_referer&.work_id parsed_url.work_id || parsed_referer&.work_id
end end

View File

@@ -47,10 +47,6 @@ module Sources
page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html
end end
def normalize_for_source
page_url
end
def illust_id def illust_id
parsed_url.work_id || parsed_referer&.work_id parsed_url.work_id || parsed_referer&.work_id
end end

View File

@@ -79,10 +79,6 @@ module Sources::Strategies
api_response.tags api_response.tags
end end
def normalize_for_source
page_url
end
def dtext_artist_commentary_desc def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc) do |element| DText.from_html(artist_commentary_desc) do |element|
if element.name == "a" if element.name == "a"

View File

@@ -25,17 +25,6 @@ module Sources
image_urls.first image_urls.first
end end
def normalize_for_source
id = post_id_from_url
md5 = post_md5_from_url
if id.present?
"https://#{domain}/post/show/#{id}"
elsif md5.present?
"https://#{domain}/post?tags=md5:#{md5}"
end
end
def tags def tags
api_response[:tags].to_s.split.map do |tag| api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"] [tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"]

View File

@@ -84,10 +84,6 @@ module Sources
end end
end end
def normalize_for_source
page_url
end
def user_name def user_name
parsed_url.username || parsed_referer&.username parsed_url.username || parsed_referer&.username
end end

View File

@@ -66,10 +66,6 @@ module Sources
end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp
end end
def normalize_for_source
page_url
end
def tag_name def tag_name
return if api_client&.user_id.blank? return if api_client&.user_id.blank?
"nicoseiga#{api_client.user_id}" "nicoseiga#{api_client.user_id}"

View File

@@ -109,12 +109,6 @@ module Sources
artist_id_from_url || artist_id_from_page artist_id_from_url || artist_id_from_page
end end
def normalize_for_source
return if illust_id.blank?
"https://nijie.info/view.php?id=#{illust_id}"
end
def doujin? def doujin?
page&.at("#dojin_left").present? page&.at("#dojin_left").present?
end end

View File

@@ -18,112 +18,6 @@ module Sources
def artists def artists
ArtistFinder.find_artists(url) ArtistFinder.find_artists(url)
end end
def normalize_for_source
case url
when %r{\Ahttp://www\.karabako\.net/images(?:ub)?/karabako_(\d+)(?:_\d+)?\.}i
"http://www.karabako.net/post/view/#{$1}"
# XXX http://twipple.jp is defunct
# http://p.twpl.jp/show/orig/myRVs
when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i
"http://p.twipple.jp/#{$1}"
when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i
username = $1
filename = $2
"http://#{username}.blog.fc2.com/img/#{filename}/"
when %r{\Ahttps?://diary(\d)?\.fc2\.com/user/([^/]+)/img/(\d+)_(\d+)/(\d+)\.}i
server_id = $1
username = $2
year = $3
month = $4
day = $5
"http://diary#{server_id}.fc2.com/cgi-sys/ed.cgi/#{username}?Y=#{year}&M=#{month}&D=#{day}"
when %r{\Ahttps?://(?:fbcdn-)?s(?:content|photos)-[^/]+\.(?:fbcdn|akamaihd)\.net/hphotos-.+/\d+_(\d+)_(?:\d+_){1,3}[no]\.}i
"https://www.facebook.com/photo.php?fbid=#{$1}"
when %r{\Ahttps?://c(?:s|han|[1-4])\.sankakucomplex\.com/data(?:/sample)?/(?:[a-f0-9]{2}/){2}(?:sample-|preview)?([a-f0-9]{32})}i
"https://chan.sankakucomplex.com/en/post/show?md5=#{$1}"
when %r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?|png))?\z}i
"https://www.zerochan.net/#{$1}#full"
when %r{\Ahttps?://static[1-6]?\.minitokyo\.net/(?:downloads|view)/(?:\d{2}/){2}(\d+)}i
"http://gallery.minitokyo.net/download/#{$1}"
# https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg
# http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png
# https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg
when %r{\Ahttps?://(?:\w+\.)?gelbooru\.com//?(?:images|samples)/(?:\d+|\h\h/\h\h)/(?:sample_)?(?<md5>\h{32})\.}i
"https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{$~[:md5]}"
when %r{\Ahttps?://(?:slot\d*\.)?im(?:g|ages)\d*\.wikia\.(?:nocookie\.net|com)/(?:_{2}cb\d{14}/)?([^/]+)(?:/[a-z]{2})?/images/(?:(?:thumb|archive)?/)?[a-f0-9]/[a-f0-9]{2}/(?:\d{14}(?:!|%21))?([^/]+)}i
subdomain = $1
filename = $2
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
when %r{\Ahttps?://vignette(?:\d*)\.wikia\.nocookie\.net/([^/]+)/images/[a-f0-9]/[a-f0-9]{2}/([^/]+)}i
subdomain = $1
filename = $2
"https://#{subdomain}.wikia.com/wiki/File:#{filename}"
when %r{\Ahttps?://e-shuushuu.net/images/\d{4}-(?:\d{2}-){2}(\d+)}i
"https://e-shuushuu.net/image/#{$1}"
when %r{\Ahttps?://jpg\.nijigen-daiaru\.com/(\d+)}i
"http://nijigen-daiaru.com/book.php?idb=#{$1}"
when %r{\Ahttps?://sozai\.doujinantena\.com/contents_jpg/([a-f0-9]{32})/}i
"http://doujinantena.com/page.php?id=#{$1}"
when %r{\Ahttps?://rule34-(?:data-\d{3}|images)\.paheal\.net/(?:_images/)?([a-f0-9]{32})}i
"https://rule34.paheal.net/post/list/md5:#{$1}/1"
when %r{\Ahttps?://shimmie\.katawa-shoujo\.com/image/(\d+)}i
"https://shimmie.katawa-shoujo.com/post/view/#{$1}"
when %r{\Ahttps://(?:(?:\w+\.)?rule34\.xxx|img\.booru\.org/(?:rule34|r34))(?:/(?:img/rule34|r34))?/{1,2}images/\d+/([a-f0-9]{32})\.}i
"https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}"
when %r{(\Ahttps?://.+)/diarypro/d(?:ata/upfile/|iary\.cgi\?mode=image&upfile=)(\d+)}i
base_url = $1
entry_no = $2
"#{base_url}/diarypro/diary.cgi?no=#{entry_no}"
# XXX site is defunct
when %r{\Ahttps?://i(?:\d)?\.minus\.com/(?:i|j)([^\.]{12,})}i
"http://minus.com/i/#{$1}"
# http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg
# http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg
when %r{\Ahttps?://\w+\.photozou\.jp/pub/\d+/(?<artist_id>\d+)/photo/(?<photo_id>\d+)_.*$}i
"https://photozou.jp/photo/show/#{$~[:artist_id]}/#{$~[:photo_id]}"
# http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg
# http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg
# http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg
# https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg
when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i
"https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/"
# https://a.hitomi.la/galleries/907838/1.png
# https://0a.hitomi.la/galleries/1169701/23.png
# https://aa.hitomi.la/galleries/990722/003_01_002.jpg
# https://la.hitomi.la/galleries/1054851/001_main_image.jpg
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/(?<image_id>\d+)\w*\.[a-z]+\z}i
"https://hitomi.la/reader/#{$~[:gallery_id]}.html##{$~[:image_id].to_i}"
# https://aa.hitomi.la/galleries/883451/t_rena1g.png
when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/\w*\.[a-z]+\z}i
"https://hitomi.la/galleries/#{$~[:gallery_id]}.html"
else
nil
end
end
end end
end end
end end

View File

@@ -96,11 +96,6 @@ module Sources
api_illust[:description] api_illust[:description]
end end
def normalize_for_source
return nil if illust_id.blank?
"https://www.pixiv.net/artworks/#{illust_id}"
end
def tag_name def tag_name
moniker moniker
end end

View File

@@ -110,10 +110,6 @@ module Sources
end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "")
end end
def normalize_for_source
page_url
end
memoize :page, :page_json, :api_replies memoize :page, :page_json, :api_replies
end end
end end

View File

@@ -48,10 +48,6 @@ module Sources
"https://skeb.jp/@#{artist_name}/works/#{illust_id}" "https://skeb.jp/@#{artist_name}/works/#{illust_id}"
end end
def normalize_for_source
page_url
end
def api_url def api_url
return nil unless artist_name.present? && illust_id.present? return nil unless artist_name.present? && illust_id.present?
"https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}" "https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}"

View File

@@ -83,10 +83,6 @@ module Sources::Strategies
super(tag) super(tag)
end end
def normalize_for_source
parsed_url.page_url
end
def dtext_artist_commentary_desc def dtext_artist_commentary_desc
DText.from_html(artist_commentary_desc).strip DText.from_html(artist_commentary_desc).strip
end end

View File

@@ -1,14 +0,0 @@
# frozen_string_literal: true
# @see Source::URL::TwitPic
module Sources::Strategies
class TwitPic < Base
def match?
Source::URL::TwitPic === parsed_url
end
def normalize_for_source
parsed_url.page_url || url
end
end
end

View File

@@ -93,14 +93,6 @@ module Sources::Strategies
api_response[:full_text].to_s api_response[:full_text].to_s
end end
def normalize_for_source
if tag_name_from_url.present? && status_id.present?
"https://twitter.com/#{tag_name_from_url}/status/#{status_id}"
elsif status_id.present?
"https://twitter.com/i/web/status/#{status_id}"
end
end
def tags def tags
api_response.dig(:entities, :hashtags).to_a.map do |hashtag| api_response.dig(:entities, :hashtags).to_a.map do |hashtag|
[hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"] [hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"]
@@ -150,7 +142,7 @@ module Sources::Strategies
end end
def tag_name_from_url def tag_name_from_url
parsed_url.twitter_username || parsed_referer&.twitter_username parsed_url.username || parsed_referer&.username
end end
memoize :api_response memoize :api_response

View File

@@ -87,10 +87,6 @@ module Sources
end end
end end
def normalize_for_source
parsed_url.normalized_url
end
def api_response def api_response
return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank? return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank?

View File

@@ -307,15 +307,16 @@ class Post < ApplicationRecord
end end
end end
def parsed_source
Source::URL.parse(source) if web_source?
end
def normalized_source def normalized_source
return source unless web_source? parsed_source&.page_url || source
Sources::Strategies.normalize_source(source)
end end
def source_domain def source_domain
return "" unless web_source? parsed_source&.domain.to_s
Danbooru::URL.parse(normalized_source)&.domain.to_s
end end
end end

View File

@@ -194,22 +194,13 @@ module Sources
assert_equal("sa-dui", site.artist_name) assert_equal("sa-dui", site.artist_name)
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "https://www.artstation.com/artwork/ghost-in-the-shell-fandom" assert_equal("https://www.artstation.com/artwork/ghost-in-the-shell-fandom", Source::URL.page_url("https://www.artstation.com/artwork/ghost-in-the-shell-fandom"))
source2 = "https://anubis1982918.artstation.com/projects/qPVGP/" assert_equal("https://www.artstation.com/artwork/qPVGP", Source::URL.page_url("https://anubis1982918.artstation.com/projects/qPVGP/"))
source3 = "https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041" assert_equal("https://www.artstation.com/artwork/NoNmD", Source::URL.page_url("https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041"))
assert_nil(Source::URL.page_url("http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"))
assert_equal(source1, Sources::Strategies.normalize_source(source1)) assert_nil(Source::URL.page_url("https://www.artstation.com"))
assert_equal("https://anubis1982918.artstation.com/projects/qPVGP", Sources::Strategies.normalize_source(source2))
assert_equal("https://dudeunderscore.artstation.com/projects/NoNmD", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"
bad_source2 = "https://www.artstation.com"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
end end
end end
end end

View File

@@ -377,8 +377,8 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls " do
should "normalize correctly" do should "work" do
source1 = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg" source1 = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg"
source2 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg" source2 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg"
source3 = "http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png" source3 = "http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png"
@@ -386,21 +386,18 @@ module Sources
source5 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc" source5 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc"
source6 = "https://fav.me/dbc3a48" source6 = "https://fav.me/dbc3a48"
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Sources::Strategies.normalize_source(source1)) assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Source::URL.page_url(source1))
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Sources::Strategies.normalize_source(source2)) assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Source::URL.page_url(source2))
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Sources::Strategies.normalize_source(source3)) assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Source::URL.page_url(source3))
assert_equal("https://www.deviantart.com/deviation/417560500", Sources::Strategies.normalize_source(source4)) assert_equal("https://www.deviantart.com/deviation/417560500", Source::URL.page_url(source4))
assert_equal("https://www.deviantart.com/deviation/599977532", Sources::Strategies.normalize_source(source5)) assert_equal("https://www.deviantart.com/deviation/599977532", Source::URL.page_url(source5))
assert_equal("https://www.deviantart.com/deviation/685436408", Sources::Strategies.normalize_source(source6)) assert_equal("https://www.deviantart.com/deviation/685436408", Source::URL.page_url(source6))
end end
should "avoid normalizing unnormalizable urls" do should "handle inconvertible urls" do
bad_source1 = "http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg" assert_nil(Source::URL.page_url("http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg"))
bad_source2 = "http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg" assert_nil(Source::URL.page_url("http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg"))
bad_source3 = "https://deviantart.net" assert_nil(Source::URL.page_url("https://deviantart.net"))
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
end end
end end
end end

View File

@@ -135,17 +135,15 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize cover images to the profile link" do should "convert cover images to the profile url" do
cover = "https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg" cover = "https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg"
assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Sources::Strategies.normalize_source(cover)) assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Source::URL.page_url(cover))
end end
should "avoid normalizing unnormalizable urls" do should "handle inconvertible urls" do
bad_source1 = "https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg" assert_nil(Source::URL.page_url("https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg"))
bad_source2 = "https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png" assert_nil(Source::URL.page_url("https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png"))
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
end end
end end
end end

View File

@@ -82,20 +82,16 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg" source1 = "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg"
source2 = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg" source2 = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg"
source3 = "http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver." source3 = "http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver."
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source1)) assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source1))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source2)) assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source2))
assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Sources::Strategies.normalize_source(source3)) assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Source::URL.page_url(source3))
end assert_nil(Source::URL.page_url("https://pictures.hentai-foundry.com/a/AnimeFlux"))
should "avoid normalizing unnormalizable urls" do
bad_source = "https://pictures.hentai-foundry.com/a/AnimeFlux"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end end
end end

View File

@@ -122,25 +122,17 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "https://pawoo.net/@evazion/19451018/" assert_equal("https://pawoo.net/@evazion/19451018", Source::URL.page_url("https://pawoo.net/@evazion/19451018/"))
source2 = "https://pawoo.net/web/statuses/19451018/favorites" assert_equal("https://pawoo.net/web/statuses/19451018", Source::URL.page_url("https://pawoo.net/web/statuses/19451018/favorites"))
source3 = "https://baraag.net/@bardbot/105732813175612920/" assert_equal("https://baraag.net/@bardbot/105732813175612920", Source::URL.page_url("https://baraag.net/@bardbot/105732813175612920/"))
assert_equal("https://pawoo.net/@evazion/19451018", Sources::Strategies.normalize_source(source1))
assert_equal("https://pawoo.net/web/statuses/19451018", Sources::Strategies.normalize_source(source2))
assert_equal("https://baraag.net/@bardbot/105732813175612920", Sources::Strategies.normalize_source(source3))
end end
should "avoid normalizing unnormalizable urls" do should "handle inconvertible urls" do
bad_source1 = "https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png" assert_nil(Source::URL.page_url("https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png"))
bad_source2 = "https://pawoo.net/@evazion/media" assert_nil(Source::URL.page_url("https://pawoo.net/@evazion/media"))
bad_source3 = "https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png" assert_nil(Source::URL.page_url("https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png"))
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
end end
end end

View File

@@ -111,29 +111,29 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize yande.re sources correctly" do should "generate yande.re urls correctly" do
source1 = "https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png" source1 = "https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png"
source2 = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg" source2 = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg"
source3 = "https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg" source3 = "https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg"
source4 = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png" source4 = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png"
source5 = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg" source5 = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg"
assert_equal("https://yande.re/post/show/377828", Sources::Strategies.normalize_source(source1)) assert_equal("https://yande.re/post/show/377828", Source::URL.page_url(source1))
assert_equal("https://yande.re/post/show/349790", Sources::Strategies.normalize_source(source2)) assert_equal("https://yande.re/post/show/349790", Source::URL.page_url(source2))
assert_equal("https://yande.re/post/show/469784", Sources::Strategies.normalize_source(source3)) assert_equal("https://yande.re/post/show/469784", Source::URL.page_url(source3))
assert_equal("https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6", Sources::Strategies.normalize_source(source4)) assert_equal("https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6", Source::URL.page_url(source4))
assert_equal("https://yande.re/post?tags=md5:22577d2344fe694cf47f80563031b3cd", Sources::Strategies.normalize_source(source5)) assert_equal("https://yande.re/post/show?md5=22577d2344fe694cf47f80563031b3cd", Source::URL.page_url(source5))
end end
should "normalize konachan.com sources correctly" do should "generate konachan.com urls correctly" do
source1 = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg" source1 = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg"
source2 = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg" source2 = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg"
source3 = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg" source3 = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg"
assert_equal("https://konachan.com/post/show/270807", Sources::Strategies.normalize_source(source1)) assert_equal("https://konachan.com/post/show/270807", Source::URL.page_url(source1))
assert_equal("https://konachan.com/post/show/270803", Sources::Strategies.normalize_source(source2)) assert_equal("https://konachan.com/post/show/270803", Source::URL.page_url(source2))
assert_equal("https://konachan.com/post?tags=md5:99a3c4f10c327d54486259a74173fc0b", Sources::Strategies.normalize_source(source3)) assert_equal("https://konachan.com/post/show?md5=99a3c4f10c327d54486259a74173fc0b", Source::URL.page_url(source3))
end end
end end
end end

View File

@@ -98,16 +98,10 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source = "https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181" assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Source::URL.page_url("https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181"))
assert_nil(Source::URL.page_url("https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"))
assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Sources::Strategies.normalize_source(source))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end end
end end
end end

View File

@@ -159,22 +159,18 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf" source1 = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf"
source2 = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893" source2 = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893"
source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663" source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"
source4 = "http://seiga.nicovideo.jp/image/source?id=3312222" source4 = "http://seiga.nicovideo.jp/image/source?id=3312222"
assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Sources::Strategies.normalize_source(source1)) assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Source::URL.page_url(source1))
assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Sources::Strategies.normalize_source(source2)) assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Source::URL.page_url(source2))
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Sources::Strategies.normalize_source(source3)) assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Source::URL.page_url(source3))
assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Sources::Strategies.normalize_source(source4)) assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Source::URL.page_url(source4))
end assert_nil(Source::URL.page_url("https://seiga.nicovideo.jp"))
should "avoid normalizing unnormalizable urls" do
bad_source = "https://seiga.nicovideo.jp"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end end
end end

View File

@@ -317,23 +317,23 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png" source1 = "https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png"
source2 = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png" source2 = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png"
assert_equal("https://nijie.info/view.php?id=218856", Sources::Strategies.normalize_source(source1)) assert_equal("https://nijie.info/view.php?id=218856", Source::URL.page_url(source1))
assert_equal("https://nijie.info/view.php?id=287736", Sources::Strategies.normalize_source(source2)) assert_equal("https://nijie.info/view.php?id=287736", Source::URL.page_url(source2))
end end
should "avoid normalizing unnormalizable urls" do should "handle inconvertible urls" do
bad_source1 = "https://pic01.nijie.info/nijie_picture/20120211210359.jpg" bad_source1 = "https://pic01.nijie.info/nijie_picture/20120211210359.jpg"
bad_source2 = "https://pic04.nijie.info/omata/4829_20161128012012.png" bad_source2 = "https://pic04.nijie.info/omata/4829_20161128012012.png"
bad_source3 = "https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg" bad_source3 = "https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) assert_nil(Source::URL.page_url(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) assert_nil(Source::URL.page_url(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3)) assert_nil(Source::URL.page_url(bad_source3))
end end
end end

View File

@@ -29,30 +29,30 @@ module Sources
context "normalizing for source" do context "normalizing for source" do
should "normalize karabako links" do should "normalize karabako links" do
source = "http://www.karabako.net/images/karabako_38835.jpg" source = "http://www.karabako.net/images/karabako_38835.jpg"
assert_equal("http://www.karabako.net/post/view/38835", Sources::Strategies.normalize_source(source)) assert_equal("http://www.karabako.net/post/view/38835", Source::URL.page_url(source))
end end
should "normalize twipple links" do should "normalize twipple links" do
source = "http://p.twpl.jp/show/orig/mI2c3" source = "http://p.twpl.jp/show/orig/mI2c3"
assert_equal("http://p.twipple.jp/mI2c3", Sources::Strategies.normalize_source(source)) assert_equal("http://p.twipple.jp/mI2c3", Source::URL.page_url(source))
end end
should "normalize fc2 links" do should "normalize fc2 links" do
source1 = "https://blog-imgs-41.fc2.com/t/u/y/tuyadasi/file.png" source1 = "https://blog-imgs-41.fc2.com/t/u/y/tuyadasi/file.png"
source2 = "http://diary.fc2.com/user/kazuharoom/img/2020_1/29.jpg" source2 = "http://diary.fc2.com/user/kazuharoom/img/2020_1/29.jpg"
assert_equal("http://tuyadasi.blog.fc2.com/img/file.png/", Sources::Strategies.normalize_source(source1)) assert_equal("http://tuyadasi.blog.fc2.com/img/file.png", Source::URL.page_url(source1))
assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Sources::Strategies.normalize_source(source2)) assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Source::URL.page_url(source2))
end end
should "normalize facebook links" do should "normalize facebook links" do
source = "https://scontent-sin1-1.xx.fbcdn.net/hphotos-xtp1/t31.0-8/11254493_576443445841777_7716273903390212288_o.jpg" source = "https://scontent-sin1-1.xx.fbcdn.net/hphotos-xtp1/t31.0-8/11254493_576443445841777_7716273903390212288_o.jpg"
assert_equal("https://www.facebook.com/photo.php?fbid=576443445841777", Sources::Strategies.normalize_source(source)) assert_equal("https://www.facebook.com/photo?fbid=576443445841777", Source::URL.page_url(source))
end end
should "normalize sankaku links" do should "normalize sankaku links" do
source = "http://cs.sankakucomplex.com/data/sample/c2/d7/sample-c2d7270b84ac81326384d4eadd4d4746.jpg?2738848" source = "http://cs.sankakucomplex.com/data/sample/c2/d7/sample-c2d7270b84ac81326384d4eadd4d4746.jpg?2738848"
assert_equal("https://chan.sankakucomplex.com/en/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Sources::Strategies.normalize_source(source)) assert_equal("https://chan.sankakucomplex.com/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Source::URL.page_url(source))
end end
should "normalize zerochan links" do should "normalize zerochan links" do
@@ -60,17 +60,17 @@ module Sources
source2 = "https://s4.zerochan.net/Victorique.de.Blois.full.411536.jpg" source2 = "https://s4.zerochan.net/Victorique.de.Blois.full.411536.jpg"
source3 = "http://www.zerochan.net/full/1567893" source3 = "http://www.zerochan.net/full/1567893"
assert_equal("https://www.zerochan.net/183273#full", Sources::Strategies.normalize_source(source1)) assert_equal("https://www.zerochan.net/183273#full", Source::URL.page_url(source1))
assert_equal("https://www.zerochan.net/411536#full", Sources::Strategies.normalize_source(source2)) assert_equal("https://www.zerochan.net/411536#full", Source::URL.page_url(source2))
assert_equal("https://www.zerochan.net/1567893#full", Sources::Strategies.normalize_source(source3)) assert_equal("https://www.zerochan.net/1567893#full", Source::URL.page_url(source3))
end end
should "normalize minitokyo links" do should "normalize minitokyo links" do
source1 = "http://static.minitokyo.net/downloads/27/13/365677.jpg?433592448,Minitokyo.Eien.no.Aselia.Scans_365677.jpg" source1 = "http://static.minitokyo.net/downloads/27/13/365677.jpg?433592448,Minitokyo.Eien.no.Aselia.Scans_365677.jpg"
source2 = "http://static.minitokyo.net/downloads/14/33/199164.jpg?928244019" source2 = "http://static.minitokyo.net/downloads/14/33/199164.jpg?928244019"
assert_equal("http://gallery.minitokyo.net/download/365677", Sources::Strategies.normalize_source(source1)) assert_equal("http://gallery.minitokyo.net/view/365677", Source::URL.page_url(source1))
assert_equal("http://gallery.minitokyo.net/download/199164", Sources::Strategies.normalize_source(source2)) assert_equal("http://gallery.minitokyo.net/view/199164", Source::URL.page_url(source2))
end end
should "normalize gelbooru links" do should "normalize gelbooru links" do
@@ -78,87 +78,83 @@ module Sources
source2 = "http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png" source2 = "http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png"
source3 = "https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg" source3 = "https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg"
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Sources::Strategies.normalize_source(source1)) assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Source::URL.page_url(source1))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Sources::Strategies.normalize_source(source2)) assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Source::URL.page_url(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Sources::Strategies.normalize_source(source3)) assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Source::URL.page_url(source3))
end end
should "normalize wikia links" do should "normalize wikia links" do
source = "https://vignette.wikia.nocookie.net/valkyriecrusade/images/c/c5/Crimson_Hatsune_H.png/revision/latest?cb=20180702031954" source = "https://vignette.wikia.nocookie.net/valkyriecrusade/images/c/c5/Crimson_Hatsune_H.png/revision/latest?cb=20180702031954"
assert_equal("https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png", Sources::Strategies.normalize_source(source)) assert_equal("https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png", Source::URL.page_url(source))
end end
should "normalize e-shuushuu links" do should "normalize e-shuushuu links" do
source = "http://e-shuushuu.net/images/2014-07-22-662472.png" source = "http://e-shuushuu.net/images/2014-07-22-662472.png"
assert_equal("https://e-shuushuu.net/image/662472", Sources::Strategies.normalize_source(source)) assert_equal("https://e-shuushuu.net/image/662472", Source::URL.page_url(source))
end end
should "normalize nijigen-daiaru links" do should "normalize nijigen-daiaru links" do
source = "http://jpg.nijigen-daiaru.com/19909/029.jpg" source = "http://jpg.nijigen-daiaru.com/19909/029.jpg"
assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Sources::Strategies.normalize_source(source)) assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Source::URL.page_url(source))
end end
should "normalize doujinantena links" do should "normalize doujinantena links" do
source = "http://sozai.doujinantena.com/contents_jpg/d6c39f09d435e32c221e4ef866eceba4/015.jpg" source = "http://sozai.doujinantena.com/contents_jpg/d6c39f09d435e32c221e4ef866eceba4/015.jpg"
assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Sources::Strategies.normalize_source(source)) assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Source::URL.page_url(source))
end end
should "normalize paheal.net links" do should "normalize paheal.net links" do
source = "http://rule34-data-010.paheal.net/_images/854806addcd3b1246424e7cea49afe31/852405%20-%20Darkstalkers%20Felicia.jpg" source = "http://rule34-data-010.paheal.net/_images/854806addcd3b1246424e7cea49afe31/852405%20-%20Darkstalkers%20Felicia.jpg"
assert_equal("https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1", Sources::Strategies.normalize_source(source)) assert_equal("https://rule34.paheal.net/post/view/852405", Source::URL.page_url(source))
end end
should "normalize shimmie.katawa-shoujo.com links" do should "normalize shimmie.katawa-shoujo.com links" do
source = "http://shimmie.katawa-shoujo.com/image/2740.png" source = "http://shimmie.katawa-shoujo.com/image/2740.png"
assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Sources::Strategies.normalize_source(source)) assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Source::URL.page_url(source))
end end
should "normalize rule34.xxx links" do should "normalize rule34.xxx links" do
source = "https://us.rule34.xxx//images/1802/0adc8fa0604dc445b4b47e6f4c436a08.jpeg?1949807" source = "https://us.rule34.xxx//images/1802/0adc8fa0604dc445b4b47e6f4c436a08.jpeg?1949807"
assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Sources::Strategies.normalize_source(source)) assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Source::URL.page_url(source))
end end
should "normalize diarypro links" do should "normalize diarypro links" do
source1 = "http://nekomataya.net/diarypro/data/upfile/216-1.jpg" source1 = "http://nekomataya.net/diarypro/data/upfile/216-1.jpg"
source2 = "http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=716-3.jpg" source2 = "http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=716-3.jpg"
assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Sources::Strategies.normalize_source(source1)) assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Source::URL.page_url(source1))
assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Sources::Strategies.normalize_source(source2)) assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Source::URL.page_url(source2))
end end
should "normalize minus.com links" do should "normalize minus.com links" do
source = "http://i1.minus.com/ibb0DuE2Ds0yE6.jpg" source = "http://i1.minus.com/ibb0DuE2Ds0yE6.jpg"
assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Sources::Strategies.normalize_source(source)) assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Source::URL.page_url(source))
end end
should "normalize photozou links" do should "normalize photozou links" do
source1 = "http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg" source1 = "http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg"
source2 = "http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg" source2 = "http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg"
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Sources::Strategies.normalize_source(source1)) assert_equal("https://photozou.jp/photo/show/1481794/161537258", Source::URL.page_url(source1))
assert_equal("https://photozou.jp/photo/show/1986212/118493247", Sources::Strategies.normalize_source(source2)) assert_equal("https://photozou.jp/photo/show/1986212/118493247", Source::URL.page_url(source2))
end end
should "normalize toranoana links" do should "normalize toranoana links" do
source1 = "http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg" source1 = "http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg"
source2 = "https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg" source2 = "https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg"
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695/", Sources::Strategies.normalize_source(source1)) assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695", Source::URL.page_url(source1))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417/", Sources::Strategies.normalize_source(source2)) assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417", Source::URL.page_url(source2))
end end
should "normalize hitomi.la links" do should "normalize hitomi.la links" do
source1 = "https://aa.hitomi.la/galleries/883451/t_rena1g.png" source1 = "https://aa.hitomi.la/galleries/883451/t_rena1g.png"
source2 = "https://la.hitomi.la/galleries/1054851/001_main_image.jpg" source2 = "https://la.hitomi.la/galleries/1054851/001_main_image.jpg"
assert_equal("https://hitomi.la/galleries/883451.html", Sources::Strategies.normalize_source(source1)) assert_equal("https://hitomi.la/galleries/883451.html", Source::URL.page_url(source1))
assert_equal("https://hitomi.la/reader/1054851.html#1", Sources::Strategies.normalize_source(source2)) assert_equal("https://hitomi.la/reader/1054851.html#1", Source::URL.page_url(source2))
end end
should "leave unknown sources as they are" do should "leave unknown sources as they are" do
source1 = "https://google.com" assert_nil(Source::URL.page_url("https://google.com"))
source2 = "a bad non-http source" assert_nil(Source::URL.page_url("a bad non-http source"))
source3 = "https://example.com/Folder/中央大学.html" assert_nil(Source::URL.page_url("https://example.com/Folder/中央大学.html"))
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2))
assert_equal(source3, Sources::Strategies.normalize_source(source3))
end end
end end
end end

View File

@@ -348,19 +348,19 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "http://i2.pixiv.net/img12/img/zenze/39749565.png" source1 = "http://i2.pixiv.net/img12/img/zenze/39749565.png"
source2 = "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg" source2 = "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg"
source3 = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg" source3 = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
source4 = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png" source4 = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png"
source5 = "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip" source5 = "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip"
assert_equal("https://www.pixiv.net/artworks/39749565", Sources::Strategies.normalize_source(source1)) assert_equal("https://www.pixiv.net/artworks/39749565", Source::URL.page_url(source1))
assert_equal("https://www.pixiv.net/artworks/39735353", Sources::Strategies.normalize_source(source2)) assert_equal("https://www.pixiv.net/artworks/39735353", Source::URL.page_url(source2))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source3)) assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source3))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source4)) assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source4))
assert_equal("https://www.pixiv.net/artworks/44524589", Sources::Strategies.normalize_source(source5)) assert_equal("https://www.pixiv.net/artworks/44524589", Source::URL.page_url(source5))
end end
end end
end end

View File

@@ -99,10 +99,10 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "avoid normalizing unnormalizable urls" do should "handle inconvertible urls" do
bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a" bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source)) assert_nil(Source::URL.page_url(bad_source))
end end
end end
end end

View File

@@ -228,22 +228,18 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "https://octrain1020.tumblr.com/post/190713122589" source1 = "https://octrain1020.tumblr.com/post/190713122589"
source2 = "https://octrain1020.tumblr.com/image/190713122589" source2 = "https://octrain1020.tumblr.com/image/190713122589"
source3 = "https://octrain1020.tumblr.com/image/190713122589#asd" source3 = "https://octrain1020.tumblr.com/image/190713122589#asd"
source4 = "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false" source4 = "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false"
assert_equal(source1, Sources::Strategies.normalize_source(source1)) assert_equal(source1, Source::URL.page_url(source1))
assert_equal(source1, Sources::Strategies.normalize_source(source2)) assert_equal(source1, Source::URL.page_url(source2))
assert_equal(source1, Sources::Strategies.normalize_source(source3)) assert_equal(source1, Source::URL.page_url(source3))
assert_equal("https://superboin.tumblr.com/post/141169066579", Sources::Strategies.normalize_source(source4)) assert_equal("https://superboin.tumblr.com/post/141169066579", Source::URL.page_url(source4))
end assert_nil(Source::URL.page_url("https://octrain1020.tumblr.com/"))
should "avoid normalizing unnormalizable urls" do
bad_source = "https://octrain1020.tumblr.com/"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end end
end end
end end

View File

@@ -2,20 +2,11 @@ require 'test_helper'
module Sources module Sources
class TwitPicTest < ActiveSupport::TestCase class TwitPicTest < ActiveSupport::TestCase
context "normalizing for source" do context "generating page urls" do
should "normalize d3j5vwomefv46c.cloudfront.net links" do should "work" do
source = "http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199" assert_equal("https://twitpic.com/dks0tb", Source::URL.page_url("http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"))
assert_equal("https://twitpic.com/dks0tb", Sources::Strategies.normalize_source(source)) assert_equal("https://twitpic.com/dvitq3", Source::URL.page_url("https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA"))
end assert_equal("https://twitpic.com/dtnuru", Source::URL.page_url("https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"))
should "normalize dn3pm25xmtlyu.cloudfront.net links" do
source = "https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA"
assert_equal("https://twitpic.com/dvitq3", Sources::Strategies.normalize_source(source))
end
should "normalize o.twimg.com links" do
source = "https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"
assert_equal("https://twitpic.com/dtnuru", Sources::Strategies.normalize_source(source))
end end
end end
end end

View File

@@ -291,18 +291,18 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "https://twitter.com/i/web/status/1261877313349640194" source1 = "https://twitter.com/i/web/status/1261877313349640194"
source2 = "https://twitter.com/BOW999/status/1261877313349640194" source2 = "https://twitter.com/BOW999/status/1261877313349640194"
source3 = "https://twitter.com/BOW999/status/1261877313349640194/photo/1" source3 = "https://twitter.com/BOW999/status/1261877313349640194/photo/1"
source4 = "https://twitter.com/BOW999/status/1261877313349640194?s=19" source4 = "https://twitter.com/BOW999/status/1261877313349640194?s=19"
assert_equal(source1, Sources::Strategies.normalize_source(source1)) assert_equal(source1, Source::URL.page_url(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2)) assert_equal(source2, Source::URL.page_url(source2))
assert_equal(source2, Sources::Strategies.normalize_source(source3)) assert_equal(source2, Source::URL.page_url(source3))
assert_equal(source2, Sources::Strategies.normalize_source(source4)) assert_equal(source2, Source::URL.page_url(source4))
assert_equal("https://www.twitter.com/irt_5433", Sources::Strategies.normalize_source("https://www.twitter.com/irt_5433")) assert_nil(Source::URL.page_url("https://www.twitter.com/irt_5433"))
end end
end end
end end

View File

@@ -113,25 +113,19 @@ module Sources
end end
end end
context "normalizing for source" do context "generating page urls" do
should "normalize correctly" do should "work" do
source1 = "https://www.weibo.com/3150932560/H4cFbeKKA?from=page_1005053150932560_profile&wvr=6&mod=weibotime" source1 = "https://www.weibo.com/3150932560/H4cFbeKKA?from=page_1005053150932560_profile&wvr=6&mod=weibotime"
source2 = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81" source2 = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81"
source3 = "https://m.weibo.cn/status/4173757483008088?luicode=20000061&lfid=4170879204256635" source3 = "https://m.weibo.cn/status/4173757483008088?luicode=20000061&lfid=4170879204256635"
source4 = "https://tw.weibo.com/SEINEN/4098035921690224" source4 = "https://tw.weibo.com/SEINEN/4098035921690224"
assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Sources::Strategies.normalize_source(source1)) assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Source::URL.page_url(source1))
assert_equal("https://m.weibo.cn/detail/4242129997905387", Sources::Strategies.normalize_source(source2)) assert_equal("https://m.weibo.cn/detail/4242129997905387", Source::URL.page_url(source2))
assert_equal("https://m.weibo.cn/status/4173757483008088", Sources::Strategies.normalize_source(source3)) assert_equal("https://m.weibo.cn/status/4173757483008088", Source::URL.page_url(source3))
assert_equal("https://m.weibo.cn/detail/4098035921690224", Sources::Strategies.normalize_source(source4)) assert_equal("https://m.weibo.cn/detail/4098035921690224", Source::URL.page_url(source4))
end assert_nil(Source::URL.page_url("https://weibo.com/u/"))
assert_nil(Source::URL.page_url("https://www.weibo.com/4ubergine/photos"))
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://weibo.com/u/"
bad_source2 = "https://www.weibo.com/4ubergine/photos"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
end end
end end
end end