diff --git a/app/logical/source/url.rb b/app/logical/source/url.rb index 7401d890e..66deca667 100644 --- a/app/logical/source/url.rb +++ b/app/logical/source/url.rb @@ -16,7 +16,7 @@ # url = Source::URL.parse("https://twitter.com/yasunavert/status/1496123903290314755") # url.site_name # => "Twitter" # url.status_id # => "1496123903290314755" -# url.twitter_username # => "yasunavert" +# url.username # => "yasunavert" # # @see Danbooru::URL module Source @@ -53,7 +53,7 @@ module Source # @return [Source::URL] def self.parse!(url) url = Danbooru::URL.new(url) - subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL + subclass = SUBCLASSES.find { |c| c.match?(url) } || Source::URL::Null subclass.new(url) end @@ -78,39 +78,30 @@ module Source # # @return [String] def site_name - # XXX should go in dedicated subclasses. - case host - when /ask\.fm\z/i - "Ask.fm" - when /bcy\.net\z/i - "BCY" - when /booth\.pm\z/i - "Booth.pm" - when /circle\.ms\z/i - "Circle.ms" - when /dlsite\.(com|net)\z/i - "DLSite" - when /doujinshi\.mugimugi\.org\z/i - "Doujinshi.org" - when /fc2\.com\z/i - "FC2" - when /ko-fi\.com\z/i - "Ko-fi" - when /mixi\.jp\z/i - "Mixi.jp" - when /piapro\.jp\z/i - "Piapro.jp" - when /sakura\.ne\.jp\z/i - "Sakura.ne.jp" - else - if self.class == Source::URL - # "www.melonbooks.co.jp" => "Melonbooks" - parsed_domain.sld.titleize - else - # "Source::URL::NicoSeiga" => "Nico Seiga" - self.class.name.demodulize.titleize - end - end + # "Source::URL::NicoSeiga" => "Nico Seiga" + self.class.name.demodulize.titleize + end + + # Convert an image URL to the URL of the page containing the image, or + # return nil if it's not possible to convert the current URL to a page URL. + # + # When viewing a post, the source will be shown as the page URL if it's + # possible to convert the source from an image URL to a page URL. 
+ # + # Examples: + # + # * https://i.pximg.net/img-original/img/2014/10/03/18/10/20/46324488_p0.png + # => https://www.pixiv.net/artworks/46324488 + # + # * https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg + # => https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896 + # + # * https://pbs.twimg.com/media/EBGbJe_U8AA4Ekb.jpg:orig + # => nil + # + # @return [String, nil] + def page_url + nil end # Convert the current URL into a profile URL, or return nil if it's not @@ -134,6 +125,14 @@ module Source nil end + def self.page_url(url) + Source::URL.parse(url)&.page_url + end + + def self.profile_url(url) + Source::URL.parse(url)&.profile_url + end + protected def initialize(...) super(...) parse diff --git a/app/logical/source/url/art_station.rb b/app/logical/source/url/art_station.rb index c67134538..25cc31404 100644 --- a/app/logical/source/url/art_station.rb +++ b/app/logical/source/url/art_station.rb @@ -79,6 +79,10 @@ class Source::URL::ArtStation < Source::URL end end + def page_url + "https://www.artstation.com/artwork/#{work_id}" if work_id.present? + end + def profile_url "https://www.artstation.com/#{username}" if username.present? end diff --git a/app/logical/source/url/fanbox.rb b/app/logical/source/url/fanbox.rb index 7e9cc6f7b..094bf7bd5 100644 --- a/app/logical/source/url/fanbox.rb +++ b/app/logical/source/url/fanbox.rb @@ -78,6 +78,17 @@ class Source::URL::Fanbox < Source::URL to_s.gsub(%r{/[cw]/\w+/}, "/") if image_url? end + def page_url + if username.present? && work_id.present? + "https://#{username}.fanbox.cc/posts/#{work_id}" + elsif user_id.present? && work_id.present? + "https://www.pixiv.net/fanbox/creator/#{user_id}/post/#{work_id}" + elsif user_id.present? && image_url? 
+ # Use profile url as page url for cover images (XXX may cause problems with bad_source detection) + "https://www.pixiv.net/fanbox/creator/#{user_id}" + end + end + def profile_url if username.present? "https://#{username}.fanbox.cc" diff --git a/app/logical/source/url/fantia.rb b/app/logical/source/url/fantia.rb index 7253628f9..262b9a022 100644 --- a/app/logical/source/url/fantia.rb +++ b/app/logical/source/url/fantia.rb @@ -55,6 +55,7 @@ class Source::URL::Fantia < Source::URL # https://fantia.jp/fanclubs/64496 # https://fantia.jp/fanclubs/1654/posts + # https://job.fantia.jp/fanclubs/5734 in _, "fanclubs", /\d+/ => fanclub_id, *rest @fanclub_id = fanclub_id diff --git a/app/logical/source/url/fc2.rb b/app/logical/source/url/fc2.rb index 76324e14e..dbb96953b 100644 --- a/app/logical/source/url/fc2.rb +++ b/app/logical/source/url/fc2.rb @@ -1,7 +1,7 @@ # frozen_string_literal: true class Source::URL::Fc2 < Source::URL - attr_reader :username, :profile_url + attr_reader :username, :profile_url, :page_url def self.match?(url) url.domain.in?(%w[fc2.com fc2blog.net fc2blog.us]) @@ -48,6 +48,7 @@ class Source::URL::Fc2 < Source::URL # http://blog.fc2.com/g/b/o/gbot/20071023195141.jpg in (/^blog-imgs-\d+(-origin)?$/ | "blog"), "fc2", "com", /^\w$/, /^\w$/, /^\w$/, username, file @username = username + @page_url = "http://#{username}.blog.fc2.com/img/#{file}" @profile_url = "http://#{username}.blog.fc2.com" # http://diary.fc2.com/user/yuuri/img/2005_12/26.jpg @@ -55,6 +56,9 @@ class Source::URL::Fc2 < Source::URL # http://diary.fc2.com/user/kazuharoom/img/2015_5/22.jpg in /diary\d*$/, "fc2", "com", "user", username, "img", date, file @username = username + @year, @month = date.split("_") + @day = filename + @page_url = "http://#{host}/cgi-sys/ed.cgi/#{username}?Y=#{@year}&M=#{@month}&D=#{@day}" @profile_url = "http://diary.fc2.com/cgi-sys/ed.cgi/#{username}" # http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom/?Y=2012&M=10&D=22 diff --git 
a/app/logical/source/url/foundation.rb b/app/logical/source/url/foundation.rb index 76c3a8c0b..6e4f94279 100644 --- a/app/logical/source/url/foundation.rb +++ b/app/logical/source/url/foundation.rb @@ -6,6 +6,7 @@ # Unsupported patterns: # * https://foundation.app/@ <- This seems to be a novelty account. # * https://foundation.app/mochiiimo <- no @ +# * https://foundation.app/collection/kgfgen class Source::URL::Foundation < Source::URL attr_reader :username, :token_id, :work_id, :hash diff --git a/app/logical/source/url/hentai_foundry.rb b/app/logical/source/url/hentai_foundry.rb index e5c37e687..95f5da266 100644 --- a/app/logical/source/url/hentai_foundry.rb +++ b/app/logical/source/url/hentai_foundry.rb @@ -63,6 +63,14 @@ class Source::URL::HentaiFoundry < Source::URL end end + def page_url + if username.present? && work_id.present? + "https://www.hentai-foundry.com/pictures/user/#{username}/#{work_id}" + elsif work_id.present? + "https://www.hentai-foundry.com/pic-#{work_id}" + end + end + def profile_url "https://www.hentai-foundry.com/user/#{username}" if username.present? end diff --git a/app/logical/source/url/instagram.rb b/app/logical/source/url/instagram.rb index c9f8a826d..bc03fa0d4 100644 --- a/app/logical/source/url/instagram.rb +++ b/app/logical/source/url/instagram.rb @@ -34,6 +34,10 @@ class Source::URL::Instagram < Source::URL end end + def page_url + "https://www.instagram.com/p/#{work_id}/" if work_id.present? + end + def profile_url # Instagram URLs canonically end with "/" "https://www.instagram.com/#{username}/" if username.present? diff --git a/app/logical/source/url/lofter.rb b/app/logical/source/url/lofter.rb index bbd623ef7..f219b7cbb 100644 --- a/app/logical/source/url/lofter.rb +++ b/app/logical/source/url/lofter.rb @@ -52,6 +52,10 @@ class Source::URL::Lofter < Source::URL "#{site}#{path}" if image_url? end + def page_url + "https://#{username}.lofter.com/post/#{work_id}" if username.present? && work_id.present? 
+ end + def profile_url "https://#{username}.lofter.com" if username.present? end diff --git a/app/logical/source/url/mastodon.rb b/app/logical/source/url/mastodon.rb index 056cfc76a..78b39ff5d 100644 --- a/app/logical/source/url/mastodon.rb +++ b/app/logical/source/url/mastodon.rb @@ -77,6 +77,14 @@ class Source::URL::Mastodon < Source::URL full_image_url.present? end + def page_url + if username.present? && work_id.present? + "https://#{host}/@#{username}/#{work_id}" + elsif work_id.present? + "https://#{host}/web/statuses/#{work_id}" + end + end + def profile_url if username.present? "https://#{host}/@#{username}" diff --git a/app/logical/source/url/moebooru.rb b/app/logical/source/url/moebooru.rb index 10e4e7679..086f70b64 100644 --- a/app/logical/source/url/moebooru.rb +++ b/app/logical/source/url/moebooru.rb @@ -85,6 +85,14 @@ class Source::URL::Moebooru < Source::URL end end + def page_url + if work_id.present? + "https://#{domain}/post/show/#{work_id}" + elsif md5.present? + "https://#{domain}/post/show?md5=#{md5}" + end + end + def self.full_image_url(site_name, md5, file_ext, post_id = nil) case site_name when "Yande.re" diff --git a/app/logical/source/url/newgrounds.rb b/app/logical/source/url/newgrounds.rb index fbab609c0..32a45a324 100644 --- a/app/logical/source/url/newgrounds.rb +++ b/app/logical/source/url/newgrounds.rb @@ -59,6 +59,12 @@ class Source::URL::Newgrounds < Source::URL url.host == "art.ngfiles.com" end + def page_url + if username.present? && work_title.present? + "https://www.newgrounds.com/art/view/#{username}/#{work_title}" + end + end + def profile_url "https://#{username}.newgrounds.com" if username.present? end diff --git a/app/logical/source/url/nijie.rb b/app/logical/source/url/nijie.rb index 658577b69..e988c5d30 100644 --- a/app/logical/source/url/nijie.rb +++ b/app/logical/source/url/nijie.rb @@ -94,6 +94,10 @@ class Source::URL::Nijie < Source::URL to_s.remove(%r{__rs_\w+/}i).gsub("http:", "https:") if image_url? 
end + def page_url + "https://nijie.info/view.php?id=#{work_id}" if work_id.present? + end + def profile_url "https://nijie.info/members.php?id=#{user_id}" if user_id.present? end diff --git a/app/logical/source/url/null.rb b/app/logical/source/url/null.rb new file mode 100644 index 000000000..d528018e6 --- /dev/null +++ b/app/logical/source/url/null.rb @@ -0,0 +1,201 @@ +# frozen_string_literal: true + +class Source::URL::Null < Source::URL + attr_reader :work_id, :page_url + + def self.match?(url) + true + end + + def site_name + case host + when /ask\.fm\z/i + "Ask.fm" + when /bcy\.net\z/i + "BCY" + when /booth\.pm\z/i + "Booth.pm" + when /circle\.ms\z/i + "Circle.ms" + when /dlsite\.(com|net)\z/i + "DLSite" + when /doujinshi\.mugimugi\.org\z/i + "Doujinshi.org" + when /ko-fi\.com\z/i + "Ko-fi" + when /mixi\.jp\z/i + "Mixi.jp" + when /piapro\.jp\z/i + "Piapro.jp" + when /sakura\.ne\.jp\z/i + "Sakura.ne.jp" + else + # "www.melonbooks.co.jp" => "Melonbooks" + parsed_domain.sld.titleize + end + end + + def parse + case [subdomain, domain, *path_segments] + + # http://nekomataya.net/diarypro/data/upfile/66-1.jpg + # http://www117.sakura.ne.jp/~cat_rice/diarypro/data/upfile/31-1.jpg + # http://webknight0.sakura.ne.jp/cgi-bin/diarypro/data/upfile/9-1.jpg + in _, _, *subdirs, "diarypro", "data", "upfile", /^(\d+)-\d+\.(jpg|png|gif)$/ => file + @work_id = $1 + @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/") + + # http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=723-4.jpg + # http://www.danshaku.sakura.ne.jp/cgi-bin/diarypro/diary.cgi?mode=image&upfile=56-1.jpg + # http://www.yanbow.com/~myanie/diarypro/diary.cgi?mode=image&upfile=279-1.jpg + in _, _, *subdirs, "diarypro", "diary.cgi" if params[:mode] == "image" && params[:upfile].present? 
+ @work_id = params[:upfile][/^\d+/] + @page_url = [site, *subdirs, "diarypro/diary.cgi?no=#{@work_id}"].join("/") + + # http://com2.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/018.jpg + # http://sozai.doujinantena.com/contents_jpg/cf0224563cf7a75450596308fe651d5f/009.jpg + in _, "doujinantena.com", "contents_jpg", /^\h{32}$/ => md5, *rest + @md5 = md5 + @page_url = "http://doujinantena.com/page.php?id=#{md5}" + + # https://e-shuushuu.net/images/2017-07-19-915628.jpeg + in _, "e-shuushuu.net", "images", /^\d{4}-\d{2}-\d{2}-(\d+)\.(jpeg|jpg|png|gif)$/i + @work_id = $1 + @page_url = "https://e-shuushuu.net/image/#{@work_id}" + + # https://scontent.fmnl9-2.fna.fbcdn.net/v/t1.6435-9/196345051_961754654392125_8855002558147907833_n.jpg?_nc_cat=103&ccb=1-5&_nc_sid=0debeb&_nc_ohc=EB1RGiEOtyEAX9XE7aL&_nc_ht=scontent.fmnl9-2.fna&oh=00_AT8NNz_keqQ6VJeC1UVSMULhjaP3iykm-ONSMR7IrtarUQ&oe=6257862E + # https://scontent.fmnl8-2.fna.fbcdn.net/v/t1.6435-9/fr/cp0/e15/q65/80900683_480934615898749_6481759463945535488_n.jpg?_nc_cat=107&ccb=1-3&_nc_sid=8024bb&_nc_ohc=cCYFUzyHDmUAX-YHJIw&_nc_ht=scontent.fmnl8-2.fna&oh=e45c3837afcfefb6a4d93adfecef88c1&oe=60F6E392 + # https://scontent.fmnl13-1.fna.fbcdn.net/v/t31.18172-8/22861751_1362164640578443_432921612329393062_o.jpg + # https://scontent-sin1-1.xx.fbcdn.net/hphotos-xlp1/t31.0-8/s960x960/12971037_586686358150819_495608200196301072_o.jpg + in _, "fbcdn.net", *subdirs, /^\d+_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/ + @work_id = $1 + @page_url = "https://www.facebook.com/photo?fbid=#{@work_id}" + + # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xlp1/t31.0-8/s960x960/13173066_623015164516858_1844421675339995359_o.jpg + # https://fbcdn-sphotos-h-a.akamaihd.net/hphotos-ak-xpf1/v/t1.0-9/s720x720/12032214_991569624217563_4908408819297057893_n.png?oh=efe6ea26aed89c8a12ddc1832b1f0157&oe=5667D5B1&__gda__=1453845772_c742c726735047f2feb836b845ff296f + in /fbcdn/, "akamaihd.net", *subdirs, /^\d_(\d+)_(?:\d+_){1,3}[no]\.(jpg|png)$/ + 
@work_id = $1 + @page_url = "https://www.facebook.com/photo.php?fbid=#{work_id}" + + # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg + # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png + # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg + # https://video-cdn3.gelbooru.com/images/62/95/6295154d082f04009160261b90e7176e.mp4 + # https://img2.gelbooru.com//images/a9/64/a96478bbf9bc3f0584f2b5ddf56025fa.webm + in _, "gelbooru.com", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpeg|jpg|png|gif|mp4|webm)$/ + @md5 = $1 + @page_url = "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{@md5}" + + # https://a.hitomi.la/galleries/907838/1.png + # https://0a.hitomi.la/galleries/1169701/23.png + # https://aa.hitomi.la/galleries/990722/003_01_002.jpg + # https://la.hitomi.la/galleries/1054851/001_main_image.jpg + in _, "hitomi.la", "galleries", gallery_id, /^(\d+)\w*\.(jpg|png|gif)$/ => image_id + @gallery_id = gallery_id + @image_id = $1.to_i + @page_url = "https://hitomi.la/reader/#{gallery_id}.html##{@image_id}" + + # https://aa.hitomi.la/galleries/883451/t_rena1g.png + in _, "hitomi.la", "galleries", gallery_id, file + @gallery_id = gallery_id + @page_url = "https://hitomi.la/galleries/#{gallery_id}.html" + + # http://www.karabako.net/images/karabako_43878.jpg + # http://www.karabako.net/imagesub/karabako_43222_215.jpg + in _, "karabako.net", ("images" | "imagesub"), /^karabako_(\d+)/ + @work_id = $1 + @page_url = "http://www.karabako.net/post/view/#{work_id}" + + # http://static.minitokyo.net/downloads/31/33/764181.jpg + in _, "minitokyo.net", "downloads", /^\d{2}$/, /^\d{2}$/, file + @work_id = filename + @page_url = "http://gallery.minitokyo.net/view/#{@work_id}" + + # http://i.minus.com/j2LcOC52dGLtB.jpg + # http://i5.minus.com/ik26grnRJAmYh.jpg + in _, "minus.com", /^[ij]([a-zA-Z0-9]{12,})\.(jpg|png|gif)$/ + @work_id = $1 + @page_url = 
"http://minus.com/i/#{@work_id}" + + # http://jpg.nijigen-daiaru.com/7364/013.jpg + in "jpg", "nijigen-daiaru.com", /^\d+$/ => work_id, file + @work_id = work_id + @page_url = "http://nijigen-daiaru.com/book.php?idb=#{@work_id}" + + # http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg + # http://kura3.photozou.jp/pub/741/2662741/photo/160341863_624.v1353780834.jpg + in _, "photozou.jp", "pub", /^\d+$/, user_id, "photo", /^(\d+)/ => file + @user_id = user_id + @work_id = $1 + @page_url = "https://photozou.jp/photo/show/#{@user_id}/#{@work_id}" + + # https://tulip.paheal.net/_images/4f309b2b680da9c3444ed462bb172214/3910816%20-%20Dark_Magician_Girl%20MINK343%20Yu-Gi-Oh!.jpg + # http://rule34-data-002.paheal.net/_images/2ab55f9291c8f2c68cdbeac998714028/2401510%20-%20Ash_Ketchum%20Lillie%20Porkyman.jpg + # http://rule34-images.paheal.net/c4710f05e76bdee22fcd0d62bf1ac840/262685%20-%20mabinogi%20nao.jpg + in _, "paheal.net", *subdirs, /^\h{32}$/ => md5, /^(\d+)/ => file + @md5 = md5 + @work_id = $1 + @page_url = "https://rule34.paheal.net/post/view/#{@work_id}" + + # https://api-cdn-mp4.rule34.xxx/images/4330/2f85040320f64c0e42128a8b8f6071ce.mp4 + # https://ny5webm.rule34.xxx//images/4653/3c63956b940d0ff565faa8c7555b4686.mp4?5303486 + # https://img.rule34.xxx//images/4977/7d76919c2f713c580f69fe129d2d1a44.jpeg?5668795 + # http://rule34.xxx//images/993/5625625970c9ce8c5121fde518c2c4840801cd29.jpg?992983 + # http://img3.rule34.xxx/img/rule34//images/1180/76c6497b5138c4122710c2d05458e729a8d34f7b.png?1190815 + # http://aimg.rule34.xxx//samples/1267/sample_d628f215f27815dc9c1d365a199ee68e807efac1.jpg?1309664 + in _, "rule34.xxx", ("images" | "samples"), *subdirs, /^(?:sample_)?(\h{32})\.(jpg|jpeg|png|gif|webm|mp4)$/ + @md5 = $1 + @page_url = "https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}" + + # https://cs.sankakucomplex.com/data/68/6c/686ceee03af38fe4ceb45bf1c50947e0.jpg?e=1591893718&m=fLlJfTrK_j2Rnc0uIHNC3w + # 
https://v.sankakucomplex.com/data/24/ff/24ff5da1fd7ed051b083b36e4e51de8e.mp4?e=1644999580&m=-OtZg2QdtKbibMte8vlsdw&expires=1644999580&token=0YUdUKKwTmvpozhG1WW_nRvSUQw3WJd574andQv-KYY + # https://cs.sankakucomplex.com/data/sample/2a/45/sample-2a45c67281b0fcfd26208063f81a3114.jpg?e=1590609355&m=cexHhVyJguoZqPB3z3N7aA + # http://c3.sankakucomplex.com/data/sample/8a/44/preview8a44211650e818ef07e5d00284c20a14.jpg + in _, "sankakucomplex.com", "data", *subdirs, /^(?:preview|sample-)?(\h{32})\.(jpg|jpeg|gif|png|webm|mp4)$/ + @md5 = $1 + @page_url = "https://chan.sankakucomplex.com/post/show?md5=#{@md5}" + + # http://shimmie.katawa-shoujo.com/image/3657.jpg + in "shimmie", "katawa-shoujo.com", "image", file + @work_id = filename + @page_url = "https://shimmie.katawa-shoujo.com/post/view/#{@work_id}" + + # http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg + # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg + # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg + # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg + in ("img" | "ecdnimg"), "toranoana.jp", *subdirs, /^\d{2}$/, /^\d{4}$/, /^\d{2}$/, /^\d{2}$/, /^(\d{12})-\d+p\.jpg$/ => file + @work_id = $1 + @page_url = "https://ec.toranoana.jp/tora_r/ec/item/#{@work_id}" + + # http://p.twpl.jp/show/orig/DTaCZ + # http://p.twpl.jp/show/large/5zack + # http://p.twipple.jp/show/orig/vXqaU + in _, ("twpl.jp" | "twipple.jp"), "show", ("large" | "orig"), work_id + @work_id = work_id + @page_url = "http://p.twipple.jp/#{work_id}" + + # https://vignette.wikia.nocookie.net/queensblade/images/3/33/WGAIRI1.jpg/ + # https://vignette1.wikia.nocookie.net/valkyriecrusade/images/b/bf/Joan_Of_Arc_H.png/revision/latest?cb=20170801081004 + # https://static.wikia.nocookie.net/valkyriecrusade/images/3/3f/Joan_Of_Arc.png/revision/latest/scale-to-width-down/270?cb=20170801081000 + in _, "nocookie.net", wiki, "images", /^\h$/, /^\h\h$/, file, *rest + @wiki = wiki + @file 
= file + @page_url = "https://#{wiki}.fandom.com/wiki/File:#{file}" + + # https://static.zerochan.net/Fullmetal.Alchemist.full.2831797.png + # https://s1.zerochan.net/Cocoa.Cookie.600.2957938.jpg + # http://static.zerochan.net/full/24/13/90674.jpg + in _, "zerochan.net", *subdirs, /(\d+)\.(jpg|png|gif)$/ + @work_id = $1 + @page_url = "https://www.zerochan.net/#{@work_id}#full" + + # http://www.zerochan.net/full/1567893 + in _, "zerochan.net", "full", /^\d+$/ => work_id + @work_id = work_id + @page_url = "https://www.zerochan.net/#{@work_id}#full" + + else + end + end +end diff --git a/app/logical/source/url/plurk.rb b/app/logical/source/url/plurk.rb index 97e300f4d..0c1a5611a 100644 --- a/app/logical/source/url/plurk.rb +++ b/app/logical/source/url/plurk.rb @@ -52,6 +52,10 @@ class Source::URL::Plurk < Source::URL host == "images.plurk.com" end + def page_url + "https://www.plurk.com/p/#{work_id}" if work_id.present? + end + def profile_url "https://www.plurk.com/#{username}" if username.present? end diff --git a/app/logical/source/url/skeb.rb b/app/logical/source/url/skeb.rb index 20efd6bc3..dc62b5a7f 100644 --- a/app/logical/source/url/skeb.rb +++ b/app/logical/source/url/skeb.rb @@ -43,6 +43,10 @@ class Source::URL::Skeb < Source::URL end end + def page_url + "https://skeb.jp/@#{username}/works/#{work_id}" if username.present? && work_id.present? + end + def profile_url "https://skeb.jp/@#{username}" if username.present? end diff --git a/app/logical/source/url/tumblr.rb b/app/logical/source/url/tumblr.rb index c51bc0fc4..771f15e24 100644 --- a/app/logical/source/url/tumblr.rb +++ b/app/logical/source/url/tumblr.rb @@ -85,8 +85,7 @@ class Source::URL::Tumblr < Source::URL end def page_url - return nil unless @blog_name.present? && @work_id.present? - "https://#{@blog_name}.tumblr.com/post/#{@work_id}" + "https://#{blog_name}.tumblr.com/post/#{work_id}" if blog_name.present? && work_id.present? 
end def profile_url diff --git a/app/logical/source/url/twit_pic.rb b/app/logical/source/url/twit_pic.rb index 61a588e17..d814be4f4 100644 --- a/app/logical/source/url/twit_pic.rb +++ b/app/logical/source/url/twit_pic.rb @@ -72,8 +72,7 @@ class Source::URL::TwitPic < Source::URL end def page_url - return nil unless base36_id.present? - "https://twitpic.com/#{base36_id}" + "https://twitpic.com/#{base36_id}" if base36_id.present? end def profile_url diff --git a/app/logical/source/url/twitter.rb b/app/logical/source/url/twitter.rb index ca49e6cda..f2855fd38 100644 --- a/app/logical/source/url/twitter.rb +++ b/app/logical/source/url/twitter.rb @@ -26,7 +26,7 @@ class Source::URL::Twitter < Source::URL # https://developer.twitter.com/en/docs/developer-utilities/configuration/api-reference/get-help-configuration RESERVED_USERNAMES = %w[home i intent search] - attr_reader :status_id, :twitter_username, :user_id + attr_reader :status_id, :username, :user_id def self.match?(url) return false if Source::URL::TwitPic.match?(url) # TwitPic uses https://o.twimg.com/ URLs @@ -45,12 +45,12 @@ class Source::URL::Twitter < Source::URL # https://twitter.com/Kekeflipnote/status/1496555599718498319/video/1 # https://twitter.com/sato_1_11/status/1496489742791475201/photo/2 in "twitter.com", username, "status", status_id, *rest - @twitter_username = username + @username = username @status_id = status_id # https://twitter.com/motty08111213 in "twitter.com", username, *rest - @twitter_username = username unless username.in?(RESERVED_USERNAMES) + @username = username unless username.in?(RESERVED_USERNAMES) # https://twitter.com/intent/user?user_id=1485229827984531457 in "twitter.com", "intent", "user" if params[:user_id].present? @@ -58,7 +58,7 @@ class Source::URL::Twitter < Source::URL # https://twitter.com/intent/user?screen_name=ryuudog_NFT in "twitter.com", "intent", "user" if params[:screen_name].present? 
- @twitter_username = params[:screen_name] + @username = params[:screen_name] # https://twitter.com/i/user/889592953 in "twitter.com", "i", "user", user_id @@ -101,9 +101,17 @@ class Source::URL::Twitter < Source::URL "#{site}/#{@file_path}:orig" end + def page_url + if username.present? && status_id.present? + "https://twitter.com/#{username}/status/#{status_id}" + elsif status_id.present? + "https://twitter.com/i/web/status/#{status_id}" + end + end + def profile_url - if twitter_username.present? - "https://twitter.com/#{twitter_username}" + if username.present? + "https://twitter.com/#{username}" elsif user_id.present? # "https://twitter.com/i/user/#{user_id} "https://twitter.com/intent/user?user_id=#{user_id}" diff --git a/app/logical/source/url/weibo.rb b/app/logical/source/url/weibo.rb index b759d0e66..56edd7575 100644 --- a/app/logical/source/url/weibo.rb +++ b/app/logical/source/url/weibo.rb @@ -104,7 +104,7 @@ class Source::URL::Weibo < Source::URL end end - def normalized_url + def page_url if @artist_short_id.present? && @illust_base62_id.present? "https://www.weibo.com/#{@artist_short_id}/#{@illust_base62_id}" elsif mobile_url.present? 
diff --git a/app/logical/sources/strategies.rb b/app/logical/sources/strategies.rb index 1d9baafad..4062b8d33 100644 --- a/app/logical/sources/strategies.rb +++ b/app/logical/sources/strategies.rb @@ -23,7 +23,6 @@ module Sources Strategies::Foundation, Strategies::Plurk, Strategies::Tinami, - Strategies::TwitPic, Strategies::Fantia, ] end @@ -36,9 +35,5 @@ module Sources def self.canonical(url, referer) find(url, referer).canonical_url end - - def self.normalize_source(url) - find(url).normalize_for_source || url - end end end diff --git a/app/logical/sources/strategies/art_station.rb b/app/logical/sources/strategies/art_station.rb index bdbb99e83..570966d79 100644 --- a/app/logical/sources/strategies/art_station.rb +++ b/app/logical/sources/strategies/art_station.rb @@ -52,16 +52,6 @@ module Sources::Strategies end end - def normalize_for_source - return if project_id.blank? - - if artist_name_from_url.present? - "https://#{artist_name_from_url}.artstation.com/projects/#{project_id}" - else - "https://www.artstation.com/artwork/#{project_id}" - end - end - def image_urls_from_api api_response[:assets].to_a.map do |asset| if asset[:asset_type] == "image" diff --git a/app/logical/sources/strategies/base.rb b/app/logical/sources/strategies/base.rb index 6768d3103..a03c578e3 100644 --- a/app/logical/sources/strategies/base.rb +++ b/app/logical/sources/strategies/base.rb @@ -138,12 +138,6 @@ module Sources end memoize :http_downloader - # Given a post/image url, this is the normalized url that will be displayed in a post's page in its stead. - # This function should never make any network call, even indirectly. Return nil to never normalize. 
- def normalize_for_source - nil - end - def artists ArtistFinder.find_artists(profile_url) end diff --git a/app/logical/sources/strategies/deviant_art.rb b/app/logical/sources/strategies/deviant_art.rb index 0e9dd6ec5..ab0190695 100644 --- a/app/logical/sources/strategies/deviant_art.rb +++ b/app/logical/sources/strategies/deviant_art.rb @@ -65,10 +65,6 @@ module Sources end end - def normalize_for_source - page_url_from_image_url - end - def profile_url return nil if artist_name.blank? "https://www.deviantart.com/#{artist_name.downcase}" diff --git a/app/logical/sources/strategies/fanbox.rb b/app/logical/sources/strategies/fanbox.rb index 7ae584b37..c257370b0 100644 --- a/app/logical/sources/strategies/fanbox.rb +++ b/app/logical/sources/strategies/fanbox.rb @@ -34,19 +34,6 @@ module Sources end end - def normalize_for_source - if illust_id.present? - if artist_name_from_url.present? - "https://#{artist_name_from_url}.fanbox.cc/posts/#{illust_id}" - elsif artist_id_from_url.present? - "https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}/post/#{illust_id}" - end - elsif artist_id_from_url.present? - # Cover images - "https://www.pixiv.net/fanbox/creator/#{artist_id_from_url}" - end - end - def profile_url return if artist_name.blank? 
diff --git a/app/logical/sources/strategies/fantia.rb b/app/logical/sources/strategies/fantia.rb index 5c05ac6ce..fabe37640 100644 --- a/app/logical/sources/strategies/fantia.rb +++ b/app/logical/sources/strategies/fantia.rb @@ -128,10 +128,6 @@ module Sources::Strategies DText.from_html(artist_commentary_desc) end - def normalize_for_source - page_url - end - def work_type parsed_url.work_type || parsed_referer&.work_type end diff --git a/app/logical/sources/strategies/foundation.rb b/app/logical/sources/strategies/foundation.rb index daa039892..599d26258 100644 --- a/app/logical/sources/strategies/foundation.rb +++ b/app/logical/sources/strategies/foundation.rb @@ -76,10 +76,6 @@ module Sources DText.from_html(artist_commentary_desc) end - def normalize_for_source - page_url - end - def api_response return {} if page.nil? diff --git a/app/logical/sources/strategies/hentai_foundry.rb b/app/logical/sources/strategies/hentai_foundry.rb index 29031317e..52d89e893 100644 --- a/app/logical/sources/strategies/hentai_foundry.rb +++ b/app/logical/sources/strategies/hentai_foundry.rb @@ -68,10 +68,6 @@ module Sources DText.from_html(artist_commentary_desc).gsub(/\A[[:space:]]+|[[:space:]]+\z/, "").gsub(/\n+/, "\n") end - def normalize_for_source - page_url - end - def illust_id parsed_url.work_id || parsed_referer&.work_id end diff --git a/app/logical/sources/strategies/lofter.rb b/app/logical/sources/strategies/lofter.rb index d3dbf593b..3a31d55de 100644 --- a/app/logical/sources/strategies/lofter.rb +++ b/app/logical/sources/strategies/lofter.rb @@ -47,10 +47,6 @@ module Sources page&.search(".ct .text, .content .text, .posts .photo .text").to_a.compact.first&.to_html end - def normalize_for_source - page_url - end - def illust_id parsed_url.work_id || parsed_referer&.work_id end diff --git a/app/logical/sources/strategies/mastodon.rb b/app/logical/sources/strategies/mastodon.rb index e6c813856..92a00ee8b 100644 --- a/app/logical/sources/strategies/mastodon.rb +++ 
b/app/logical/sources/strategies/mastodon.rb @@ -79,10 +79,6 @@ module Sources::Strategies api_response.tags end - def normalize_for_source - page_url - end - def dtext_artist_commentary_desc DText.from_html(artist_commentary_desc) do |element| if element.name == "a" diff --git a/app/logical/sources/strategies/moebooru.rb b/app/logical/sources/strategies/moebooru.rb index 3fa8d6fa3..9e5c249dd 100644 --- a/app/logical/sources/strategies/moebooru.rb +++ b/app/logical/sources/strategies/moebooru.rb @@ -25,17 +25,6 @@ module Sources image_urls.first end - def normalize_for_source - id = post_id_from_url - md5 = post_md5_from_url - - if id.present? - "https://#{domain}/post/show/#{id}" - elsif md5.present? - "https://#{domain}/post?tags=md5:#{md5}" - end - end - def tags api_response[:tags].to_s.split.map do |tag| [tag, "https://#{domain}/post?tags=#{CGI.escape(tag)}"] diff --git a/app/logical/sources/strategies/newgrounds.rb b/app/logical/sources/strategies/newgrounds.rb index fdc1dd3b3..778aa64a0 100644 --- a/app/logical/sources/strategies/newgrounds.rb +++ b/app/logical/sources/strategies/newgrounds.rb @@ -84,10 +84,6 @@ module Sources end end - def normalize_for_source - page_url - end - def user_name parsed_url.username || parsed_referer&.username end diff --git a/app/logical/sources/strategies/nico_seiga.rb b/app/logical/sources/strategies/nico_seiga.rb index d56609574..86c8dbb92 100644 --- a/app/logical/sources/strategies/nico_seiga.rb +++ b/app/logical/sources/strategies/nico_seiga.rb @@ -66,10 +66,6 @@ module Sources end.gsub(/[^\w]im(\d+)/, ' seiga #\1 ').chomp end - def normalize_for_source - page_url - end - def tag_name return if api_client&.user_id.blank? 
"nicoseiga#{api_client.user_id}" diff --git a/app/logical/sources/strategies/nijie.rb b/app/logical/sources/strategies/nijie.rb index 57e7690bd..67484f0cd 100644 --- a/app/logical/sources/strategies/nijie.rb +++ b/app/logical/sources/strategies/nijie.rb @@ -109,12 +109,6 @@ module Sources artist_id_from_url || artist_id_from_page end - def normalize_for_source - return if illust_id.blank? - - "https://nijie.info/view.php?id=#{illust_id}" - end - def doujin? page&.at("#dojin_left").present? end diff --git a/app/logical/sources/strategies/null.rb b/app/logical/sources/strategies/null.rb index aa1b45938..a6a1cb01e 100644 --- a/app/logical/sources/strategies/null.rb +++ b/app/logical/sources/strategies/null.rb @@ -18,112 +18,6 @@ module Sources def artists ArtistFinder.find_artists(url) end - - def normalize_for_source - case url - when %r{\Ahttp://www\.karabako\.net/images(?:ub)?/karabako_(\d+)(?:_\d+)?\.}i - "http://www.karabako.net/post/view/#{$1}" - - # XXX http://twipple.jp is defunct - # http://p.twpl.jp/show/orig/myRVs - when %r{\Ahttp://p\.twpl\.jp/show/(?:large|orig)/([a-z0-9]+)}i - "http://p.twipple.jp/#{$1}" - - when %r{\Ahttps?://blog(?:(?:-imgs-)?\d*(?:-origin)?)?\.fc2\.com/(?:(?:[^/]/){3}|(?:[^/]/))([^/]+)/(?:file/)?([^.]+\.[^?]+)}i - username = $1 - filename = $2 - "http://#{username}.blog.fc2.com/img/#{filename}/" - - when %r{\Ahttps?://diary(\d)?\.fc2\.com/user/([^/]+)/img/(\d+)_(\d+)/(\d+)\.}i - server_id = $1 - username = $2 - year = $3 - month = $4 - day = $5 - "http://diary#{server_id}.fc2.com/cgi-sys/ed.cgi/#{username}?Y=#{year}&M=#{month}&D=#{day}" - - when %r{\Ahttps?://(?:fbcdn-)?s(?:content|photos)-[^/]+\.(?:fbcdn|akamaihd)\.net/hphotos-.+/\d+_(\d+)_(?:\d+_){1,3}[no]\.}i - "https://www.facebook.com/photo.php?fbid=#{$1}" - - when %r{\Ahttps?://c(?:s|han|[1-4])\.sankakucomplex\.com/data(?:/sample)?/(?:[a-f0-9]{2}/){2}(?:sample-|preview)?([a-f0-9]{32})}i - "https://chan.sankakucomplex.com/en/post/show?md5=#{$1}" - - when 
%r{\Ahttps?://(?:www|s(?:tatic|[1-4]))\.zerochan\.net/.+(?:\.|\/)(\d+)(?:\.(?:jpe?g?|png))?\z}i - "https://www.zerochan.net/#{$1}#full" - - when %r{\Ahttps?://static[1-6]?\.minitokyo\.net/(?:downloads|view)/(?:\d{2}/){2}(\d+)}i - "http://gallery.minitokyo.net/download/#{$1}" - - # https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg - # http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png - # https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg - when %r{\Ahttps?://(?:\w+\.)?gelbooru\.com//?(?:images|samples)/(?:\d+|\h\h/\h\h)/(?:sample_)?(?<md5>\h{32})\.}i - "https://gelbooru.com/index.php?page=post&s=list&tags=md5:#{$~[:md5]}" - - when %r{\Ahttps?://(?:slot\d*\.)?im(?:g|ages)\d*\.wikia\.(?:nocookie\.net|com)/(?:_{2}cb\d{14}/)?([^/]+)(?:/[a-z]{2})?/images/(?:(?:thumb|archive)?/)?[a-f0-9]/[a-f0-9]{2}/(?:\d{14}(?:!|%21))?([^/]+)}i - subdomain = $1 - filename = $2 - "https://#{subdomain}.wikia.com/wiki/File:#{filename}" - - when %r{\Ahttps?://vignette(?:\d*)\.wikia\.nocookie\.net/([^/]+)/images/[a-f0-9]/[a-f0-9]{2}/([^/]+)}i - subdomain = $1 - filename = $2 - "https://#{subdomain}.wikia.com/wiki/File:#{filename}" - - when %r{\Ahttps?://e-shuushuu.net/images/\d{4}-(?:\d{2}-){2}(\d+)}i - "https://e-shuushuu.net/image/#{$1}" - - when %r{\Ahttps?://jpg\.nijigen-daiaru\.com/(\d+)}i - "http://nijigen-daiaru.com/book.php?idb=#{$1}" - - when %r{\Ahttps?://sozai\.doujinantena\.com/contents_jpg/([a-f0-9]{32})/}i - "http://doujinantena.com/page.php?id=#{$1}" - - when %r{\Ahttps?://rule34-(?:data-\d{3}|images)\.paheal\.net/(?:_images/)?([a-f0-9]{32})}i - "https://rule34.paheal.net/post/list/md5:#{$1}/1" - - when %r{\Ahttps?://shimmie\.katawa-shoujo\.com/image/(\d+)}i - "https://shimmie.katawa-shoujo.com/post/view/#{$1}" - - when %r{\Ahttps://(?:(?:\w+\.)?rule34\.xxx|img\.booru\.org/(?:rule34|r34))(?:/(?:img/rule34|r34))?/{1,2}images/\d+/([a-f0-9]{32})\.}i - 
"https://rule34.xxx/index.php?page=post&s=list&md5=#{$1}" - - when %r{(\Ahttps?://.+)/diarypro/d(?:ata/upfile/|iary\.cgi\?mode=image&upfile=)(\d+)}i - base_url = $1 - entry_no = $2 - "#{base_url}/diarypro/diary.cgi?no=#{entry_no}" - - # XXX site is defunct - when %r{\Ahttps?://i(?:\d)?\.minus\.com/(?:i|j)([^\.]{12,})}i - "http://minus.com/i/#{$1}" - - # http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg - # http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg - when %r{\Ahttps?://\w+\.photozou\.jp/pub/\d+/(?<artist_id>\d+)/photo/(?<photo_id>\d+)_.*$}i - "https://photozou.jp/photo/show/#{$~[:artist_id]}/#{$~[:photo_id]}" - - # http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg - # http://img.toranoana.jp/popup_img18/04/0010/22/87/040010228714-1p.jpg - # http://img.toranoana.jp/popup_blimg/04/0030/08/30/040030083068-1p.jpg - # https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg - when %r{\Ahttps?://(?:\w+\.)?toranoana\.jp/(?:popup_(?:bl)?img\d*|ec/img)/\d{2}/\d{4}/\d{2}/\d{2}/(?<work_id>\d+)}i - "https://ec.toranoana.jp/tora_r/ec/item/#{$~[:work_id]}/" - - # https://a.hitomi.la/galleries/907838/1.png - # https://0a.hitomi.la/galleries/1169701/23.png - # https://aa.hitomi.la/galleries/990722/003_01_002.jpg - # https://la.hitomi.la/galleries/1054851/001_main_image.jpg - when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/(?<image_id>\d+)\w*\.[a-z]+\z}i - "https://hitomi.la/reader/#{$~[:gallery_id]}.html##{$~[:image_id].to_i}" - - # https://aa.hitomi.la/galleries/883451/t_rena1g.png - when %r{\Ahttps?://\w+\.hitomi\.la/galleries/(?<gallery_id>\d+)/\w*\.[a-z]+\z}i - "https://hitomi.la/galleries/#{$~[:gallery_id]}.html" - - else - nil - end - end end end end diff --git a/app/logical/sources/strategies/pixiv.rb b/app/logical/sources/strategies/pixiv.rb index 75101e087..d19d20742 100644 --- a/app/logical/sources/strategies/pixiv.rb +++ b/app/logical/sources/strategies/pixiv.rb @@ -96,11 +96,6 @@ module Sources api_illust[:description] end - 
def normalize_for_source - return nil if illust_id.blank? - "https://www.pixiv.net/artworks/#{illust_id}" - end - def tag_name moniker end diff --git a/app/logical/sources/strategies/plurk.rb b/app/logical/sources/strategies/plurk.rb index 2c3de7380..484e62db4 100644 --- a/app/logical/sources/strategies/plurk.rb +++ b/app/logical/sources/strategies/plurk.rb @@ -110,10 +110,6 @@ module Sources end.gsub(/\A[[:space:]]+|[[:space:]]+\z/, "") end - def normalize_for_source - page_url - end - memoize :page, :page_json, :api_replies end end diff --git a/app/logical/sources/strategies/skeb.rb b/app/logical/sources/strategies/skeb.rb index 66008bcfa..55bfd3b22 100644 --- a/app/logical/sources/strategies/skeb.rb +++ b/app/logical/sources/strategies/skeb.rb @@ -48,10 +48,6 @@ module Sources "https://skeb.jp/@#{artist_name}/works/#{illust_id}" end - def normalize_for_source - page_url - end - def api_url return nil unless artist_name.present? && illust_id.present? "https://skeb.jp/api/users/#{artist_name}/works/#{illust_id}" diff --git a/app/logical/sources/strategies/tumblr.rb b/app/logical/sources/strategies/tumblr.rb index 55dfd0fdd..bd1e879ee 100644 --- a/app/logical/sources/strategies/tumblr.rb +++ b/app/logical/sources/strategies/tumblr.rb @@ -83,10 +83,6 @@ module Sources::Strategies super(tag) end - def normalize_for_source - parsed_url.page_url - end - def dtext_artist_commentary_desc DText.from_html(artist_commentary_desc).strip end diff --git a/app/logical/sources/strategies/twit_pic.rb b/app/logical/sources/strategies/twit_pic.rb deleted file mode 100644 index 944f0fb73..000000000 --- a/app/logical/sources/strategies/twit_pic.rb +++ /dev/null @@ -1,14 +0,0 @@ -# frozen_string_literal: true - -# @see Source::URL::TwitPic -module Sources::Strategies - class TwitPic < Base - def match? 
- Source::URL::TwitPic === parsed_url - end - - def normalize_for_source - parsed_url.page_url || url - end - end -end diff --git a/app/logical/sources/strategies/twitter.rb b/app/logical/sources/strategies/twitter.rb index 8851bfae7..053a3d196 100644 --- a/app/logical/sources/strategies/twitter.rb +++ b/app/logical/sources/strategies/twitter.rb @@ -93,14 +93,6 @@ module Sources::Strategies api_response[:full_text].to_s end - def normalize_for_source - if tag_name_from_url.present? && status_id.present? - "https://twitter.com/#{tag_name_from_url}/status/#{status_id}" - elsif status_id.present? - "https://twitter.com/i/web/status/#{status_id}" - end - end - def tags api_response.dig(:entities, :hashtags).to_a.map do |hashtag| [hashtag[:text], "https://twitter.com/hashtag/#{hashtag[:text]}"] @@ -150,7 +142,7 @@ module Sources::Strategies end def tag_name_from_url - parsed_url.twitter_username || parsed_referer&.twitter_username + parsed_url.username || parsed_referer&.username end memoize :api_response diff --git a/app/logical/sources/strategies/weibo.rb b/app/logical/sources/strategies/weibo.rb index aaec04954..1abb24d11 100644 --- a/app/logical/sources/strategies/weibo.rb +++ b/app/logical/sources/strategies/weibo.rb @@ -87,10 +87,6 @@ module Sources end end - def normalize_for_source - parsed_url.normalized_url - end - def api_response return {} if (mobile_url = parsed_url.mobile_url || parsed_referer&.mobile_url).blank? diff --git a/app/models/post.rb b/app/models/post.rb index 7ea360ff4..41cdf049f 100644 --- a/app/models/post.rb +++ b/app/models/post.rb @@ -307,15 +307,16 @@ class Post < ApplicationRecord end end + def parsed_source + Source::URL.parse(source) if web_source? + end + def normalized_source - return source unless web_source? - Sources::Strategies.normalize_source(source) + parsed_source&.page_url || source end def source_domain - return "" unless web_source? 
- - Danbooru::URL.parse(normalized_source)&.domain.to_s + parsed_source&.domain.to_s end end diff --git a/test/unit/sources/art_station_test.rb b/test/unit/sources/art_station_test.rb index aaf544874..5ce62bd72 100644 --- a/test/unit/sources/art_station_test.rb +++ b/test/unit/sources/art_station_test.rb @@ -194,22 +194,13 @@ module Sources assert_equal("sa-dui", site.artist_name) end - context "normalizing for source" do - should "normalize correctly" do - source1 = "https://www.artstation.com/artwork/ghost-in-the-shell-fandom" - source2 = "https://anubis1982918.artstation.com/projects/qPVGP/" - source3 = "https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041" - - assert_equal(source1, Sources::Strategies.normalize_source(source1)) - assert_equal("https://anubis1982918.artstation.com/projects/qPVGP", Sources::Strategies.normalize_source(source2)) - assert_equal("https://dudeunderscore.artstation.com/projects/NoNmD", Sources::Strategies.normalize_source(source3)) - end - - should "avoid normalizing unnormalizable urls" do - bad_source1 = "http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236" - bad_source2 = "https://www.artstation.com" - assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) - assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) + context "generating page urls" do + should "work" do + assert_equal("https://www.artstation.com/artwork/ghost-in-the-shell-fandom", Source::URL.page_url("https://www.artstation.com/artwork/ghost-in-the-shell-fandom")) + assert_equal("https://www.artstation.com/artwork/qPVGP", Source::URL.page_url("https://anubis1982918.artstation.com/projects/qPVGP/")) + assert_equal("https://www.artstation.com/artwork/NoNmD", Source::URL.page_url("https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041")) + 
assert_nil(Source::URL.page_url("http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236")) + assert_nil(Source::URL.page_url("https://www.artstation.com")) end end end diff --git a/test/unit/sources/deviant_art_test.rb b/test/unit/sources/deviant_art_test.rb index 538c4ea19..38320fdd2 100644 --- a/test/unit/sources/deviant_art_test.rb +++ b/test/unit/sources/deviant_art_test.rb @@ -377,8 +377,8 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls " do + should "work" do source1 = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg" source2 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg" source3 = "http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png" @@ -386,21 +386,18 @@ module Sources source5 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc" source6 = "https://fav.me/dbc3a48" - assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Sources::Strategies.normalize_source(source1)) - assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Sources::Strategies.normalize_source(source2)) - 
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Sources::Strategies.normalize_source(source3)) - assert_equal("https://www.deviantart.com/deviation/417560500", Sources::Strategies.normalize_source(source4)) - assert_equal("https://www.deviantart.com/deviation/599977532", Sources::Strategies.normalize_source(source5)) - assert_equal("https://www.deviantart.com/deviation/685436408", Sources::Strategies.normalize_source(source6)) + assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Source::URL.page_url(source1)) + assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Source::URL.page_url(source2)) + assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Source::URL.page_url(source3)) + assert_equal("https://www.deviantart.com/deviation/417560500", Source::URL.page_url(source4)) + assert_equal("https://www.deviantart.com/deviation/599977532", Source::URL.page_url(source5)) + assert_equal("https://www.deviantart.com/deviation/685436408", Source::URL.page_url(source6)) end - should "avoid normalizing unnormalizable urls" do - bad_source1 = "http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg" - bad_source2 = "http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg" - bad_source3 = "https://deviantart.net" - assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) - assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) - assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3)) + should "handle inconvertible urls" do + assert_nil(Source::URL.page_url("http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg")) + assert_nil(Source::URL.page_url("http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg")) + 
assert_nil(Source::URL.page_url("https://deviantart.net")) end end end diff --git a/test/unit/sources/fanbox_test.rb b/test/unit/sources/fanbox_test.rb index bc7420793..54c9ab4d6 100644 --- a/test/unit/sources/fanbox_test.rb +++ b/test/unit/sources/fanbox_test.rb @@ -135,17 +135,15 @@ module Sources end end - context "normalizing for source" do - should "normalize cover images to the profile link" do + context "generating page urls" do + should "convert cover images to the profile url" do cover = "https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg" - assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Sources::Strategies.normalize_source(cover)) + assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Source::URL.page_url(cover)) end - should "avoid normalizing unnormalizable urls" do - bad_source1 = "https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg" - bad_source2 = "https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png" - assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) - assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) + should "handle inconvertible urls" do + assert_nil(Source::URL.page_url("https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg")) + assert_nil(Source::URL.page_url("https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png")) end end end diff --git a/test/unit/sources/hentai_foundry_test.rb b/test/unit/sources/hentai_foundry_test.rb index c381623bb..9304904ac 100644 --- a/test/unit/sources/hentai_foundry_test.rb +++ b/test/unit/sources/hentai_foundry_test.rb @@ -82,20 +82,16 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = 
"http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg" source2 = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg" source3 = "http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver." - assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source1)) - assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source2)) - assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Sources::Strategies.normalize_source(source3)) - end - - should "avoid normalizing unnormalizable urls" do - bad_source = "https://pictures.hentai-foundry.com/a/AnimeFlux" - assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source)) + assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source1)) + assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source2)) + assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Source::URL.page_url(source3)) + assert_nil(Source::URL.page_url("https://pictures.hentai-foundry.com/a/AnimeFlux")) end end diff --git a/test/unit/sources/mastodon_test.rb b/test/unit/sources/mastodon_test.rb index e92689913..1046615ec 100644 --- a/test/unit/sources/mastodon_test.rb +++ b/test/unit/sources/mastodon_test.rb @@ -122,25 +122,17 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do - source1 = "https://pawoo.net/@evazion/19451018/" - source2 = "https://pawoo.net/web/statuses/19451018/favorites" - source3 = "https://baraag.net/@bardbot/105732813175612920/" - - assert_equal("https://pawoo.net/@evazion/19451018", Sources::Strategies.normalize_source(source1)) - assert_equal("https://pawoo.net/web/statuses/19451018", Sources::Strategies.normalize_source(source2)) - 
assert_equal("https://baraag.net/@bardbot/105732813175612920", Sources::Strategies.normalize_source(source3)) + context "generating page urls" do + should "work" do + assert_equal("https://pawoo.net/@evazion/19451018", Source::URL.page_url("https://pawoo.net/@evazion/19451018/")) + assert_equal("https://pawoo.net/web/statuses/19451018", Source::URL.page_url("https://pawoo.net/web/statuses/19451018/favorites")) + assert_equal("https://baraag.net/@bardbot/105732813175612920", Source::URL.page_url("https://baraag.net/@bardbot/105732813175612920/")) end - should "avoid normalizing unnormalizable urls" do - bad_source1 = "https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png" - bad_source2 = "https://pawoo.net/@evazion/media" - bad_source3 = "https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png" - - assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) - assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) - assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3)) + should "handle inconvertible urls" do + assert_nil(Source::URL.page_url("https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png")) + assert_nil(Source::URL.page_url("https://pawoo.net/@evazion/media")) + assert_nil(Source::URL.page_url("https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png")) end end diff --git a/test/unit/sources/moebooru_test.rb b/test/unit/sources/moebooru_test.rb index 8f814f3d4..8c9351b4c 100644 --- a/test/unit/sources/moebooru_test.rb +++ b/test/unit/sources/moebooru_test.rb @@ -111,29 +111,29 @@ module Sources end end - context "normalizing for source" do - should "normalize yande.re sources correctly" do + context "generating page urls" do + should "generate yande.re urls correctly" do source1 = 
"https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png" source2 = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg" source3 = "https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg" source4 = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png" source5 = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg" - assert_equal("https://yande.re/post/show/377828", Sources::Strategies.normalize_source(source1)) - assert_equal("https://yande.re/post/show/349790", Sources::Strategies.normalize_source(source2)) - assert_equal("https://yande.re/post/show/469784", Sources::Strategies.normalize_source(source3)) - assert_equal("https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6", Sources::Strategies.normalize_source(source4)) - assert_equal("https://yande.re/post?tags=md5:22577d2344fe694cf47f80563031b3cd", Sources::Strategies.normalize_source(source5)) + assert_equal("https://yande.re/post/show/377828", Source::URL.page_url(source1)) + assert_equal("https://yande.re/post/show/349790", Source::URL.page_url(source2)) + assert_equal("https://yande.re/post/show/469784", Source::URL.page_url(source3)) + assert_equal("https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6", Source::URL.page_url(source4)) + assert_equal("https://yande.re/post/show?md5=22577d2344fe694cf47f80563031b3cd", Source::URL.page_url(source5)) end - should "normalize konachan.com sources correctly" do + should "generate konachan.com urls correctly" do source1 = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg" source2 = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg" 
source3 = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg" - assert_equal("https://konachan.com/post/show/270807", Sources::Strategies.normalize_source(source1)) - assert_equal("https://konachan.com/post/show/270803", Sources::Strategies.normalize_source(source2)) - assert_equal("https://konachan.com/post?tags=md5:99a3c4f10c327d54486259a74173fc0b", Sources::Strategies.normalize_source(source3)) + assert_equal("https://konachan.com/post/show/270807", Source::URL.page_url(source1)) + assert_equal("https://konachan.com/post/show/270803", Source::URL.page_url(source2)) + assert_equal("https://konachan.com/post/show?md5=99a3c4f10c327d54486259a74173fc0b", Source::URL.page_url(source3)) end end end diff --git a/test/unit/sources/newgrounds_test.rb b/test/unit/sources/newgrounds_test.rb index c083b98c7..bdc7dbc5e 100644 --- a/test/unit/sources/newgrounds_test.rb +++ b/test/unit/sources/newgrounds_test.rb @@ -98,16 +98,10 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do - source = "https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181" - - assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Sources::Strategies.normalize_source(source)) - end - - should "avoid normalizing unnormalizable urls" do - bad_source = "https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg" - assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source)) + context "generating page urls" do + should "work" do + assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Source::URL.page_url("https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181")) + assert_nil(Source::URL.page_url("https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg")) end end end diff --git a/test/unit/sources/nico_seiga_test.rb 
b/test/unit/sources/nico_seiga_test.rb index 2d84adde8..16557dd6b 100644 --- a/test/unit/sources/nico_seiga_test.rb +++ b/test/unit/sources/nico_seiga_test.rb @@ -159,22 +159,18 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf" source2 = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893" source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663" source4 = "http://seiga.nicovideo.jp/image/source?id=3312222" - assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Sources::Strategies.normalize_source(source1)) - assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Sources::Strategies.normalize_source(source2)) - assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Sources::Strategies.normalize_source(source3)) - assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Sources::Strategies.normalize_source(source4)) - end - - should "avoid normalizing unnormalizable urls" do - bad_source = "https://seiga.nicovideo.jp" - assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source)) + assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Source::URL.page_url(source1)) + assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Source::URL.page_url(source2)) + assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Source::URL.page_url(source3)) + assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Source::URL.page_url(source4)) + assert_nil(Source::URL.page_url("https://seiga.nicovideo.jp")) end end diff --git a/test/unit/sources/nijie_test.rb b/test/unit/sources/nijie_test.rb index f44cac0ec..7e948c829 100644 --- a/test/unit/sources/nijie_test.rb +++ b/test/unit/sources/nijie_test.rb @@ -317,23 
+317,23 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = "https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png" source2 = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png" - assert_equal("https://nijie.info/view.php?id=218856", Sources::Strategies.normalize_source(source1)) - assert_equal("https://nijie.info/view.php?id=287736", Sources::Strategies.normalize_source(source2)) + assert_equal("https://nijie.info/view.php?id=218856", Source::URL.page_url(source1)) + assert_equal("https://nijie.info/view.php?id=287736", Source::URL.page_url(source2)) end - should "avoid normalizing unnormalizable urls" do + should "handle inconvertible urls" do bad_source1 = "https://pic01.nijie.info/nijie_picture/20120211210359.jpg" bad_source2 = "https://pic04.nijie.info/omata/4829_20161128012012.png" bad_source3 = "https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg" - assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) - assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) - assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3)) + assert_nil(Source::URL.page_url(bad_source1)) + assert_nil(Source::URL.page_url(bad_source2)) + assert_nil(Source::URL.page_url(bad_source3)) end end diff --git a/test/unit/sources/null_test.rb b/test/unit/sources/null_test.rb index 1f9a75a94..94063d82b 100644 --- a/test/unit/sources/null_test.rb +++ b/test/unit/sources/null_test.rb @@ -29,30 +29,30 @@ module Sources context "normalizing for source" do should "normalize karabako links" do source = "http://www.karabako.net/images/karabako_38835.jpg" - assert_equal("http://www.karabako.net/post/view/38835", Sources::Strategies.normalize_source(source)) + assert_equal("http://www.karabako.net/post/view/38835", Source::URL.page_url(source)) end should 
"normalize twipple links" do source = "http://p.twpl.jp/show/orig/mI2c3" - assert_equal("http://p.twipple.jp/mI2c3", Sources::Strategies.normalize_source(source)) + assert_equal("http://p.twipple.jp/mI2c3", Source::URL.page_url(source)) end should "normalize fc2 links" do source1 = "https://blog-imgs-41.fc2.com/t/u/y/tuyadasi/file.png" source2 = "http://diary.fc2.com/user/kazuharoom/img/2020_1/29.jpg" - assert_equal("http://tuyadasi.blog.fc2.com/img/file.png/", Sources::Strategies.normalize_source(source1)) - assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Sources::Strategies.normalize_source(source2)) + assert_equal("http://tuyadasi.blog.fc2.com/img/file.png", Source::URL.page_url(source1)) + assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Source::URL.page_url(source2)) end should "normalize facebook links" do source = "https://scontent-sin1-1.xx.fbcdn.net/hphotos-xtp1/t31.0-8/11254493_576443445841777_7716273903390212288_o.jpg" - assert_equal("https://www.facebook.com/photo.php?fbid=576443445841777", Sources::Strategies.normalize_source(source)) + assert_equal("https://www.facebook.com/photo?fbid=576443445841777", Source::URL.page_url(source)) end should "normalize sankaku links" do source = "http://cs.sankakucomplex.com/data/sample/c2/d7/sample-c2d7270b84ac81326384d4eadd4d4746.jpg?2738848" - assert_equal("https://chan.sankakucomplex.com/en/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Sources::Strategies.normalize_source(source)) + assert_equal("https://chan.sankakucomplex.com/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Source::URL.page_url(source)) end should "normalize zerochan links" do @@ -60,17 +60,17 @@ module Sources source2 = "https://s4.zerochan.net/Victorique.de.Blois.full.411536.jpg" source3 = "http://www.zerochan.net/full/1567893" - assert_equal("https://www.zerochan.net/183273#full", Sources::Strategies.normalize_source(source1)) - 
assert_equal("https://www.zerochan.net/411536#full", Sources::Strategies.normalize_source(source2)) - assert_equal("https://www.zerochan.net/1567893#full", Sources::Strategies.normalize_source(source3)) + assert_equal("https://www.zerochan.net/183273#full", Source::URL.page_url(source1)) + assert_equal("https://www.zerochan.net/411536#full", Source::URL.page_url(source2)) + assert_equal("https://www.zerochan.net/1567893#full", Source::URL.page_url(source3)) end should "normalize minitokyo links" do source1 = "http://static.minitokyo.net/downloads/27/13/365677.jpg?433592448,Minitokyo.Eien.no.Aselia.Scans_365677.jpg" source2 = "http://static.minitokyo.net/downloads/14/33/199164.jpg?928244019" - assert_equal("http://gallery.minitokyo.net/download/365677", Sources::Strategies.normalize_source(source1)) - assert_equal("http://gallery.minitokyo.net/download/199164", Sources::Strategies.normalize_source(source2)) + assert_equal("http://gallery.minitokyo.net/view/365677", Source::URL.page_url(source1)) + assert_equal("http://gallery.minitokyo.net/view/199164", Source::URL.page_url(source2)) end should "normalize gelbooru links" do @@ -78,87 +78,83 @@ module Sources source2 = "http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png" source3 = "https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg" - assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Sources::Strategies.normalize_source(source1)) - assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Sources::Strategies.normalize_source(source2)) - assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Sources::Strategies.normalize_source(source3)) + assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Source::URL.page_url(source1)) + 
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Source::URL.page_url(source2)) + assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Source::URL.page_url(source3)) end should "normalize wikia links" do source = "https://vignette.wikia.nocookie.net/valkyriecrusade/images/c/c5/Crimson_Hatsune_H.png/revision/latest?cb=20180702031954" - assert_equal("https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png", Sources::Strategies.normalize_source(source)) + assert_equal("https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png", Source::URL.page_url(source)) end should "normalize e-shuushuu links" do source = "http://e-shuushuu.net/images/2014-07-22-662472.png" - assert_equal("https://e-shuushuu.net/image/662472", Sources::Strategies.normalize_source(source)) + assert_equal("https://e-shuushuu.net/image/662472", Source::URL.page_url(source)) end should "normalize nijigen-daiaru links" do source = "http://jpg.nijigen-daiaru.com/19909/029.jpg" - assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Sources::Strategies.normalize_source(source)) + assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Source::URL.page_url(source)) end should "normalize doujinantena links" do source = "http://sozai.doujinantena.com/contents_jpg/d6c39f09d435e32c221e4ef866eceba4/015.jpg" - assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Sources::Strategies.normalize_source(source)) + assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Source::URL.page_url(source)) end should "normalize paheal.net links" do source = "http://rule34-data-010.paheal.net/_images/854806addcd3b1246424e7cea49afe31/852405%20-%20Darkstalkers%20Felicia.jpg" - assert_equal("https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1", Sources::Strategies.normalize_source(source)) + 
assert_equal("https://rule34.paheal.net/post/view/852405", Source::URL.page_url(source)) end should "normalize shimmie.katawa-shoujo.com links" do source = "http://shimmie.katawa-shoujo.com/image/2740.png" - assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Sources::Strategies.normalize_source(source)) + assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Source::URL.page_url(source)) end should "normalize rule34.xxx links" do source = "https://us.rule34.xxx//images/1802/0adc8fa0604dc445b4b47e6f4c436a08.jpeg?1949807" - assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Sources::Strategies.normalize_source(source)) + assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Source::URL.page_url(source)) end should "normalize diarypro links" do source1 = "http://nekomataya.net/diarypro/data/upfile/216-1.jpg" source2 = "http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=716-3.jpg" - assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Sources::Strategies.normalize_source(source1)) - assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Sources::Strategies.normalize_source(source2)) + assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Source::URL.page_url(source1)) + assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Source::URL.page_url(source2)) end should "normalize minus.com links" do source = "http://i1.minus.com/ibb0DuE2Ds0yE6.jpg" - assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Sources::Strategies.normalize_source(source)) + assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Source::URL.page_url(source)) end should "normalize photozou links" do source1 = "http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg" source2 = "http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg" - 
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Sources::Strategies.normalize_source(source1)) - assert_equal("https://photozou.jp/photo/show/1986212/118493247", Sources::Strategies.normalize_source(source2)) + assert_equal("https://photozou.jp/photo/show/1481794/161537258", Source::URL.page_url(source1)) + assert_equal("https://photozou.jp/photo/show/1986212/118493247", Source::URL.page_url(source2)) end should "normalize toranoana links" do source1 = "http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg" source2 = "https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg" - assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695/", Sources::Strategies.normalize_source(source1)) - assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417/", Sources::Strategies.normalize_source(source2)) + assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695", Source::URL.page_url(source1)) + assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417", Source::URL.page_url(source2)) end should "normalize hitomi.la links" do source1 = "https://aa.hitomi.la/galleries/883451/t_rena1g.png" source2 = "https://la.hitomi.la/galleries/1054851/001_main_image.jpg" - assert_equal("https://hitomi.la/galleries/883451.html", Sources::Strategies.normalize_source(source1)) - assert_equal("https://hitomi.la/reader/1054851.html#1", Sources::Strategies.normalize_source(source2)) + assert_equal("https://hitomi.la/galleries/883451.html", Source::URL.page_url(source1)) + assert_equal("https://hitomi.la/reader/1054851.html#1", Source::URL.page_url(source2)) end should "leave unknown sources as they are" do - source1 = "https://google.com" - source2 = "a bad non-http source" - source3 = "https://example.com/Folder/中央大学.html" - - assert_equal(source1, Sources::Strategies.normalize_source(source1)) - assert_equal(source2, Sources::Strategies.normalize_source(source2)) - assert_equal(source3, 
Sources::Strategies.normalize_source(source3)) + assert_nil(Source::URL.page_url("https://google.com")) + assert_nil(Source::URL.page_url("a bad non-http source")) + assert_nil(Source::URL.page_url("https://example.com/Folder/中央大学.html")) end end end diff --git a/test/unit/sources/pixiv_test.rb b/test/unit/sources/pixiv_test.rb index 305b33026..76fd5c2c5 100644 --- a/test/unit/sources/pixiv_test.rb +++ b/test/unit/sources/pixiv_test.rb @@ -348,19 +348,19 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = "http://i2.pixiv.net/img12/img/zenze/39749565.png" source2 = "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg" source3 = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg" source4 = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png" source5 = "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip" - assert_equal("https://www.pixiv.net/artworks/39749565", Sources::Strategies.normalize_source(source1)) - assert_equal("https://www.pixiv.net/artworks/39735353", Sources::Strategies.normalize_source(source2)) - assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source3)) - assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source4)) - assert_equal("https://www.pixiv.net/artworks/44524589", Sources::Strategies.normalize_source(source5)) + assert_equal("https://www.pixiv.net/artworks/39749565", Source::URL.page_url(source1)) + assert_equal("https://www.pixiv.net/artworks/39735353", Source::URL.page_url(source2)) + assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source3)) + assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source4)) + assert_equal("https://www.pixiv.net/artworks/44524589", 
Source::URL.page_url(source5)) end end end diff --git a/test/unit/sources/skeb_test.rb b/test/unit/sources/skeb_test.rb index d8a6df64a..c1b98c44e 100644 --- a/test/unit/sources/skeb_test.rb +++ b/test/unit/sources/skeb_test.rb @@ -99,10 +99,10 @@ module Sources end end - context "normalizing for source" do - should "avoid normalizing unnormalizable urls" do + context "generating page urls" do + should "handle inconvertible urls" do bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a" - assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source)) + assert_nil(Source::URL.page_url(bad_source)) end end end diff --git a/test/unit/sources/tumblr_test.rb b/test/unit/sources/tumblr_test.rb index 30ed9bd0e..27b05d1e9 100644 --- a/test/unit/sources/tumblr_test.rb +++ b/test/unit/sources/tumblr_test.rb @@ -228,22 +228,18 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = "https://octrain1020.tumblr.com/post/190713122589" source2 = "https://octrain1020.tumblr.com/image/190713122589" source3 = "https://octrain1020.tumblr.com/image/190713122589#asd" source4 = "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false" - assert_equal(source1, Sources::Strategies.normalize_source(source1)) - assert_equal(source1, Sources::Strategies.normalize_source(source2)) - assert_equal(source1, Sources::Strategies.normalize_source(source3)) - assert_equal("https://superboin.tumblr.com/post/141169066579", Sources::Strategies.normalize_source(source4)) - end - - should "avoid normalizing unnormalizable urls" do - bad_source = "https://octrain1020.tumblr.com/" - assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source)) + assert_equal(source1, Source::URL.page_url(source1)) + assert_equal(source1, Source::URL.page_url(source2)) + 
assert_equal(source1, Source::URL.page_url(source3)) + assert_equal("https://superboin.tumblr.com/post/141169066579", Source::URL.page_url(source4)) + assert_nil(Source::URL.page_url("https://octrain1020.tumblr.com/")) end end end diff --git a/test/unit/sources/twit_pic_test.rb b/test/unit/sources/twit_pic_test.rb index 6a6e8b603..82f590f3b 100644 --- a/test/unit/sources/twit_pic_test.rb +++ b/test/unit/sources/twit_pic_test.rb @@ -2,20 +2,11 @@ require 'test_helper' module Sources class TwitPicTest < ActiveSupport::TestCase - context "normalizing for source" do - should "normalize d3j5vwomefv46c.cloudfront.net links" do - source = "http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199" - assert_equal("https://twitpic.com/dks0tb", Sources::Strategies.normalize_source(source)) - end - - should "normalize dn3pm25xmtlyu.cloudfront.net links" do - source = "https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA" - assert_equal("https://twitpic.com/dvitq3", Sources::Strategies.normalize_source(source)) - end - - should "normalize o.twimg.com links" do - source = "https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs" - assert_equal("https://twitpic.com/dtnuru", Sources::Strategies.normalize_source(source)) + context "generating page urls" do + should "work" do + assert_equal("https://twitpic.com/dks0tb", Source::URL.page_url("http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199")) + 
assert_equal("https://twitpic.com/dvitq3", Source::URL.page_url("https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA")) + assert_equal("https://twitpic.com/dtnuru", Source::URL.page_url("https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs")) end end end diff --git a/test/unit/sources/twitter_test.rb b/test/unit/sources/twitter_test.rb index cc562f6e9..4ea10aca2 100644 --- a/test/unit/sources/twitter_test.rb +++ b/test/unit/sources/twitter_test.rb @@ -291,18 +291,18 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = "https://twitter.com/i/web/status/1261877313349640194" source2 = "https://twitter.com/BOW999/status/1261877313349640194" source3 = "https://twitter.com/BOW999/status/1261877313349640194/photo/1" source4 = "https://twitter.com/BOW999/status/1261877313349640194?s=19" - assert_equal(source1, Sources::Strategies.normalize_source(source1)) - assert_equal(source2, Sources::Strategies.normalize_source(source2)) - assert_equal(source2, Sources::Strategies.normalize_source(source3)) - assert_equal(source2, Sources::Strategies.normalize_source(source4)) - assert_equal("https://www.twitter.com/irt_5433", Sources::Strategies.normalize_source("https://www.twitter.com/irt_5433")) + assert_equal(source1, Source::URL.page_url(source1)) + assert_equal(source2, Source::URL.page_url(source2)) + assert_equal(source2, Source::URL.page_url(source3)) + 
assert_equal(source2, Source::URL.page_url(source4)) + assert_nil(Source::URL.page_url("https://www.twitter.com/irt_5433")) end end end diff --git a/test/unit/sources/weibo_test.rb b/test/unit/sources/weibo_test.rb index fe32fd7d9..08d443b39 100644 --- a/test/unit/sources/weibo_test.rb +++ b/test/unit/sources/weibo_test.rb @@ -113,25 +113,19 @@ module Sources end end - context "normalizing for source" do - should "normalize correctly" do + context "generating page urls" do + should "work" do source1 = "https://www.weibo.com/3150932560/H4cFbeKKA?from=page_1005053150932560_profile&wvr=6&mod=weibotime" source2 = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81" source3 = "https://m.weibo.cn/status/4173757483008088?luicode=20000061&lfid=4170879204256635" source4 = "https://tw.weibo.com/SEINEN/4098035921690224" - assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Sources::Strategies.normalize_source(source1)) - assert_equal("https://m.weibo.cn/detail/4242129997905387", Sources::Strategies.normalize_source(source2)) - assert_equal("https://m.weibo.cn/status/4173757483008088", Sources::Strategies.normalize_source(source3)) - assert_equal("https://m.weibo.cn/detail/4098035921690224", Sources::Strategies.normalize_source(source4)) - end - - should "avoid normalizing unnormalizable urls" do - bad_source1 = "https://weibo.com/u/" - bad_source2 = "https://www.weibo.com/4ubergine/photos" - - assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1)) - assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2)) + assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Source::URL.page_url(source1)) + assert_equal("https://m.weibo.cn/detail/4242129997905387", Source::URL.page_url(source2)) + assert_equal("https://m.weibo.cn/status/4173757483008088", Source::URL.page_url(source3)) + assert_equal("https://m.weibo.cn/detail/4098035921690224", Source::URL.page_url(source4)) + 
assert_nil(Source::URL.page_url("https://weibo.com/u/")) + assert_nil(Source::URL.page_url("https://www.weibo.com/4ubergine/photos")) end end end