sources: refactor normalize_for_source.

`normalize_for_source` was used to convert image URLs to page URLs when displaying sources
on the post show page. Move all the code for converting image URLs to page URLs from
`Sources::Strategies#normalize_for_source` to `Source::URL#page_url`.

Previously, we had to be very careful in source strategies not to make any network calls in
`normalize_for_source`, since it was used in the view for the post show page. Now all the
code for generating page URLs is isolated in Source::URL, which makes source strategies
simpler. It also makes it easier to check if a source is an image URL or page URL, and if
the image URL is convertible to a page URL, which will make autotagging bad_link or
bad_source feasible.

Finally, this generates better page URLs in a handful of cases:

* https://www.artstation.com/artwork/qPVGP instead of https://anubis1982918.artstation.com/projects/qPVGP
* https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6 instead of https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6
* http://gallery.minitokyo.net/view/365677 instead of http://gallery.minitokyo.net/download/365677
* https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png instead of https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png
* https://rule34.paheal.net/post/view/852405 instead of https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1
This commit is contained in:
evazion
2022-03-23 00:41:56 -05:00
parent 770f850c66
commit 3aa5cab2aa
59 changed files with 471 additions and 484 deletions

View File

@@ -194,22 +194,13 @@ module Sources
assert_equal("sa-dui", site.artist_name)
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://www.artstation.com/artwork/ghost-in-the-shell-fandom"
source2 = "https://anubis1982918.artstation.com/projects/qPVGP/"
source3 = "https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal("https://anubis1982918.artstation.com/projects/qPVGP", Sources::Strategies.normalize_source(source2))
assert_equal("https://dudeunderscore.artstation.com/projects/NoNmD", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"
bad_source2 = "https://www.artstation.com"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
context "generating page urls" do
should "work" do
assert_equal("https://www.artstation.com/artwork/ghost-in-the-shell-fandom", Source::URL.page_url("https://www.artstation.com/artwork/ghost-in-the-shell-fandom"))
assert_equal("https://www.artstation.com/artwork/qPVGP", Source::URL.page_url("https://anubis1982918.artstation.com/projects/qPVGP/"))
assert_equal("https://www.artstation.com/artwork/NoNmD", Source::URL.page_url("https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041"))
assert_nil(Source::URL.page_url("http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"))
assert_nil(Source::URL.page_url("https://www.artstation.com"))
end
end
end

View File

@@ -377,8 +377,8 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls " do
should "work" do
source1 = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg"
source2 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg"
source3 = "http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png"
@@ -386,21 +386,18 @@ module Sources
source5 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc"
source6 = "https://fav.me/dbc3a48"
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.deviantart.com/deviation/417560500", Sources::Strategies.normalize_source(source4))
assert_equal("https://www.deviantart.com/deviation/599977532", Sources::Strategies.normalize_source(source5))
assert_equal("https://www.deviantart.com/deviation/685436408", Sources::Strategies.normalize_source(source6))
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Source::URL.page_url(source1))
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Source::URL.page_url(source2))
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Source::URL.page_url(source3))
assert_equal("https://www.deviantart.com/deviation/417560500", Source::URL.page_url(source4))
assert_equal("https://www.deviantart.com/deviation/599977532", Source::URL.page_url(source5))
assert_equal("https://www.deviantart.com/deviation/685436408", Source::URL.page_url(source6))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg"
bad_source2 = "http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg"
bad_source3 = "https://deviantart.net"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
should "handle inconvertible urls" do
assert_nil(Source::URL.page_url("http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg"))
assert_nil(Source::URL.page_url("http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg"))
assert_nil(Source::URL.page_url("https://deviantart.net"))
end
end
end

View File

@@ -135,17 +135,15 @@ module Sources
end
end
context "normalizing for source" do
should "normalize cover images to the profile link" do
context "generating page urls" do
should "convert cover images to the profile url" do
cover = "https://pixiv.pximg.net/c/400x400_90_a2_g5/fanbox/public/images/creator/1566167/profile/Ix6bnJmTaOAFZhXHLbWyIY1e.jpeg"
assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Sources::Strategies.normalize_source(cover))
assert_equal("https://www.pixiv.net/fanbox/creator/1566167", Source::URL.page_url(cover))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg"
bad_source2 = "https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
should "handle inconvertible urls" do
assert_nil(Source::URL.page_url("https://pixiv.pximg.net/c/936x600_90_a2_g5/fanbox/public/images/plan/4635/cover/L6AZNneFuHW6r25CHHlkpHg4.jpeg"))
assert_nil(Source::URL.page_url("https://downloads.fanbox.cc/images/post/39714/JvjJal8v1yLgc5DPyEI05YpT.png"))
end
end
end

View File

@@ -82,20 +82,16 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg"
source2 = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg"
source3 = "http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver."
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://pictures.hentai-foundry.com/a/AnimeFlux"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source1))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Source::URL.page_url(source2))
assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Source::URL.page_url(source3))
assert_nil(Source::URL.page_url("https://pictures.hentai-foundry.com/a/AnimeFlux"))
end
end

View File

@@ -122,25 +122,17 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://pawoo.net/@evazion/19451018/"
source2 = "https://pawoo.net/web/statuses/19451018/favorites"
source3 = "https://baraag.net/@bardbot/105732813175612920/"
assert_equal("https://pawoo.net/@evazion/19451018", Sources::Strategies.normalize_source(source1))
assert_equal("https://pawoo.net/web/statuses/19451018", Sources::Strategies.normalize_source(source2))
assert_equal("https://baraag.net/@bardbot/105732813175612920", Sources::Strategies.normalize_source(source3))
context "generating page urls" do
should "work" do
assert_equal("https://pawoo.net/@evazion/19451018", Source::URL.page_url("https://pawoo.net/@evazion/19451018/"))
assert_equal("https://pawoo.net/web/statuses/19451018", Source::URL.page_url("https://pawoo.net/web/statuses/19451018/favorites"))
assert_equal("https://baraag.net/@bardbot/105732813175612920", Source::URL.page_url("https://baraag.net/@bardbot/105732813175612920/"))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png"
bad_source2 = "https://pawoo.net/@evazion/media"
bad_source3 = "https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
should "handle inconvertible urls" do
assert_nil(Source::URL.page_url("https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png"))
assert_nil(Source::URL.page_url("https://pawoo.net/@evazion/media"))
assert_nil(Source::URL.page_url("https://baraag.net/system/media_attachments/files/105/732/803/241/495/700/original/556e1eb7f5ca610f.png"))
end
end

View File

@@ -111,29 +111,29 @@ module Sources
end
end
context "normalizing for source" do
should "normalize yande.re sources correctly" do
context "generating page urls" do
should "generate yande.re urls correctly" do
source1 = "https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png"
source2 = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg"
source3 = "https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg"
source4 = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png"
source5 = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg"
assert_equal("https://yande.re/post/show/377828", Sources::Strategies.normalize_source(source1))
assert_equal("https://yande.re/post/show/349790", Sources::Strategies.normalize_source(source2))
assert_equal("https://yande.re/post/show/469784", Sources::Strategies.normalize_source(source3))
assert_equal("https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6", Sources::Strategies.normalize_source(source4))
assert_equal("https://yande.re/post?tags=md5:22577d2344fe694cf47f80563031b3cd", Sources::Strategies.normalize_source(source5))
assert_equal("https://yande.re/post/show/377828", Source::URL.page_url(source1))
assert_equal("https://yande.re/post/show/349790", Source::URL.page_url(source2))
assert_equal("https://yande.re/post/show/469784", Source::URL.page_url(source3))
assert_equal("https://yande.re/post/show?md5=b4b1d11facd1700544554e4805d47bb6", Source::URL.page_url(source4))
assert_equal("https://yande.re/post/show?md5=22577d2344fe694cf47f80563031b3cd", Source::URL.page_url(source5))
end
should "normalize konachan.com sources correctly" do
should "generate konachan.com urls correctly" do
source1 = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg"
source2 = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg"
source3 = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg"
assert_equal("https://konachan.com/post/show/270807", Sources::Strategies.normalize_source(source1))
assert_equal("https://konachan.com/post/show/270803", Sources::Strategies.normalize_source(source2))
assert_equal("https://konachan.com/post?tags=md5:99a3c4f10c327d54486259a74173fc0b", Sources::Strategies.normalize_source(source3))
assert_equal("https://konachan.com/post/show/270807", Source::URL.page_url(source1))
assert_equal("https://konachan.com/post/show/270803", Source::URL.page_url(source2))
assert_equal("https://konachan.com/post/show?md5=99a3c4f10c327d54486259a74173fc0b", Source::URL.page_url(source3))
end
end
end

View File

@@ -98,16 +98,10 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
source = "https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181"
assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Sources::Strategies.normalize_source(source))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
context "generating page urls" do
should "work" do
assert_equal("https://www.newgrounds.com/art/view/natthelich/fire-emblem-marth-plus-progress-pic", Source::URL.page_url("https://art.ngfiles.com/images/1033000/1033622_natthelich_fire-emblem-marth-plus-progress-pic.png?f1569487181"))
assert_nil(Source::URL.page_url("https://art.ngfiles.com/comments/57000/iu_57615_7115981.jpg"))
end
end
end

View File

@@ -159,22 +159,18 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf"
source2 = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893"
source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"
source4 = "http://seiga.nicovideo.jp/image/source?id=3312222"
assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Sources::Strategies.normalize_source(source1))
assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Sources::Strategies.normalize_source(source2))
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Sources::Strategies.normalize_source(source3))
assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://seiga.nicovideo.jp"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_equal("https://seiga.nicovideo.jp/image/source/3521156", Source::URL.page_url(source1))
assert_equal("https://seiga.nicovideo.jp/image/source/3583893", Source::URL.page_url(source2))
assert_equal("https://seiga.nicovideo.jp/image/source/4937663", Source::URL.page_url(source3))
assert_equal("https://seiga.nicovideo.jp/image/source/3312222", Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://seiga.nicovideo.jp"))
end
end

View File

@@ -317,23 +317,23 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png"
source2 = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png"
assert_equal("https://nijie.info/view.php?id=218856", Sources::Strategies.normalize_source(source1))
assert_equal("https://nijie.info/view.php?id=287736", Sources::Strategies.normalize_source(source2))
assert_equal("https://nijie.info/view.php?id=218856", Source::URL.page_url(source1))
assert_equal("https://nijie.info/view.php?id=287736", Source::URL.page_url(source2))
end
should "avoid normalizing unnormalizable urls" do
should "handle inconvertible urls" do
bad_source1 = "https://pic01.nijie.info/nijie_picture/20120211210359.jpg"
bad_source2 = "https://pic04.nijie.info/omata/4829_20161128012012.png"
bad_source3 = "https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
assert_nil(Source::URL.page_url(bad_source1))
assert_nil(Source::URL.page_url(bad_source2))
assert_nil(Source::URL.page_url(bad_source3))
end
end

View File

@@ -29,30 +29,30 @@ module Sources
context "normalizing for source" do
should "normalize karabako links" do
source = "http://www.karabako.net/images/karabako_38835.jpg"
assert_equal("http://www.karabako.net/post/view/38835", Sources::Strategies.normalize_source(source))
assert_equal("http://www.karabako.net/post/view/38835", Source::URL.page_url(source))
end
should "normalize twipple links" do
source = "http://p.twpl.jp/show/orig/mI2c3"
assert_equal("http://p.twipple.jp/mI2c3", Sources::Strategies.normalize_source(source))
assert_equal("http://p.twipple.jp/mI2c3", Source::URL.page_url(source))
end
should "normalize fc2 links" do
source1 = "https://blog-imgs-41.fc2.com/t/u/y/tuyadasi/file.png"
source2 = "http://diary.fc2.com/user/kazuharoom/img/2020_1/29.jpg"
assert_equal("http://tuyadasi.blog.fc2.com/img/file.png/", Sources::Strategies.normalize_source(source1))
assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Sources::Strategies.normalize_source(source2))
assert_equal("http://tuyadasi.blog.fc2.com/img/file.png", Source::URL.page_url(source1))
assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Source::URL.page_url(source2))
end
should "normalize facebook links" do
source = "https://scontent-sin1-1.xx.fbcdn.net/hphotos-xtp1/t31.0-8/11254493_576443445841777_7716273903390212288_o.jpg"
assert_equal("https://www.facebook.com/photo.php?fbid=576443445841777", Sources::Strategies.normalize_source(source))
assert_equal("https://www.facebook.com/photo?fbid=576443445841777", Source::URL.page_url(source))
end
should "normalize sankaku links" do
source = "http://cs.sankakucomplex.com/data/sample/c2/d7/sample-c2d7270b84ac81326384d4eadd4d4746.jpg?2738848"
assert_equal("https://chan.sankakucomplex.com/en/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Sources::Strategies.normalize_source(source))
assert_equal("https://chan.sankakucomplex.com/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Source::URL.page_url(source))
end
should "normalize zerochan links" do
@@ -60,17 +60,17 @@ module Sources
source2 = "https://s4.zerochan.net/Victorique.de.Blois.full.411536.jpg"
source3 = "http://www.zerochan.net/full/1567893"
assert_equal("https://www.zerochan.net/183273#full", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.zerochan.net/411536#full", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.zerochan.net/1567893#full", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.zerochan.net/183273#full", Source::URL.page_url(source1))
assert_equal("https://www.zerochan.net/411536#full", Source::URL.page_url(source2))
assert_equal("https://www.zerochan.net/1567893#full", Source::URL.page_url(source3))
end
should "normalize minitokyo links" do
source1 = "http://static.minitokyo.net/downloads/27/13/365677.jpg?433592448,Minitokyo.Eien.no.Aselia.Scans_365677.jpg"
source2 = "http://static.minitokyo.net/downloads/14/33/199164.jpg?928244019"
assert_equal("http://gallery.minitokyo.net/download/365677", Sources::Strategies.normalize_source(source1))
assert_equal("http://gallery.minitokyo.net/download/199164", Sources::Strategies.normalize_source(source2))
assert_equal("http://gallery.minitokyo.net/view/365677", Source::URL.page_url(source1))
assert_equal("http://gallery.minitokyo.net/view/199164", Source::URL.page_url(source2))
end
should "normalize gelbooru links" do
@@ -78,87 +78,83 @@ module Sources
source2 = "http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png"
source3 = "https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg"
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Sources::Strategies.normalize_source(source1))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Sources::Strategies.normalize_source(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Sources::Strategies.normalize_source(source3))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:ee5c9a69db9602c95debdb9b98fb3e3e", Source::URL.page_url(source1))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:edd1d2b3881cf70c3acf540780507531", Source::URL.page_url(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&tags=md5:0b3ae5e225072b8e391c827cb470d29c", Source::URL.page_url(source3))
end
should "normalize wikia links" do
source = "https://vignette.wikia.nocookie.net/valkyriecrusade/images/c/c5/Crimson_Hatsune_H.png/revision/latest?cb=20180702031954"
assert_equal("https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png", Sources::Strategies.normalize_source(source))
assert_equal("https://valkyriecrusade.fandom.com/wiki/File:Crimson_Hatsune_H.png", Source::URL.page_url(source))
end
should "normalize e-shuushuu links" do
source = "http://e-shuushuu.net/images/2014-07-22-662472.png"
assert_equal("https://e-shuushuu.net/image/662472", Sources::Strategies.normalize_source(source))
assert_equal("https://e-shuushuu.net/image/662472", Source::URL.page_url(source))
end
should "normalize nijigen-daiaru links" do
source = "http://jpg.nijigen-daiaru.com/19909/029.jpg"
assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Sources::Strategies.normalize_source(source))
assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Source::URL.page_url(source))
end
should "normalize doujinantena links" do
source = "http://sozai.doujinantena.com/contents_jpg/d6c39f09d435e32c221e4ef866eceba4/015.jpg"
assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Sources::Strategies.normalize_source(source))
assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Source::URL.page_url(source))
end
should "normalize paheal.net links" do
source = "http://rule34-data-010.paheal.net/_images/854806addcd3b1246424e7cea49afe31/852405%20-%20Darkstalkers%20Felicia.jpg"
assert_equal("https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1", Sources::Strategies.normalize_source(source))
assert_equal("https://rule34.paheal.net/post/view/852405", Source::URL.page_url(source))
end
should "normalize shimmie.katawa-shoujo.com links" do
source = "http://shimmie.katawa-shoujo.com/image/2740.png"
assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Sources::Strategies.normalize_source(source))
assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Source::URL.page_url(source))
end
should "normalize rule34.xxx links" do
source = "https://us.rule34.xxx//images/1802/0adc8fa0604dc445b4b47e6f4c436a08.jpeg?1949807"
assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Sources::Strategies.normalize_source(source))
assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Source::URL.page_url(source))
end
should "normalize diarypro links" do
source1 = "http://nekomataya.net/diarypro/data/upfile/216-1.jpg"
source2 = "http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=716-3.jpg"
assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Sources::Strategies.normalize_source(source1))
assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Sources::Strategies.normalize_source(source2))
assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Source::URL.page_url(source1))
assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Source::URL.page_url(source2))
end
should "normalize minus.com links" do
source = "http://i1.minus.com/ibb0DuE2Ds0yE6.jpg"
assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Sources::Strategies.normalize_source(source))
assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Source::URL.page_url(source))
end
should "normalize photozou links" do
source1 = "http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg"
source2 = "http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg"
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Sources::Strategies.normalize_source(source1))
assert_equal("https://photozou.jp/photo/show/1986212/118493247", Sources::Strategies.normalize_source(source2))
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Source::URL.page_url(source1))
assert_equal("https://photozou.jp/photo/show/1986212/118493247", Source::URL.page_url(source2))
end
should "normalize toranoana links" do
source1 = "http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg"
source2 = "https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg"
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695/", Sources::Strategies.normalize_source(source1))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417/", Sources::Strategies.normalize_source(source2))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695", Source::URL.page_url(source1))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417", Source::URL.page_url(source2))
end
should "normalize hitomi.la links" do
source1 = "https://aa.hitomi.la/galleries/883451/t_rena1g.png"
source2 = "https://la.hitomi.la/galleries/1054851/001_main_image.jpg"
assert_equal("https://hitomi.la/galleries/883451.html", Sources::Strategies.normalize_source(source1))
assert_equal("https://hitomi.la/reader/1054851.html#1", Sources::Strategies.normalize_source(source2))
assert_equal("https://hitomi.la/galleries/883451.html", Source::URL.page_url(source1))
assert_equal("https://hitomi.la/reader/1054851.html#1", Source::URL.page_url(source2))
end
should "leave unknown sources as they are" do
source1 = "https://google.com"
source2 = "a bad non-http source"
source3 = "https://example.com/Folder/中央大学.html"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2))
assert_equal(source3, Sources::Strategies.normalize_source(source3))
assert_nil(Source::URL.page_url("https://google.com"))
assert_nil(Source::URL.page_url("a bad non-http source"))
assert_nil(Source::URL.page_url("https://example.com/Folder/中央大学.html"))
end
end
end

View File

@@ -348,19 +348,19 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "http://i2.pixiv.net/img12/img/zenze/39749565.png"
source2 = "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg"
source3 = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
source4 = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png"
source5 = "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip"
assert_equal("https://www.pixiv.net/artworks/39749565", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.pixiv.net/artworks/39735353", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source4))
assert_equal("https://www.pixiv.net/artworks/44524589", Sources::Strategies.normalize_source(source5))
assert_equal("https://www.pixiv.net/artworks/39749565", Source::URL.page_url(source1))
assert_equal("https://www.pixiv.net/artworks/39735353", Source::URL.page_url(source2))
assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source3))
assert_equal("https://www.pixiv.net/artworks/14901720", Source::URL.page_url(source4))
assert_equal("https://www.pixiv.net/artworks/44524589", Source::URL.page_url(source5))
end
end
end

View File

@@ -99,10 +99,10 @@ module Sources
end
end
context "normalizing for source" do
should "avoid normalizing unnormalizable urls" do
context "generating page urls" do
should "handle inconvertible urls" do
bad_source = "https://skeb.imgix.net/requests/229088_2?bg=%23fff&auto=format&w=800&s=9cac8b76c0838f2df4f19ebc41c1ae0a"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_nil(Source::URL.page_url(bad_source))
end
end
end

View File

@@ -228,22 +228,18 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://octrain1020.tumblr.com/post/190713122589"
source2 = "https://octrain1020.tumblr.com/image/190713122589"
source3 = "https://octrain1020.tumblr.com/image/190713122589#asd"
source4 = "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source1, Sources::Strategies.normalize_source(source2))
assert_equal(source1, Sources::Strategies.normalize_source(source3))
assert_equal("https://superboin.tumblr.com/post/141169066579", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://octrain1020.tumblr.com/"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
assert_equal(source1, Source::URL.page_url(source1))
assert_equal(source1, Source::URL.page_url(source2))
assert_equal(source1, Source::URL.page_url(source3))
assert_equal("https://superboin.tumblr.com/post/141169066579", Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://octrain1020.tumblr.com/"))
end
end
end

View File

@@ -2,20 +2,11 @@ require 'test_helper'
module Sources
class TwitPicTest < ActiveSupport::TestCase
context "normalizing for source" do
should "normalize d3j5vwomefv46c.cloudfront.net links" do
source = "http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"
assert_equal("https://twitpic.com/dks0tb", Sources::Strategies.normalize_source(source))
end
should "normalize dn3pm25xmtlyu.cloudfront.net links" do
source = "https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA"
assert_equal("https://twitpic.com/dvitq3", Sources::Strategies.normalize_source(source))
end
should "normalize o.twimg.com links" do
source = "https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"
assert_equal("https://twitpic.com/dtnuru", Sources::Strategies.normalize_source(source))
context "generating page urls" do
should "work" do
assert_equal("https://twitpic.com/dks0tb", Source::URL.page_url("http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"))
assert_equal("https://twitpic.com/dvitq3", Source::URL.page_url("https://dn3pm25xmtlyu.cloudfront.net/photos/large/839006715.jpg?Expires=1646850828&Signature=d60CmLlmNqZJvOTteTOan13QWZ8gY3C4rUWCkh-IUoRr012vYtUYtip74GslGwCG0dxV5mpUpVFkaVZf16PiY7CsTdpAlA8Pmu2tN98D2dmC5FuW9KhhygDv6eFC8faoaGEyj~ArLuwz-8lC6Y05TVf0FgweeWwsRxFOfD5JHgCeIB0iZqzUx1t~eb6UMAWvbaKpfgvcp2oaDuCdZlMNi9T5OUBFoTh2DfnGy8t5COys1nOYYfZ9l69TDvVb2PKBaV8lsKK9xMwjoJNaWa1HL5S4MgODS5hiNDvycoBpu9KUvQ7q~rhC8cV6ZNctB5H9u~MmvBPoTKfy4w37cSc5uw__&Key-Pair-Id=APKAJROXZ7FN26MABHYA"))
assert_equal("https://twitpic.com/dtnuru", Source::URL.page_url("https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"))
end
end
end

View File

@@ -291,18 +291,18 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://twitter.com/i/web/status/1261877313349640194"
source2 = "https://twitter.com/BOW999/status/1261877313349640194"
source3 = "https://twitter.com/BOW999/status/1261877313349640194/photo/1"
source4 = "https://twitter.com/BOW999/status/1261877313349640194?s=19"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2))
assert_equal(source2, Sources::Strategies.normalize_source(source3))
assert_equal(source2, Sources::Strategies.normalize_source(source4))
assert_equal("https://www.twitter.com/irt_5433", Sources::Strategies.normalize_source("https://www.twitter.com/irt_5433"))
assert_equal(source1, Source::URL.page_url(source1))
assert_equal(source2, Source::URL.page_url(source2))
assert_equal(source2, Source::URL.page_url(source3))
assert_equal(source2, Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://www.twitter.com/irt_5433"))
end
end
end

View File

@@ -113,25 +113,19 @@ module Sources
end
end
context "normalizing for source" do
should "normalize correctly" do
context "generating page urls" do
should "work" do
source1 = "https://www.weibo.com/3150932560/H4cFbeKKA?from=page_1005053150932560_profile&wvr=6&mod=weibotime"
source2 = "https://photo.weibo.com/2125874520/wbphotos/large/mid/4242129997905387/pid/7eb64558ly1friyzhj44lj20dw2qxe81"
source3 = "https://m.weibo.cn/status/4173757483008088?luicode=20000061&lfid=4170879204256635"
source4 = "https://tw.weibo.com/SEINEN/4098035921690224"
assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Sources::Strategies.normalize_source(source1))
assert_equal("https://m.weibo.cn/detail/4242129997905387", Sources::Strategies.normalize_source(source2))
assert_equal("https://m.weibo.cn/status/4173757483008088", Sources::Strategies.normalize_source(source3))
assert_equal("https://m.weibo.cn/detail/4098035921690224", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://weibo.com/u/"
bad_source2 = "https://www.weibo.com/4ubergine/photos"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal("https://www.weibo.com/3150932560/H4cFbeKKA", Source::URL.page_url(source1))
assert_equal("https://m.weibo.cn/detail/4242129997905387", Source::URL.page_url(source2))
assert_equal("https://m.weibo.cn/status/4173757483008088", Source::URL.page_url(source3))
assert_equal("https://m.weibo.cn/detail/4098035921690224", Source::URL.page_url(source4))
assert_nil(Source::URL.page_url("https://weibo.com/u/"))
assert_nil(Source::URL.page_url("https://www.weibo.com/4ubergine/photos"))
end
end
end