Refactor source normalization

* Move the source normalization logic out of the post model
  and into individual sources' strategies.
* Rewrite the normalization tests so that each source's own test file
  covers its normalization, and expand them significantly. Previously we
  were only testing a very small subset of domains and variants.
* Fix up normalization for several sites.
* Normalize fav.me urls into normal deviantart urls.
This commit is contained in:
nonamethanks
2020-05-16 23:03:09 +02:00
parent 364343453c
commit 307df3b3e4
26 changed files with 674 additions and 315 deletions

View File

@@ -1531,60 +1531,6 @@ class PostTest < ActiveSupport::TestCase
@post.pixiv_id = nil
end
end
should "normalize pixiv links" do
@post.update!(source: "http://i2.pixiv.net/img12/img/zenze/39749565.png")
assert_equal("https://www.pixiv.net/artworks/39749565", @post.normalized_source)
@post.update!(source: "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg")
assert_equal("https://www.pixiv.net/artworks/39735353", @post.normalized_source)
@post.update!(source: "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg")
assert_equal("https://www.pixiv.net/artworks/14901720", @post.normalized_source)
@post.update!(source: "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png")
assert_equal("https://www.pixiv.net/artworks/14901720", @post.normalized_source)
@post.update!(source: "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip")
assert_equal("https://www.pixiv.net/artworks/44524589", @post.normalized_source)
end
should "normalize nicoseiga links" do
@post.source = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf"
assert_equal("https://seiga.nicovideo.jp/seiga/im3521156", @post.normalized_source)
@post.source = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893"
assert_equal("https://seiga.nicovideo.jp/seiga/im3583893", @post.normalized_source)
end
should "normalize twitpic links" do
@post.source = "http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"
assert_equal("https://twitpic.com/dks0tb", @post.normalized_source)
end
should "normalize deviantart links" do
@post.source = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg"
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", @post.normalized_source)
@post.source = "http://fc00.deviantart.net/fs71/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg"
assert_equal("https://deviantart.com/deviation/417560500", @post.normalized_source)
end
should "normalize karabako links" do
@post.source = "http://www.karabako.net/images/karabako_38835.jpg"
assert_equal("http://www.karabako.net/post/view/38835", @post.normalized_source)
end
should "normalize twipple links" do
@post.source = "http://p.twpl.jp/show/orig/mI2c3"
assert_equal("http://p.twipple.jp/mI2c3", @post.normalized_source)
end
should "normalize hentai foundry links" do
@post.source = "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg"
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", @post.normalized_source)
@post.source = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg"
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", @post.normalized_source)
end
end
context "when validating tags" do

View File

@@ -160,5 +160,24 @@ module Sources
site = Sources::Strategies.find("https://sa-dui.artstation.com/projects/DVERn")
assert_equal("sa-dui", site.artist_name)
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://www.artstation.com/artwork/ghost-in-the-shell-fandom"
source2 = "https://anubis1982918.artstation.com/projects/qPVGP/"
source3 = "https://dudeunderscore.artstation.com/projects/NoNmD?album_id=23041"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal("https://anubis1982918.artstation.com/projects/qPVGP", Sources::Strategies.normalize_source(source2))
assert_equal("https://dudeunderscore.artstation.com/projects/NoNmD", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://cdna.artstation.com/p/assets/images/images/005/804/224/large/titapa-khemakavat-sa-dui-srevere.jpg?1493887236"
bad_source2 = "https://www.artstation.com"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
end
end
end
end

View File

@@ -32,7 +32,7 @@ module Sources
@artist = create(:artist, name: "nickbeja", url_string: "https://nickbeja.deviantart.com")
assert_equal("https://pre00.deviantart.net/423b/th/pre/i/2017/281/e/0/mindflayer_girl01_by_nickbeja-dbpxdt8.png", @site.image_url)
assert_equal(@site.image_url, @site.canonical_url)
assert_equal(@site.page_url, @site.canonical_url)
assert_equal("nickbeja", @site.artist_name)
assert_equal("https://www.deviantart.com/nickbeja", @site.profile_url)
assert_equal("https://www.deviantart.com/nickbeja/art/Mindflayer-Girl01-708675884", @site.page_url_from_image_url)
@@ -362,5 +362,32 @@ module Sources
assert_equal([@artist], @site.artists)
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "http://fc06.deviantart.net/fs71/f/2013/295/d/7/you_are_already_dead__by_mar11co-d6rgm0e.jpg"
source2 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/intermediary/f/8b472d70-a0d6-41b5-9a66-c35687090acc/d23jbr4-8a06af02-70cb-46da-8a96-42a6ba73cdb4.jpg/v1/fill/w_786,h_1017,q_70,strp/silverhawks_quicksilver_by_edsfox_d23jbr4-pre.jpg"
source3 = "http://orig12.deviantart.net/9b69/f/2017/023/7/c/illustration___tokyo_encount_oei__by_melisaongmiqin-dawi58s.png"
source4 = "http://fc00.deviantart.net/fs71/f/2013/337/3/5/35081351f62b432f84eaeddeb4693caf-d6wlrqs.jpg"
source5 = "https://images-wixmp-ed30a86b8c4ca887773594c2.wixmp.com/f/76098ac8-04ab-4784-b382-88ca082ba9b1/d9x7lmk-595099de-fe8f-48e5-9841-7254f9b2ab8d.png?token=eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJzdWIiOiJ1cm46YXBwOiIsImlzcyI6InVybjphcHA6Iiwib2JqIjpbW3sicGF0aCI6IlwvZlwvNzYwOThhYzgtMDRhYi00Nzg0LWIzODItODhjYTA4MmJhOWIxXC9kOXg3bG1rLTU5NTA5OWRlLWZlOGYtNDhlNS05ODQxLTcyNTRmOWIyYWI4ZC5wbmcifV1dLCJhdWQiOlsidXJuOnNlcnZpY2U6ZmlsZS5kb3dubG9hZCJdfQ.KFOVXAiF8MTlLb3oM-FlD0nnDvODmjqEhFYN5I2X5Bc"
source6 = "https://fav.me/dbc3a48"
assert_equal("https://www.deviantart.com/mar11co/art/You-Are-Already-Dead-408921710", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.deviantart.com/edsfox/art/Silverhawks-Quicksilver-126872896", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.deviantart.com/melisaongmiqin/art/Illustration-Tokyo-Encount-Oei-659256076", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.deviantart.com/deviation/417560500", Sources::Strategies.normalize_source(source4))
assert_equal("https://www.deviantart.com/deviation/599977532", Sources::Strategies.normalize_source(source5))
assert_equal("https://www.deviantart.com/deviation/685436408", Sources::Strategies.normalize_source(source6))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "http://fc08.deviantart.net/images3/i/2004/088/8/f/Blackrose_for_MuzicFreq.jpg"
bad_source2 = "http://prnt00.deviantart.net/9b74/b/2016/101/4/468a9d89f52a835d4f6f1c8caca0dfb2-pnjfbh.jpg"
bad_source3 = "https://deviantart.net"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
end
end
end
end

View File

@@ -85,5 +85,22 @@ module Sources
assert_equal([@artist], @image.artists)
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "http://pictures.hentai-foundry.com//a/AnimeFlux/219123.jpg"
source2 = "http://pictures.hentai-foundry.com/a/AnimeFlux/219123/Mobile-Suit-Equestria-rainbow-run.jpg"
source3 = "http://www.hentai-foundry.com/pictures/user/Ganassa/457176/LOL-Swimsuit---Caitlyn-reworked-nude-ver."
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.hentai-foundry.com/pictures/user/AnimeFlux/219123", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.hentai-foundry.com/pictures/user/Ganassa/457176", Sources::Strategies.normalize_source(source3))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://pictures.hentai-foundry.com/a/AnimeFlux"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end
end
end
end

View File

@@ -114,34 +114,29 @@ module Sources
end
end
context "Post#normalized_source" do
should "convert yande.re image urls to post urls" do
@post = FactoryBot.build(:post)
context "normalizing for source" do
should "normalize yande.re sources correctly" do
source1 = "https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png"
source2 = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg"
source3 = "https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg"
source4 = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png"
source5 = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg"
@post.source = "https://files.yande.re/image/b66909b940e8d77accab7c9b25aa4dc3/yande.re%20377828.png"
assert_equal("https://yande.re/post/show/377828", @post.normalized_source)
@post.source = "https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg"
assert_equal("https://yande.re/post/show/349790", @post.normalized_source)
@post.source = "https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png"
assert_equal("https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6", @post.normalized_source)
@post.source = "https://yande.re/jpeg/22577d2344fe694cf47f80563031b3cd.jpg"
assert_equal("https://yande.re/post?tags=md5:22577d2344fe694cf47f80563031b3cd", @post.normalized_source)
assert_equal("https://yande.re/post/show/377828", Sources::Strategies.normalize_source(source1))
assert_equal("https://yande.re/post/show/349790", Sources::Strategies.normalize_source(source2))
assert_equal("https://yande.re/post/show/469784", Sources::Strategies.normalize_source(source3))
assert_equal("https://yande.re/post?tags=md5:b4b1d11facd1700544554e4805d47bb6", Sources::Strategies.normalize_source(source4))
assert_equal("https://yande.re/post?tags=md5:22577d2344fe694cf47f80563031b3cd", Sources::Strategies.normalize_source(source5))
end
should "convert konachan.com image urls to post urls" do
@post = FactoryBot.build(:post)
should "normalize konachan.com sources correctly" do
source1 = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg"
source2 = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg"
source3 = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg"
@post.source = "https://konachan.com/image/5d633771614e4bf5c17df19a0f0f333f/Konachan.com%20-%20270807%20black_hair%20bokuden%20clouds%20grass%20landscape%20long_hair%20original%20phone%20rope%20scenic%20seifuku%20skirt%20sky%20summer%20torii%20tree.jpg"
assert_equal("https://konachan.com/post/show/270807", @post.normalized_source)
@post.source = "https://konachan.com/sample/e2e2994bae738ff52fff7f4f50b069d5/Konachan.com%20-%20270803%20sample.jpg"
assert_equal("https://konachan.com/post/show/270803", @post.normalized_source)
@post.source = "https://konachan.com/image/99a3c4f10c327d54486259a74173fc0b.jpg"
assert_equal("https://konachan.com/post?tags=md5:99a3c4f10c327d54486259a74173fc0b", @post.normalized_source)
assert_equal("https://konachan.com/post/show/270807", Sources::Strategies.normalize_source(source1))
assert_equal("https://konachan.com/post/show/270803", Sources::Strategies.normalize_source(source2))
assert_equal("https://konachan.com/post?tags=md5:99a3c4f10c327d54486259a74173fc0b", Sources::Strategies.normalize_source(source3))
end
end
end

View File

@@ -61,5 +61,24 @@ module Sources
assert_match(full_image_url, site.canonical_url)
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "http://lohas.nicoseiga.jp/priv/3521156?e=1382558156&h=f2e089256abd1d453a455ec8f317a6c703e2cedf"
source2 = "http://lohas.nicoseiga.jp/priv/b80f86c0d8591b217e7513a9e175e94e00f3c7a1/1384936074/3583893"
source3 = "http://lohas.nicoseiga.jp/o/910aecf08e542285862954017f8a33a8c32a8aec/1433298801/4937663"
source4 = "http://seiga.nicovideo.jp/image/source?id=3312222"
assert_equal("https://seiga.nicovideo.jp/seiga/im3521156", Sources::Strategies.normalize_source(source1))
assert_equal("https://seiga.nicovideo.jp/seiga/im3583893", Sources::Strategies.normalize_source(source2))
assert_equal("https://seiga.nicovideo.jp/seiga/im4937663", Sources::Strategies.normalize_source(source3))
assert_equal("https://seiga.nicovideo.jp/seiga/im3312222", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://seiga.nicovideo.jp"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end
end
end
end

View File

@@ -274,5 +274,25 @@ module Sources
end
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://pic01.nijie.info/nijie_picture/diff/main/218856_0_236014_20170620101329.png"
source2 = "https://pic04.nijie.info/nijie_picture/diff/main/287736_161475_20181112032855_1.png"
assert_equal("https://nijie.info/view.php?id=218856", Sources::Strategies.normalize_source(source1))
assert_equal("https://nijie.info/view.php?id=287736", Sources::Strategies.normalize_source(source2))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://pic01.nijie.info/nijie_picture/20120211210359.jpg"
bad_source2 = "https://pic04.nijie.info/omata/4829_20161128012012.png"
bad_source3 = "https://pic03.nijie.info/nijie_picture/28310_20131101215959.jpg"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
assert_equal(bad_source3, Sources::Strategies.normalize_source(bad_source3))
end
end
end
end

View File

@@ -23,10 +23,150 @@ module Sources
should "find the artist" do
a1 = FactoryBot.create(:artist, name: "test1", url_string: "http://oremuhax.x0.com")
a2 = FactoryBot.create(:artist, name: "test2", url_string: "http://google.com")
assert_equal([a1], @site.artists)
end
end
context "normalizing for source" do
should "normalize twitpic links" do
source = "http://d3j5vwomefv46c.cloudfront.net/photos/large/820960031.jpg?1384107199"
assert_equal("https://twitpic.com/dks0tb", Sources::Strategies.normalize_source(source))
end
should "normalize karabako links" do
source = "http://www.karabako.net/images/karabako_38835.jpg"
assert_equal("http://www.karabako.net/post/view/38835", Sources::Strategies.normalize_source(source))
end
should "normalize twipple links" do
source = "http://p.twpl.jp/show/orig/mI2c3"
assert_equal("http://p.twipple.jp/mI2c3", Sources::Strategies.normalize_source(source))
end
should "normalize fc2 links" do
source1 = "https://blog-imgs-41.fc2.com/t/u/y/tuyadasi/file.png"
source2 = "http://diary.fc2.com/user/kazuharoom/img/2020_1/29.jpg"
assert_equal("http://tuyadasi.blog.fc2.com/img/file.png/", Sources::Strategies.normalize_source(source1))
assert_equal("http://diary.fc2.com/cgi-sys/ed.cgi/kazuharoom?Y=2020&M=1&D=29", Sources::Strategies.normalize_source(source2))
end
should "normalize facebook links" do
source = "https://scontent-sin1-1.xx.fbcdn.net/hphotos-xtp1/t31.0-8/11254493_576443445841777_7716273903390212288_o.jpg"
assert_equal("https://www.facebook.com/photo.php?fbid=576443445841777", Sources::Strategies.normalize_source(source))
end
should "normalize sankaku links" do
source = "http://cs.sankakucomplex.com/data/sample/c2/d7/sample-c2d7270b84ac81326384d4eadd4d4746.jpg?2738848"
assert_equal("https://chan.sankakucomplex.com/en/post/show?md5=c2d7270b84ac81326384d4eadd4d4746", Sources::Strategies.normalize_source(source))
end
should "normalize zerochan links" do
source1 = "http://static.zerochan.net/full/23/15/183273.jpg"
source2 = "https://s4.zerochan.net/Victorique.de.Blois.full.411536.jpg"
source3 = "http://www.zerochan.net/full/1567893"
assert_equal("https://www.zerochan.net/183273#full", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.zerochan.net/411536#full", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.zerochan.net/1567893#full", Sources::Strategies.normalize_source(source3))
end
should "normalize minitokyo links" do
source1 = "http://static.minitokyo.net/downloads/27/13/365677.jpg?433592448,Minitokyo.Eien.no.Aselia.Scans_365677.jpg"
source2 = "http://static.minitokyo.net/downloads/14/33/199164.jpg?928244019"
assert_equal("http://gallery.minitokyo.net/download/365677", Sources::Strategies.normalize_source(source1))
assert_equal("http://gallery.minitokyo.net/download/199164", Sources::Strategies.normalize_source(source2))
end
should "normalize gelbooru links" do
source1 = "https://gelbooru.com//images/ee/5c/ee5c9a69db9602c95debdb9b98fb3e3e.jpeg"
source2 = "http://simg.gelbooru.com//images/2003/edd1d2b3881cf70c3acf540780507531.png"
source3 = "https://simg3.gelbooru.com//samples/0b/3a/sample_0b3ae5e225072b8e391c827cb470d29c.jpg"
assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=ee5c9a69db9602c95debdb9b98fb3e3e", Sources::Strategies.normalize_source(source1))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=edd1d2b3881cf70c3acf540780507531", Sources::Strategies.normalize_source(source2))
assert_equal("https://gelbooru.com/index.php?page=post&s=list&md5=0b3ae5e225072b8e391c827cb470d29c", Sources::Strategies.normalize_source(source3))
end
should "normalize wikia links" do
source = "https://vignette.wikia.nocookie.net/valkyriecrusade/images/c/c5/Crimson_Hatsune_H.png/revision/latest?cb=20180702031954"
assert_equal("https://valkyriecrusade.wikia.com/wiki/File:Crimson_Hatsune_H.png", Sources::Strategies.normalize_source(source))
end
should "normalize e-shuushuu links" do
source = "http://e-shuushuu.net/images/2014-07-22-662472.png"
assert_equal("https://e-shuushuu.net/image/662472", Sources::Strategies.normalize_source(source))
end
should "normalize nijigen-daiaru links" do
source = "http://jpg.nijigen-daiaru.com/19909/029.jpg"
assert_equal("http://nijigen-daiaru.com/book.php?idb=19909", Sources::Strategies.normalize_source(source))
end
should "normalize doujinantena links" do
source = "http://sozai.doujinantena.com/contents_jpg/d6c39f09d435e32c221e4ef866eceba4/015.jpg"
assert_equal("http://doujinantena.com/page.php?id=d6c39f09d435e32c221e4ef866eceba4", Sources::Strategies.normalize_source(source))
end
should "normalize paheal.net links" do
source = "http://rule34-data-010.paheal.net/_images/854806addcd3b1246424e7cea49afe31/852405%20-%20Darkstalkers%20Felicia.jpg"
assert_equal("https://rule34.paheal.net/post/list/md5:854806addcd3b1246424e7cea49afe31/1", Sources::Strategies.normalize_source(source))
end
should "normalize shimmie.katawa-shoujo.com links" do
source = "http://shimmie.katawa-shoujo.com/image/2740.png"
assert_equal("https://shimmie.katawa-shoujo.com/post/view/2740", Sources::Strategies.normalize_source(source))
end
should "normalize rule34.xxx links" do
source = "https://us.rule34.xxx//images/1802/0adc8fa0604dc445b4b47e6f4c436a08.jpeg?1949807"
assert_equal("https://rule34.xxx/index.php?page=post&s=list&md5=0adc8fa0604dc445b4b47e6f4c436a08", Sources::Strategies.normalize_source(source))
end
should "normalize diarypro links" do
source1 = "http://nekomataya.net/diarypro/data/upfile/216-1.jpg"
source2 = "http://akimbo.sakura.ne.jp/diarypro/diary.cgi?mode=image&upfile=716-3.jpg"
assert_equal("http://nekomataya.net/diarypro/diary.cgi?no=216", Sources::Strategies.normalize_source(source1))
assert_equal("http://akimbo.sakura.ne.jp/diarypro/diary.cgi?no=716", Sources::Strategies.normalize_source(source2))
end
should "normalize minus.com links" do
source = "http://i1.minus.com/ibb0DuE2Ds0yE6.jpg"
assert_equal("http://minus.com/i/bb0DuE2Ds0yE6", Sources::Strategies.normalize_source(source))
end
should "normalize photozou links" do
source1 = "http://kura3.photozou.jp/pub/794/1481794/photo/161537258_org.v1364829097.jpg"
source2 = "http://art59.photozou.jp/pub/212/1986212/photo/118493247_org.v1534644005.jpg"
assert_equal("https://photozou.jp/photo/show/1481794/161537258", Sources::Strategies.normalize_source(source1))
assert_equal("https://photozou.jp/photo/show/1986212/118493247", Sources::Strategies.normalize_source(source2))
end
should "normalize toranoana links" do
source1 = "http://img.toranoana.jp/popup_img/04/0030/09/76/040030097695-2p.jpg"
source2 = "https://ecdnimg.toranoana.jp/ec/img/04/0030/65/34/040030653417-6p.jpg"
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030097695/", Sources::Strategies.normalize_source(source1))
assert_equal("https://ec.toranoana.jp/tora_r/ec/item/040030653417/", Sources::Strategies.normalize_source(source2))
end
should "normalize hitomi.la links" do
source1 = "https://aa.hitomi.la/galleries/883451/t_rena1g.png"
source2 = "https://la.hitomi.la/galleries/1054851/001_main_image.jpg"
assert_equal("https://hitomi.la/galleries/883451.html", Sources::Strategies.normalize_source(source1))
assert_equal("https://hitomi.la/reader/1054851.html#1", Sources::Strategies.normalize_source(source2))
end
should "leave unknown sources as they are" do
source1 = "https://google.com"
source2 = "a bad non-http source"
source3 = "https://example.com/Folder/中央大学.html"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source2, Sources::Strategies.normalize_source(source2))
assert_equal(source3, Sources::Strategies.normalize_source(source3))
end
end
end
end

View File

@@ -94,5 +94,23 @@ module Sources
assert_equal("evazion", @site.artist_name)
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://pawoo.net/@evazion/19451018/"
source2 = "https://pawoo.net/web/statuses/19451018/favorites"
assert_equal("https://pawoo.net/@evazion/19451018", Sources::Strategies.normalize_source(source1))
assert_equal("https://pawoo.net/web/statuses/19451018", Sources::Strategies.normalize_source(source2))
end
should "avoid normalizing unnormalizable urls" do
bad_source1 = "https://img.pawoo.net/media_attachments/files/001/297/997/original/c4272a09570757c2.png"
bad_source2 = "https://pawoo.net/@evazion/media"
assert_equal(bad_source1, Sources::Strategies.normalize_source(bad_source1))
assert_equal(bad_source2, Sources::Strategies.normalize_source(bad_source2))
end
end
end
end

View File

@@ -363,5 +363,21 @@ module Sources
end
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "http://i2.pixiv.net/img12/img/zenze/39749565.png"
source2 = "http://i1.pixiv.net/img53/img/themare/39735353_big_p1.jpg"
source3 = "http://i1.pixiv.net/c/150x150/img-master/img/2010/11/30/08/39/58/14901720_p0_master1200.jpg"
source4 = "http://i1.pixiv.net/img-original/img/2010/11/30/08/39/58/14901720_p0.png"
source5 = "http://i2.pixiv.net/img-zip-ugoira/img/2014/08/05/06/01/10/44524589_ugoira1920x1080.zip"
assert_equal("https://www.pixiv.net/artworks/39749565", Sources::Strategies.normalize_source(source1))
assert_equal("https://www.pixiv.net/artworks/39735353", Sources::Strategies.normalize_source(source2))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source3))
assert_equal("https://www.pixiv.net/artworks/14901720", Sources::Strategies.normalize_source(source4))
assert_equal("https://www.pixiv.net/artworks/44524589", Sources::Strategies.normalize_source(source5))
end
end
end
end

View File

@@ -259,5 +259,24 @@ module Sources
end
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://octrain1020.tumblr.com/post/190713122589"
source2 = "https://octrain1020.tumblr.com/image/190713122589"
source3 = "https://octrain1020.tumblr.com/image/190713122589#asd"
source4 = "https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source1, Sources::Strategies.normalize_source(source2))
assert_equal(source1, Sources::Strategies.normalize_source(source3))
assert_equal("https://superboin.tumblr.com/post/141169066579", Sources::Strategies.normalize_source(source4))
end
should "avoid normalizing unnormalizable urls" do
bad_source = "https://octrain1020.tumblr.com/"
assert_equal(bad_source, Sources::Strategies.normalize_source(bad_source))
end
end
end
end

View File

@@ -298,5 +298,24 @@ module Sources
assert_includes(site.translated_tags.map(&:name), "nishizumi_miho")
end
end
context "normalizing for source" do
should "normalize correctly" do
source1 = "https://twitter.com/i/web/status/1261877313349640194"
source2 = "https://twitter.com/BOW999/status/1261877313349640194"
source3 = "https://twitter.com/BOW999/status/1261877313349640194/photo/1"
source4 = "https://twitter.com/BOW999/status/1261877313349640194?s=19"
assert_equal(source1, Sources::Strategies.normalize_source(source1))
assert_equal(source1, Sources::Strategies.normalize_source(source2))
assert_equal(source1, Sources::Strategies.normalize_source(source3))
assert_equal(source1, Sources::Strategies.normalize_source(source4))
end
should "normalize twimg twitpic correctly" do
source = "https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs"
assert_equal("https://twitpic.com/dtnuru", Sources::Strategies.normalize_source(source))
end
end
end
end