From 133c45ee295cac082c32ddd502f25924dc144b7a Mon Sep 17 00:00:00 2001 From: evazion Date: Tue, 15 Mar 2022 00:49:54 -0500 Subject: [PATCH] sources: parse more profile url formats. Add support for parsing these URL formats: * https://www.artstation.com/felipecartin/profile * https://www.deviantart.com/nlpsllp/gallery * https://fantia.jp/asanagi * https://www.lofter.com/front/blog/home-page/noshiqian * https://www.lofter.com/app/xiaokonggedmx * https://www.lofter.com/blog/semblance * https://q.nicovideo.jp/users/18700356 * https://dic.nicovideo.jp/u/11141663 * https://3d.nicovideo.jp/users/109584 * https://3d.nicovideo.jp/u/siobi * https://game.nicovideo.jp/atsumaru/users/7757217 * https://www.pixiv.net/user/13569921/series/81967 * https://pixiv.cc/zerousagi/ * https://www.plurk.com/u/ddks2923 * https://www.plurk.com/m/u/leiy1225 * https://www.plurk.com/s/u/salmonroe13 * https://www.plurk.com/RSSSww/invite/4 * https://skeb.jp/@okku_oxn/works * https://www.tumblr.com/blog/view/artofelaineho/187614935612 * https://www.tumblr.com/blog/view/artofelaineho * https://www.tumblr.com/blog/artofelaineho * https://www.tumblr.com/dashboard/blog/dankwartart * https://rosarrie.tumblr.com/archive * https://whereisnovember.tumblr.com/tagged/art * https://twitpic.com/photos/Type10TK * https://www.weibo.com/detail/4676597657371957 * https://www.weibo.com/u/5957640693/home?wvr=5 * https://www.weibo.com/lvxiuzi0/home --- app/logical/source/url/art_station.rb | 3 +- app/logical/source/url/deviant_art.rb | 3 +- app/logical/source/url/fantia.rb | 17 +++++++++-- app/logical/source/url/lofter.rb | 9 ++++++ app/logical/source/url/nico_seiga.rb | 43 ++++++++++++++++++++++----- app/logical/source/url/pixiv.rb | 10 +++++-- app/logical/source/url/plurk.rb | 23 ++++++++++---- app/logical/source/url/skeb.rb | 9 +++--- app/logical/source/url/tumblr.rb | 39 +++++++++++++++++++----- app/logical/source/url/twit_pic.rb | 10 ++++++- app/logical/source/url/weibo.rb | 13 ++++++-- 11 files changed, 145 
insertions(+), 34 deletions(-) diff --git a/app/logical/source/url/art_station.rb b/app/logical/source/url/art_station.rb index 54fb76296..77a32b5f9 100644 --- a/app/logical/source/url/art_station.rb +++ b/app/logical/source/url/art_station.rb @@ -40,7 +40,8 @@ class Source::URL::ArtStation < Source::URL @username = username # https://www.artstation.com/sa-dui - in "www.artstation.com", username + # https://www.artstation.com/felipecartin/profile + in "www.artstation.com", username, *rest @username = username # https://sa-dui.artstation.com diff --git a/app/logical/source/url/deviant_art.rb b/app/logical/source/url/deviant_art.rb index 769f0c434..1f18e0daf 100644 --- a/app/logical/source/url/deviant_art.rb +++ b/app/logical/source/url/deviant_art.rb @@ -66,7 +66,8 @@ module Source # https://www.deviantart.com/noizave # https://deviantart.com/noizave - in "deviantart.com", username + # https://www.deviantart.com/nlpsllp/gallery + in "deviantart.com", username, *rest @username = username # https://noizave.deviantart.com diff --git a/app/logical/source/url/fantia.rb b/app/logical/source/url/fantia.rb index c60c817d7..b4b163efe 100644 --- a/app/logical/source/url/fantia.rb +++ b/app/logical/source/url/fantia.rb @@ -1,7 +1,9 @@ # frozen_string_literal: true -# Unparsed: -# https://fantia.jp/asanagi +# Unhandled: +# +# https://fantia.jp/commissions/64988 +# https://fantia.jp/profiles/tus_2n9n0fm05fizg class Source::URL::Fantia < Source::URL attr_reader :full_image_url @@ -56,6 +58,11 @@ class Source::URL::Fantia < Source::URL in _, "fanclubs", /\d+/ => fanclub_id, *rest @fanclub_id = fanclub_id + # https://fantia.jp/asanagi + # https://fantia.jp/koruri + in _, username + @username = username + else end end @@ -77,7 +84,11 @@ class Source::URL::Fantia < Source::URL end def profile_url - "https://fantia.jp/fanclubs/#{fanclub_id}" if fanclub_id.present? + if fanclub_id.present? + "https://fantia.jp/fanclubs/#{fanclub_id}" + elsif username.present? 
+ "https://fantia.jp/#{username}" + end end def work_id diff --git a/app/logical/source/url/lofter.rb b/app/logical/source/url/lofter.rb index 8767ec9eb..bbd623ef7 100644 --- a/app/logical/source/url/lofter.rb +++ b/app/logical/source/url/lofter.rb @@ -18,6 +18,15 @@ class Source::URL::Lofter < Source::URL in /127\.net$/, "img", _ nil + # https://www.lofter.com/front/blog/home-page/noshiqian + in "www.lofter.com", "front", "blog", "home-page", username + @username = username + + # http://www.lofter.com/app/xiaokonggedmx + # http://www.lofter.com/blog/semblance + in "www.lofter.com", ("app" | "blog"), username + @username = username + # https://gengar563.lofter.com/post/1e82da8c_1c98dae1b in /^([a-z0-9-]+)\.lofter\.com$/, "post", work_id unless host.in?(RESERVED_SUBDOMAINS) @username = $1 diff --git a/app/logical/source/url/nico_seiga.rb b/app/logical/source/url/nico_seiga.rb index 4e4101dad..1b0a798f6 100644 --- a/app/logical/source/url/nico_seiga.rb +++ b/app/logical/source/url/nico_seiga.rb @@ -24,7 +24,7 @@ # module Source class URL::NicoSeiga < Source::URL - attr_reader :illust_id, :manga_id, :image_id, :user_id + attr_reader :illust_id, :manga_id, :image_id, :user_id, :username, :profile_url def self.match?(url) url.domain.in?(%w[nicovideo.jp nicoseiga.jp nicomanga.jp nimg.jp]) @@ -92,17 +92,50 @@ module Source # https://ext.seiga.nicovideo.jp/user/illust/20542122 in /seiga\.nicovideo\.jp$/, "user", "illust", user_id @user_id = user_id + @profile_url = "https://seiga.nicovideo.jp/user/illust/#{user_id}" # http://seiga.nicovideo.jp/manga/list?user_id=23839737 # http://sp.seiga.nicovideo.jp/manga/list?user_id=23839737 in /seiga\.nicovideo\.jp$/, "manga", "list" if params[:user_id].present? 
@user_id = params[:user_id] + @profile_url = "https://seiga.nicovideo.jp/manga/list?user_id=#{user_id}" # https://www.nicovideo.jp/user/4572975 # https://www.nicovideo.jp/user/20446930/mylist/28674289 - # https://commons.nicovideo.jp/user/696839 - in ("commons.nicovideo.jp" | "www.nicovideo.jp"), "user", /^\d+$/ => user_id, *rest + in ("www.nicovideo.jp"), "user", /^\d+$/ => user_id, *rest @user_id = user_id + @profile_url = "https://www.nicovideo.jp/user/#{user_id}" + + # https://commons.nicovideo.jp/user/696839 + in "commons.nicovideo.jp", "user", /^\d+$/ => user_id, *rest + @user_id = user_id + @profile_url = "https://commons.nicovideo.jp/user/#{user_id}" + + # https://q.nicovideo.jp/users/18700356 + in "q.nicovideo.jp", "users", /^\d+$/ => user_id, *rest + @user_id = user_id + @profile_url = "https://q.nicovideo.jp/users/#{user_id}" + + # https://dic.nicovideo.jp/u/11141663 + in "dic.nicovideo.jp", "u", /^\d+$/ => user_id, *rest + @user_id = user_id + @profile_url = "https://dic.nicovideo.jp/u/#{user_id}" + + # https://3d.nicovideo.jp/users/109584 + # https://3d.nicovideo.jp/users/29626631/works + in "3d.nicovideo.jp", "users", /^\d+$/ => user_id, *rest + @user_id = user_id + @profile_url = "https://3d.nicovideo.jp/users/#{user_id}" + + # https://3d.nicovideo.jp/u/siobi + in "3d.nicovideo.jp", "u", username, *rest + @username = username + @profile_url = "https://3d.nicovideo.jp/u/#{username}" + + # http://game.nicovideo.jp/atsumaru/users/7757217 + in "game.nicovideo.jp", "atsumaru", "users", /^\d+$/ => user_id, *rest + @user_id = user_id + @profile_url = "https://game.nicovideo.jp/atsumaru/users/#{user_id}" else end @@ -117,9 +150,5 @@ module Source "https://seiga.nicovideo.jp/image/source/#{image_id}" end end - - def profile_url - "https://seiga.nicovideo.jp/user/illust/#{user_id}" if user_id.present? 
- end end end diff --git a/app/logical/source/url/pixiv.rb b/app/logical/source/url/pixiv.rb index 2887d78ec..658d461b7 100644 --- a/app/logical/source/url/pixiv.rb +++ b/app/logical/source/url/pixiv.rb @@ -7,7 +7,7 @@ module Source def self.match?(url) return false if Source::URL::Fanbox.match?(url) || Source::URL::PixivSketch.match?(url) - url.domain.in?(%w[pximg.net pixiv.net pixiv.me]) || url.host == "tc-pximg01.techorus-cdn.com" + url.domain.in?(%w[pximg.net pixiv.net pixiv.me pixiv.cc]) || url.host == "tc-pximg01.techorus-cdn.com" end def parse @@ -61,7 +61,9 @@ module Source # https://www.pixiv.net/u/9202877 # https://www.pixiv.net/users/9202877 # https://www.pixiv.net/users/76567/novels - in "www.pixiv.net", ("u" | "users"), user_id, *rest + # https://www.pixiv.net/users/39598149/illustrations?p=1 + # https://www.pixiv.net/user/13569921/series/81967 + in "www.pixiv.net", ("u" | "user" | "users"), user_id, *rest @user_id = user_id # https://www.pixiv.net/en/users/9202877 @@ -77,6 +79,10 @@ module Source in "www.pixiv.me", username @username = username + # https://pixiv.cc/zerousagi/ + in ("pixiv.cc" | "www.pixiv.cc"), username + @username = username + else end end diff --git a/app/logical/source/url/plurk.rb b/app/logical/source/url/plurk.rb index b70fc21d7..97e300f4d 100644 --- a/app/logical/source/url/plurk.rb +++ b/app/logical/source/url/plurk.rb @@ -12,7 +12,7 @@ class Source::URL::Plurk < Source::URL # https://images.plurk.com/5wj6WD0r6y4rLN0DL3sqag.jpg # https://images.plurk.com/mx_5wj6WD0r6y4rLN0DL3sqag.jpg - in "plurk.com", /^(mx_)?(\w{22})\.(\w+)$/ + in "plurk.com", /^(mx_)?(\w{22})\.(\w+)$/ if image_url? 
@image_id = $2 # https://www.plurk.com/p/om6zv4 @@ -23,14 +23,27 @@ class Source::URL::Plurk < Source::URL in "plurk.com", "m", "p", work_id @work_id = work_id - # https://www.plurk.com/redeyehare - in "plurk.com", username - @username = username - # https://www.plurk.com/m/redeyehare in "plurk.com", "m", username @username = username + # https://www.plurk.com/u/ddks2923 + in "plurk.com", "u", username + @username = username + + # https://www.plurk.com/m/u/leiy1225 + in "plurk.com", "m", "u", username + @username = username + + # https://www.plurk.com/s/u/salmonroe13 + in "plurk.com", "s", "u", username + @username = username + + # https://www.plurk.com/redeyehare + # https://www.plurk.com/RSSSww/invite/4 + in "plurk.com", username, *rest + @username = username + else end end diff --git a/app/logical/source/url/skeb.rb b/app/logical/source/url/skeb.rb index e4f0dbaf1..20efd6bc3 100644 --- a/app/logical/source/url/skeb.rb +++ b/app/logical/source/url/skeb.rb @@ -10,10 +10,6 @@ class Source::URL::Skeb < Source::URL def parse case [domain, *path_segments] - # https://skeb.jp/@asanagi - in "skeb.jp", /^@/ => username - @username = username.delete_prefix("@") - # https://skeb.jp/@OrvMZ/works/3 (non-watermarked) # https://skeb.jp/@OrvMZ/works/1 (separated request and client's message after delivery) # https://skeb.jp/@asanagi/works/16 (age-restricted, watermarked) @@ -23,6 +19,11 @@ class Source::URL::Skeb < Source::URL @username = username.delete_prefix("@") @work_id = work_id + # https://skeb.jp/@asanagi + # https://skeb.jp/@okku_oxn/works + in "skeb.jp", /^@/ => username, *rest + @username = username.delete_prefix("@") + # https://skeb.imgix.net/requests/199886_0?bg=%23fff&auto=format&w=800&s=5a6a908ab964fcdfc4713fad179fe715 # https://skeb.imgix.net/requests/73290_0?bg=%23fff&auto=format&txtfont=bold&txtshad=70&txtclr=BFFFFFFF&txtalign=middle%2Ccenter&txtsize=150&txt=SAMPLE&w=800&s=4843435cff85d623b1f657209d131526 # 
https://skeb.imgix.net/requests/53269_1?bg=%23fff&fm=png&dl=53269.png&w=1.0&h=1.0&s=44588ea9c41881049e392adb1df21cce (full size) diff --git a/app/logical/source/url/tumblr.rb b/app/logical/source/url/tumblr.rb index dab3e85ad..c51bc0fc4 100644 --- a/app/logical/source/url/tumblr.rb +++ b/app/logical/source/url/tumblr.rb @@ -12,11 +12,10 @@ class Source::URL::Tumblr < Source::URL # https://66.media.tumblr.com/168dabd09d5ad69eb5fedcf94c45c31a/3dbfaec9b9e0c2e3-72/s640x960/bf33a1324f3f36d2dc64f011bfeab4867da62bc8.png # https://66.media.tumblr.com/5a2c3fe25c977e2281392752ab971c90/3dbfaec9b9e0c2e3-92/s500x750/4f92bbaaf95c0b4e7970e62b1d2e1415859dd659.png - in /(\d+\.)?media\.tumblr\.com/ => host, *directories, /s\d+x\d+/ => dimensions, file + in _, *directories, /s\d+x\d+/ => dimensions, file if asset_url? @directory = directories.first max_size = Integer.sqrt(Danbooru.config.max_image_resolution) @full_image_url = url.to_s.gsub(%r{/s\d+x\d+/\w+\.\w+\z}i, "/s#{max_size}x#{max_size}/#{file}") - @file = file # http://data.tumblr.com/07e7bba538046b2b586433976290ee1f/tumblr_o3gg44HcOg1r9pi29o1_raw.jpg # https://40.media.tumblr.com/de018501416a465d898d24ad81d76358/tumblr_nfxt7voWDX1rsd4umo1_r23_1280.jpg @@ -30,25 +29,51 @@ class Source::URL::Tumblr < Source::URL # https://media.tumblr.com/0DNBGJovY5j3smfeQs8nB53z_500.jpg # https://media.tumblr.com/tumblr_m24kbxqKAX1rszquso1_1280.jpg # https://va.media.tumblr.com/tumblr_pgohk0TjhS1u7mrsl.mp4 - in /^(data|(?:\d+\.)?media|(?:vtt|ve|va\.media))\.tumblr\.com/, *directory, file + in _, *directory, file if asset_url? 
@directory = directory.first - @file = file - @filename, @old_variant_size, @extension = @file.match(/(\w+?)(?:_(\d+h?|raw))?\.(\w+)\z/).captures + @filename, @old_variant_size, @extension = file.match(/(\w+?)(?:_(\d+h?|raw))?\.(\w+)\z/).captures # https://marmaladica.tumblr.com/post/188237914346/saved # https://emlan.tumblr.com/post/189469423572/kuro-attempts-to-buy-a-racy-book-at-comiket-but # https://superboin.tumblr.com/post/141169066579/photoset_iframe/superboin/tumblr_o45miiAOts1u6rxu8/500/false # https://make-do5.tumblr.com/post/619663949657423872 - in _, ("post" | "image"), /\d+/ => work_id, *rest + # http://raspdraws.tumblr.com/image/70021467381 + in _, ("post" | "image"), /^\d+$/ => work_id, *rest @blog_name = subdomain unless subdomain == "www" @work_id = work_id + # https://www.tumblr.com/blog/view/artofelaineho/187614935612 + in ("www.tumblr.com" | "tumblr.com"), "blog", "view", blog_name, /^\d+$/ => work_id + @blog_name = blog_name + @work_id = work_id + + # https://www.tumblr.com/blog/view/artofelaineho + # https://tumblr.com/blog/view/artofelaineho + in ("www.tumblr.com" | "tumblr.com"), "blog", "view", blog_name + @blog_name = blog_name + + # https://www.tumblr.com/blog/artofelaineho + # http://tumblr.com/blog/kervalchan + in ("www.tumblr.com" | "tumblr.com"), "blog", blog_name + @blog_name = blog_name + + # https://www.tumblr.com/dashboard/blog/dankwartart + # https://tumblr.com/dashboard/blog/dankwartart + in ("www.tumblr.com" | "tumblr.com"), "dashboard", "blog", blog_name + @blog_name = blog_name + + # https://rosarrie.tumblr.com/archive + # https://solisnotte.tumblr.com/about + # http://whereisnovember.tumblr.com/tagged/art + in _, *rest unless asset_url? || subdomain == "www" + @blog_name = subdomain + else end end def asset_url? - @file.present? 
+ host.ends_with?("media.tumblr.com") || host.in?(%w[data.tumblr.com vtt.tumblr.com ve.tumblr.com]) end def variants diff --git a/app/logical/source/url/twit_pic.rb b/app/logical/source/url/twit_pic.rb index aca1faf7c..624fcbb49 100644 --- a/app/logical/source/url/twit_pic.rb +++ b/app/logical/source/url/twit_pic.rb @@ -22,7 +22,7 @@ # * http://twitpic.com/photos/Type10TK (dead) class Source::URL::TwitPic < Source::URL - attr_reader :base36_id + attr_reader :base36_id, :username def self.match?(url) url.host.in?(%w[twitpic.com o.twimg.com dn3pm25xmtlyu.cloudfront.net d3j5vwomefv46c.cloudfront.net]) @@ -43,6 +43,10 @@ class Source::URL::TwitPic < Source::URL in "twitpic.com", "show", size, _ @base36_id = filename + # http://twitpic.com/photos/Type10TK (dead) + in "twitpic.com", "photos", username + @username = username + # https://o.twimg.com/1/proxy.jpg?t=FQQVBBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2NhcndrZi5qcGcUBBYAEgA&s=y8haxddqxJYpWql9uVnP3aoFFS7rA10vOGPdTO5HXvk # https://o.twimg.com/2/proxy.jpg?t=HBgpaHR0cHM6Ly90d2l0cGljLmNvbS9zaG93L2xhcmdlL2R0bnVydS5qcGcUsAkU0ggAFgASAA&s=dnN4DHCdnojC-iCJWdvZ-UZinrlWqAP7k7lmll2fTxs in "twimg.com", subdir, "proxy.jpg" if params[:t].present? @@ -71,4 +75,8 @@ class Source::URL::TwitPic < Source::URL return nil unless base36_id.present? "https://twitpic.com/#{base36_id}" end + + def profile_url + "http://twitpic.com/photos/#{username}" if username.present? 
+ end end diff --git a/app/logical/source/url/weibo.rb b/app/logical/source/url/weibo.rb index f95e1f5b4..b759d0e66 100644 --- a/app/logical/source/url/weibo.rb +++ b/app/logical/source/url/weibo.rb @@ -3,6 +3,10 @@ # Unhandled: # # http://t.cn/A6c3ZAE1 -> https://m.weibo.cn/status/4623322346685004 +# http://weibo.sina.com/malson +# https://www.weibo.com/n/Windtalker10 (not the same as https://www.weibo.com/Windtalker10) +# http://blog.sina.com.cn/ayayayayayaya +# http://blog.sina.com.cn/u/1299088063 class Source::URL::Weibo < Source::URL attr_reader :full_image_url, :artist_short_id, :artist_long_id, :username @@ -40,7 +44,8 @@ class Source::URL::Weibo < Source::URL @illust_long_id = illust_long_id # https://m.weibo.cn/detail/4506950043618873 - in "m.weibo.cn", "detail", /^\d+$/ => illust_long_id + # https://www.weibo.com/detail/4676597657371957 + in _, "detail", /^\d+$/ => illust_long_id @illust_long_id = illust_long_id # https://m.weibo.cn/status/J33G4tH1B @@ -48,9 +53,10 @@ class Source::URL::Weibo < Source::URL @illust_base62_id = illust_base62_id # https://www.weibo.com/u/5501756072 + # https://www.weibo.com/u/5957640693/home?wvr=5 # https://m.weibo.cn/profile/5501756072 # https://m.weibo.cn/u/5501756072 - in _, ("u" | "profile"), /^\d+$/ => artist_short_id + in _, ("u" | "profile"), /^\d+$/ => artist_short_id, *rest @artist_short_id = artist_short_id # https://www.weibo.com/p/1005055399876326 (short id: https://www.weibo.com/u/5399876326; username: https://www.weibo.com/chengziyou666) @@ -68,7 +74,8 @@ class Source::URL::Weibo < Source::URL # https://www.weibo.com/endlessnsmt (short id: https://www.weibo.com/u/1879370780) # https://www.weibo.cn/endlessnsmt - in _, /^\w+$/ => artist_short_id + # https://www.weibo.com/lvxiuzi0/home + in _, /^\w+$/ => username, *rest @username = username else