Refactor source normalization

* Move the source normalization logic out of the post model
  and into individual sources' strategies.
* Rewrite normalization tests to be handled into each source's test,
  and expand them significantly. Previously we were only testing
  a very small subset of domains and variants.
* Fix up normalization for several sites.
* Normalize fav.me urls into normal deviantart urls.
This commit is contained in:
nonamethanks
2020-05-16 23:03:09 +02:00
parent 364343453c
commit 307df3b3e4
26 changed files with 674 additions and 315 deletions

View File

@@ -1,6 +1,7 @@
# Original images:
#
# * https://yande.re/image/b4b1d11facd1700544554e4805d47bb6/.png
# * https://files.yande.re/image/e4c2ba38de88ff1640aaebff84c84e81/469784.jpg
# * https://files.yande.re/image/2a5d1d688f565cb08a69ecf4e35017ab/yande.re%20349790%20breast_hold%20kurashima_tomoyasu%20mahouka_koukou_no_rettousei%20naked%20nipples.jpg
# * https://ayase.yande.re/image/2d0d229fd8465a325ee7686fcc7f75d2/yande.re%20192481%20animal_ears%20bunny_ears%20garter_belt%20headphones%20mitha%20stockings%20thighhighs.jpg
# * https://yuno.yande.re/image/1764b95ae99e1562854791c232e3444b/yande.re%20281544%20cameltoe%20erect_nipples%20fundoshi%20horns%20loli%20miyama-zero%20sarashi%20sling_bikini%20swimsuits.jpg
@@ -33,7 +34,7 @@ module Sources
class Moebooru < Base
BASE_URL = %r!\Ahttps?://(?:[^.]+\.)?(?<domain>yande\.re|konachan\.com)!i
POST_URL = %r!#{BASE_URL}/post/show/(?<id>\d+)!i
URL_SLUG = %r!/(?:yande\.re%20|Konachan\.com%20-%20)(?<id>\d+).*!i
URL_SLUG = %r!/(?:yande\.re%20|Konachan\.com%20-%20)?(?<id>\d+)?.*!i
IMAGE_URL = %r!#{BASE_URL}/(?<type>image|jpeg|sample)/(?<md5>\h{32})#{URL_SLUG}?\.(?<ext>jpg|jpeg|png|gif)\z!i
delegate :artist_name, :profile_url, :tag_name, :artist_commentary_title, :artist_commentary_desc, :dtext_artist_commentary_title, :dtext_artist_commentary_desc, to: :sub_strategy, allow_nil: true
@@ -75,6 +76,17 @@ module Sources
image_url
end
def normalize_for_source
id = post_id_from_url
md5 = post_md5_from_url
if id.present?
"https://#{site_name}/post/show/#{id}"
elsif md5.present?
"https://#{site_name}/post?tags=md5:#{md5}"
end
end
def tags
api_response[:tags].to_s.split.map do |tag|
[tag, "https://#{site_name}/post?tags=#{CGI.escape(tag)}"]