Lofter: simplify tag extraction logic

Now that we have a separate parsing class, we can just use it to properly parse tag URLs as well.
2022-03-08 17:00:47 +01:00
parent 37441d6b1a
commit ff6bfff311
2 changed files with 9 additions and 13 deletions
--- a/app/logical/source/url/lofter.rb
+++ b/app/logical/source/url/lofter.rb
@@ -28,7 +28,7 @@
 class Source::URL::Lofter < Source::URL
  RESERVED_SUBDOMAINS = %w[www.lofter.com i.lofter.com]

-  attr_reader :username, :work_id
+  attr_reader :username, :work_id, :unescaped_tag

  def self.match?(url)
    url.domain.in?(%w[lofter.com 127.net lf127.net])
@@ -48,6 +48,10 @@ class Source::URL::Lofter < Source::URL
      @username = $1
      @work_id = work_id

+    # https://gengar563.lofter.com/tag/%E5%BA%9F%E5%BC%83%E7%9B%90%E9%85%B8%E5%A4%84%E7%90%86%E5%8E%82
+    in /^([a-z0-9-]+)\.lofter\.com$/, "tag", tag
+      @unescaped_tag = CGI.unescape(tag)
+
    # http://gengar563.lofter.com
    in /^([a-z0-9-]+)\.lofter\.com$/, *rest unless host.in?(RESERVED_SUBDOMAINS)
      @username = $1
--- a/app/logical/sources/strategies/lofter.rb
+++ b/app/logical/sources/strategies/lofter.rb
@@ -42,19 +42,11 @@ module Sources
      end

      def tags
-        tags = page&.search(".info .tag, .main .tag a, .tagarea, .m-info .tags .tag")
-
-        tags.to_a.map do |tag|
-          [tag.text.gsub(/^● /, ""), tag.attr("href")]
+        return [] if artist_name.blank?
+        page&.search("[href*='#{artist_name}.lofter.com/tag/']").to_a.map do |tag|
+          href = tag.attr("href")
+          [Source::URL.parse(href).unescaped_tag, href]
        end
-
-        if tags.blank?
-          tags = page&.search(".main .tags a").to_a.map do |tag|
-            [tag.text.gsub(/^#/, ""), tag.attr("href")]
-          end
-        end
-
-        tags
      end

      def artist_commentary_desc