tinami: get the full image.

Support grabbing the full image for Tinami uploads, rather than the sample.

Getting the full image requires making a request like this:

    curl -X POST \
      -H 'Referer: https://www.tinami.com/' \
      -H 'Content-Type: application/x-www-form-urlencoded' \
      -H 'Cookie: Tinami2SESSID=<redacted>;' \
      --data-raw 'action_view_original=true&cont_id=1087268&ethna_csrf=<redacted>' \
      https://www.tinami.com/view/1087268

and then scraping the <img> tag from the resulting HTML page. If the post has multiple images, then we need to scrape and pass the `sub_id` of the image too.

Fixes #2818.
2022-03-19 23:22:09 -05:00
parent 0ddc09f011
commit 7f58cfbe5e
4 changed files with 62 additions and 17 deletions
--- a/app/logical/sources/strategies/tinami.rb
+++ b/app/logical/sources/strategies/tinami.rb
@@ -12,18 +12,35 @@ module Sources
      def image_urls
        if parsed_url.image_url?
          [url]
+
+        # http://www.tinami.com/view/1087268 (single image)
+        elsif page&.css("img.captify")&.size.to_i == 1
+          [full_image_url]
+
+        # http://www.tinami.com/view/1087270 (multiple images)
+        elsif image_sub_ids.present?
+          image_sub_ids.map { |sub_id| full_image_url(sub_id) }
+
+        # http://www.tinami.com/view/1087271 (multiple images)
+        elsif nv_body_image_urls.present?
+          nv_body_image_urls
+
+        # http://www.tinami.com/view/1087267 (no images, text only)
        else
-          # Page type 1: http://www.tinami.com/view/1087268
-          # Page type 2: http://www.tinami.com/view/1087271
-          # Page type 3: http://www.tinami.com/view/1087270
-          # Page type 4: http://www.tinami.com/view/1087267 (no images, text only)
-          page&.css(".viewbody img.captify, .viewbody .nv_body img").to_a.map do |img|
-            # img[:src] == "//img.tinami.com/illust2/img/619/6234b647da609.jpg"
-            "https:#{img[:src]}"
-          end
+          []
        end
      end

+      def nv_body_image_urls
        # Builds absolute URLs for every <img> found under `.viewbody .nv_body` on the
        # work page. The src values start with "//" (see the example below), so "https:"
        # is prepended to each. Returns [] when there is no page or no matching image.
+        page&.css(".viewbody .nv_body img").to_a.map do |img|
+          "https:#{img[:src]}" # img[:src] == "//img.tinami.com/illust2/img/619/6234b647da609.jpg"
+        end
+      end
+
+      def image_sub_ids
        # Collects the `sub_id` attribute of each `.thumbnail_list` element inside
        # `#controller_model` on the work page; image_urls passes each one to
        # full_image_url for multi-image works. Returns [] when there is no page or
        # no such element.
+        page&.css(".viewbody #controller_model .thumbnail_list").to_a.map { |td| td.attr("sub_id") }
+      end
+
      # The URL of the work page: taken from the parsed URL when it provides one,
      # otherwise from the parsed referer (nil if there is no referer either).
      def page_url
        parsed_url.page_url || parsed_referer&.page_url
      end
@@ -59,6 +76,24 @@ module Sources
        Source::URL.parse(url)&.user_id
      end

+      def work_id
        # The id of the work: taken from the parsed URL when it provides one, otherwise
        # from the parsed referer (nil if there is no referer either). Sent as `cont_id`
        # in the full-image request made by full_image_url.
+        parsed_url.work_id || parsed_referer&.work_id
+      end
+
+      def ethna_csrf
        # The value of the `ethna_csrf` input inside `#open_original_content` on the
        # work page, which full_image_url submits with its form. Returns nil when
        # there is no page or the input is not found.
+        page&.at("#open_original_content input[name=ethna_csrf]")&.attr("value")
+      end
+
+      def full_image_url(sub_id = nil)
        # Resolves the URL of the full-size image by POSTing the "view original" form
        # to the work page and scraping the <img> tag out of the returned HTML.
        #
        # sub_id - the sub_id of the image to request for a multi-image work; image_urls
        #          leaves it at the default of nil for single-image works.
        #
        # Returns the image URL with "https:" prepended, or nil when the work id or CSRF
        # token is unavailable, the response status is not 200, or no matching <img>
        # is present in the response.
+        return nil unless work_id.present? && ethna_csrf.present?
+
+        # Note that we have to spoof the Referer here.
+        response = http.post(page_url, form: { action_view_original: true, cont_id: work_id, sub_id: sub_id, ethna_csrf: ethna_csrf })
+        return nil unless response.status == 200
+
        # Only accept an <img> whose src points at img.tinami.com; the src is
        # protocol-relative, hence the "https:" prefix.
+        response.parse.at("body > div > a > img[src^='//img.tinami.com']")&.attr("src")&.prepend("https:")
+      end
+
      def page
        return nil if page_url.blank?

@@ -68,7 +103,11 @@ module Sources
        response.parse
      end

-      memoize :page, :user_id
+      def http
        # The inherited HTTP client, extended with the Tinami session cookie taken from
        # Danbooru.config.tinami_session_id and with the :spoof_referrer feature enabled.
+        super.cookies(Tinami2SESSID: Danbooru.config.tinami_session_id).use(:spoof_referrer)
+      end
+
+      memoize :http, :page, :user_id, :work_id, :ethna_csrf, :image_urls, :image_sub_ids, :nv_body_image_urls
    end
  end
 end