diff --git a/app/logical/storage_manager.rb b/app/logical/storage_manager.rb
new file mode 100644
index 000000000..1ce833f88
--- /dev/null
+++ b/app/logical/storage_manager.rb
@@ -0,0 +1,106 @@
+class StorageManager
+  class Error < StandardError; end
+
+  DEFAULT_BASE_URL = Rails.application.routes.url_helpers.root_url + "data"
+  DEFAULT_BASE_DIR = "#{Rails.root}/public/data"
+
+  attr_reader :base_url, :base_dir, :hierarchical, :tagged_filenames, :large_image_prefix
+
+  def initialize(base_url: DEFAULT_BASE_URL, base_dir: DEFAULT_BASE_DIR, hierarchical: false, tagged_filenames: Danbooru.config.enable_seo_post_urls, large_image_prefix: Danbooru.config.large_image_prefix)
+    @base_url = base_url.chomp("/")
+    @base_dir = base_dir
+    @hierarchical = hierarchical
+    @tagged_filenames = tagged_filenames
+    @large_image_prefix = large_image_prefix
+  end
+
+  # Store the given file at the given path. If a file already exists at that
+  # location it should be overwritten atomically. Either the file is fully
+  # written, or an error is raised and the original file is left unchanged. The
+  # file should never be in a partially written state.
+  def store(io, path)
+    raise NotImplementedError, "store not implemented"
+  end
+
+  # Delete the file at the given path. If the file doesn't exist, no error
+  # should be raised.
+  def delete(path)
+    raise NotImplementedError, "delete not implemented"
+  end
+
+  # Return a readonly copy of the file located at the given path.
+  def open(path)
+    raise NotImplementedError, "open not implemented"
+  end
+
+  def store_file(io, post, type)
+    store(io, file_path(post.md5, post.file_ext, type))
+  end
+
+  def delete_file(post_id, md5, file_ext, type)
+    delete(file_path(md5, file_ext, type))
+  end
+
+  def open_file(post, type)
+    open(file_path(post.md5, post.file_ext, type))
+  end
+
+  def file_url(post, type)
+    subdir = subdir_for(post.md5)
+    file = file_name(post.md5, post.file_ext, type)
+
+    if type == :preview && !post.has_preview?
+      "#{base_url}/images/download-preview.png"
+    elsif type == :preview
+      "#{base_url}/preview/#{subdir}#{file}"
+    elsif type == :large && post.has_large?
+      "#{base_url}/sample/#{subdir}#{seo_tags(post)}#{file}"
+    else
+      "#{base_url}/#{subdir}#{seo_tags(post)}#{file}"
+    end
+  end
+
+  protected
+
+  def file_path(md5, file_ext, type)
+    subdir = subdir_for(md5)
+    file = file_name(md5, file_ext, type)
+
+    case type
+    when :preview
+      "#{base_dir}/preview/#{subdir}#{file}"
+    when :large
+      "#{base_dir}/sample/#{subdir}#{file}"
+    when :original
+      "#{base_dir}/#{subdir}#{file}"
+    end
+  end
+
+  def file_name(md5, file_ext, type)
+    large_file_ext = (file_ext == "zip") ? "webm" : "jpg"
+
+    case type
+    when :preview
+      "#{md5}.jpg"
+    when :large
+      "#{large_image_prefix}#{md5}.#{large_file_ext}"
+    when :original
+      "#{md5}.#{file_ext}"
+    end
+  end
+
+  def subdir_for(md5)
+    if hierarchical
+      "#{md5[0..1]}/#{md5[2..3]}/"
+    else
+      ""
+    end
+  end
+
+  def seo_tags(post, user = CurrentUser.user)
+    return "" if !tagged_filenames || user.disable_tagged_filenames?
+
+    tags = post.humanized_essential_tag_string.gsub(/[^a-z0-9]+/, "_").gsub(/(?:^_+)|(?:_+$)/, "").gsub(/_{2,}/, "_")
+    "__#{tags}__"
+  end
+end
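
A minimal usage sketch of the interface above (illustrative only, not part of the diff; `post` stands for any object responding to #md5 and #file_ext, like Danbooru's Post model):

    manager = StorageManager::Local.new(base_dir: "/tmp/danbooru", hierarchical: true)

    # store_file derives the destination from the post's MD5: with
    # hierarchical: true, an MD5 of "d34e..." lands under /tmp/danbooru/d3/4e/.
    File.open("image.jpg", "rb") { |f| manager.store_file(f, post, :original) }

    # file_url mirrors the same layout under base_url (plus SEO tags, if enabled).
    manager.file_url(post, :original)
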
diff --git a/app/logical/storage_manager/hybrid.rb b/app/logical/storage_manager/hybrid.rb
new file mode 100644
index 000000000..1e3afdda4
--- /dev/null
+++ b/app/logical/storage_manager/hybrid.rb
@@ -0,0 +1,23 @@
+class StorageManager::Hybrid < StorageManager
+  attr_reader :submanager
+
+  def initialize(&block)
+    @submanager = block
+  end
+
+  def store_file(io, post, type)
+    submanager[post.id, post.md5, post.file_ext, type].store_file(io, post, type)
+  end
+
+  def delete_file(post_id, md5, file_ext, type)
+    submanager[post_id, md5, file_ext, type].delete_file(post_id, md5, file_ext, type)
+  end
+
+  def open_file(post, type)
+    submanager[post.id, post.md5, post.file_ext, type].open_file(post, type)
+  end
+
+  def file_url(post, type)
+    submanager[post.id, post.md5, post.file_ext, type].file_url(post, type)
+  end
+end
diff --git a/app/logical/storage_manager/local.rb b/app/logical/storage_manager/local.rb
new file mode 100644
index 000000000..6bdfa69e3
--- /dev/null
+++ b/app/logical/storage_manager/local.rb
@@ -0,0 +1,25 @@
+class StorageManager::Local < StorageManager
+  DEFAULT_PERMISSIONS = 0644
+
+  def store(io, dest_path)
+    temp_path = dest_path + "-" + SecureRandom.uuid + ".tmp"
+
+    FileUtils.mkdir_p(File.dirname(temp_path))
+    bytes_copied = IO.copy_stream(io, temp_path)
+    raise Error, "store failed: #{bytes_copied}/#{io.size} bytes copied" if bytes_copied != io.size
+
+    FileUtils.chmod(DEFAULT_PERMISSIONS, temp_path)
+    File.rename(temp_path, dest_path)
+  rescue StandardError => e
+    FileUtils.rm_f(temp_path)
+    raise Error, e
+  end
+
+  def delete(path)
+    FileUtils.rm_f(path)
+  end
+
+  def open(path)
+    File.open(path, "r", binmode: true)
+  end
+end
diff --git a/app/logical/storage_manager/null.rb b/app/logical/storage_manager/null.rb
new file mode 100644
index 000000000..ebaeb8bfe
--- /dev/null
+++ b/app/logical/storage_manager/null.rb
@@ -0,0 +1,13 @@
+class StorageManager::Null < StorageManager
+  def store(io, path)
+    # no-op
+  end
+
+  def delete(path)
+    # no-op
+  end
+
+  def open(path)
+    # no-op
+  end
+end
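
A note on StorageManager::Hybrid above: @submanager holds the raw block, and `submanager[...]` is Proc#[] (an alias for Proc#call), so the block is re-evaluated on every operation to pick a backend. A hypothetical sketch of the shape the block must take (the directories are illustrative, not from the diff):

    manager = StorageManager::Hybrid.new do |id, md5, file_ext, type|
      # The block must return a StorageManager for the given file.
      if type == :preview
        StorageManager::Local.new(base_dir: "/var/danbooru/previews")
      else
        StorageManager::Local.new(base_dir: "/var/danbooru/originals")
      end
    end
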
diff --git a/app/logical/storage_manager/s3.rb b/app/logical/storage_manager/s3.rb
new file mode 100644
index 000000000..6549bf0bf
--- /dev/null
+++ b/app/logical/storage_manager/s3.rb
@@ -0,0 +1,43 @@
+class StorageManager::S3 < StorageManager
+  # https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html#initialize-instance_method
+  DEFAULT_S3_OPTIONS = {
+    region: Danbooru.config.aws_region,
+    credentials: Danbooru.config.aws_credentials,
+    logger: Rails.logger,
+  }
+
+  # https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html#put_object-instance_method
+  DEFAULT_PUT_OPTIONS = {
+    acl: "public-read",
+    storage_class: "STANDARD", # STANDARD, STANDARD_IA, REDUCED_REDUNDANCY
+    cache_control: "public, max-age=#{1.year.to_i}",
+    #content_type: "image/jpeg" # XXX should set content type
+  }
+
+  attr_reader :bucket, :client, :s3_options
+
+  def initialize(bucket, client: nil, s3_options: {}, **options)
+    @bucket = bucket
+    @s3_options = DEFAULT_S3_OPTIONS.merge(s3_options)
+    @client = client || Aws::S3::Client.new(**@s3_options)
+    super(**options)
+  end
+
+  def store(io, path)
+    data = io.read
+    base64_md5 = Digest::MD5.base64digest(data)
+    client.put_object(bucket: bucket, key: path, body: data, content_md5: base64_md5, **DEFAULT_PUT_OPTIONS)
+  end
+
+  def delete(path)
+    client.delete_object(bucket: bucket, key: path)
+  rescue Aws::S3::Errors::NoSuchKey
+    # ignore
+  end
+
+  def open(path)
+    file = Tempfile.new(binmode: true)
+    client.get_object(bucket: bucket, key: path, response_target: file)
+    file
+  end
+end
diff --git a/app/logical/storage_manager/sftp.rb b/app/logical/storage_manager/sftp.rb
new file mode 100644
index 000000000..add10d010
--- /dev/null
+++ b/app/logical/storage_manager/sftp.rb
@@ -0,0 +1,76 @@
+class StorageManager::SFTP < StorageManager
+  DEFAULT_PERMISSIONS = 0644
+
+  # http://net-ssh.github.io/net-ssh/Net/SSH.html#method-c-start
+  DEFAULT_SSH_OPTIONS = {
+    timeout: 10,
+    logger: Rails.logger,
+    verbose: :fatal,
+    non_interactive: true,
+  }
+
+  attr_reader :hosts, :ssh_options
+
+  def initialize(*hosts, ssh_options: {}, **options)
+    @hosts = hosts
+    @ssh_options = DEFAULT_SSH_OPTIONS.merge(ssh_options)
+    super(**options)
+  end
+
+  def store(file, dest_path)
+    temp_upload_path = dest_path + "-" + SecureRandom.uuid + ".tmp"
+    dest_backup_path = dest_path + "-" + SecureRandom.uuid + ".bak"
+
+    each_host do |host, sftp|
+      begin
+        sftp.upload!(file.path, temp_upload_path)
+        sftp.setstat!(temp_upload_path, permissions: DEFAULT_PERMISSIONS)
+
+        # `rename!` can't overwrite existing files, so if a file already exists
+        # at dest_path we move it out of the way first.
+        force { sftp.rename!(dest_path, dest_backup_path) }
+        force { sftp.rename!(temp_upload_path, dest_path) }
+      rescue StandardError => e
+        # if anything fails, try to move the original file back in place (if it was moved).
+        force { sftp.rename!(dest_backup_path, dest_path) }
+        raise Error, e
+      ensure
+        force { sftp.remove!(temp_upload_path) }
+        force { sftp.remove!(dest_backup_path) }
+      end
+    end
+  end
+
+  def delete(dest_path)
+    each_host do |host, sftp|
+      force { sftp.remove!(dest_path) }
+    end
+  end
+
+  def open(dest_path)
+    file = Tempfile.new(binmode: true)
+
+    Net::SFTP.start(hosts.first, nil, ssh_options) do |sftp|
+      sftp.download!(dest_path, file.path)
+    end
+
+    file
+  end
+
+  protected
+
+  # Ignore "no such file" exceptions for the given operation.
+  def force
+    yield
+  rescue Net::SFTP::StatusException => e
+    raise Error, e unless e.description == "no such file"
+  end
+
+  def each_host
+    hosts.each do |host|
+      Net::SFTP.start(host, nil, ssh_options) do |sftp|
+        yield host, sftp
+      end
+    end
+  end
+end
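
The backup-rename dance in StorageManager::SFTP#store exists because, as the inline comment notes, SFTP's rename! cannot overwrite an existing file; moving the current file aside to a .bak name first is what keeps the overwrite near-atomic, with the rescue branch restoring it on failure. A hypothetical construction sketch (hostnames and SSH options are illustrative, not from the diff):

    manager = StorageManager::SFTP.new(
      "i1.example.com", "i2.example.com",  # store/delete run against every host
      base_dir: "/var/www/data",
      ssh_options: { user: "danbooru" },   # merged over DEFAULT_SSH_OPTIONS
    )

Note that open downloads from hosts.first only, so later hosts act as write-through mirrors.
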
diff --git a/config/danbooru_default_config.rb b/config/danbooru_default_config.rb
index fd30ddc0c..a1bf74ff6 100644
--- a/config/danbooru_default_config.rb
+++ b/config/danbooru_default_config.rb
@@ -222,6 +222,41 @@ module Danbooru
       "danbooru"
     end
 
+    # The method to use for storing image files.
+    def storage_manager
+      # Store files on the local filesystem.
+      # base_dir - where to store files (default: under public/data)
+      # base_url - where to serve files from (default: http://#{hostname}/data)
+      # hierarchical: false - store files in a single directory
+      # hierarchical: true - store files in a hierarchical directory structure, based on the MD5 hash
+      StorageManager::Local.new(base_dir: "#{Rails.root}/public/data", hierarchical: false)
+
+      # Store files on one or more remote host(s). Configure SSH settings in
+      # ~/.ssh_config or in the ssh_options param (ref: http://net-ssh.github.io/net-ssh/Net/SSH.html#method-c-start)
+      # StorageManager::SFTP.new("i1.example.com", "i2.example.com", base_dir: "/mnt/backup", hierarchical: false, ssh_options: {})
+
+      # Store files in an S3 bucket. The bucket must already exist and be
+      # writable by you. Configure your S3 settings in aws_region and
+      # aws_credentials below, or in the s3_options param (ref:
+      # https://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html#initialize-instance_method)
+      # StorageManager::S3.new("my_s3_bucket", base_url: "https://my_s3_bucket.s3.amazonaws.com/", s3_options: {})
+
+      # Select the storage method based on the post's id and type (preview, large, or original).
+      # StorageManager::Hybrid.new do |id, md5, file_ext, type|
+      #   ssh_options = { user: "danbooru" }
+      #
+      #   if type.in?([:large, :original]) && id.in?(0..850_000)
+      #     StorageManager::SFTP.new("raikou1.donmai.us", base_url: "https://raikou1.donmai.us", base_dir: "/path/to/files", hierarchical: true, ssh_options: ssh_options)
+      #   elsif type.in?([:large, :original]) && id.in?(850_001..2_000_000)
+      #     StorageManager::SFTP.new("raikou2.donmai.us", base_url: "https://raikou2.donmai.us", base_dir: "/path/to/files", hierarchical: true, ssh_options: ssh_options)
+      #   elsif type.in?([:large, :original]) && id.in?(2_000_001..3_000_000)
+      #     StorageManager::SFTP.new(*all_server_hosts, base_url: "https://hijiribe.donmai.us/data", ssh_options: ssh_options)
+      #   else
+      #     StorageManager::SFTP.new(*all_server_hosts, ssh_options: ssh_options)
+      #   end
+      # end
+    end
+
     def build_file_url(post)
       "/data/#{post.file_path_prefix}/#{post.md5}.#{post.file_ext}"
     end
@@ -611,6 +646,14 @@ module Danbooru
     end
 
     # AWS config options
+    def aws_region
+      "us-east-1"
+    end
+
+    def aws_credentials
+      Aws::Credentials.new(Danbooru.config.aws_access_key_id, Danbooru.config.aws_secret_access_key)
+    end
+
     def aws_access_key_id
     end
diff --git a/test/test_helper.rb b/test/test_helper.rb
index 04a3d3edd..19474ca49 100644
--- a/test/test_helper.rb
+++ b/test/test_helper.rb
@@ -38,6 +38,9 @@ class ActiveSupport::TestCase
       mock_missed_search_service!
       WebMock.allow_net_connect!
       Danbooru.config.stubs(:enable_sock_puppet_validation?).returns(false)
+
+      storage_manager = StorageManager::Local.new(base_dir: "#{Rails.root}/public/data/test")
+      Danbooru.config.stubs(:storage_manager).returns(storage_manager)
     end
 
     teardown do
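
With the stub above, tests write files under public/data/test rather than the real data directory. A hypothetical cleanup hook one might pair with it (not part of the diff):

    teardown do
      # Assumption: nothing else lives under public/data/test.
      FileUtils.rm_rf("#{Rails.root}/public/data/test")
    end
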