diff --git a/Gemfile b/Gemfile index 7ebff38a5..dafab3015 100644 --- a/Gemfile +++ b/Gemfile @@ -8,7 +8,7 @@ gem "sprockets-rails", :require => "sprockets/railtie" gem "uglifier" gem "therubyracer", :platforms => :ruby gem "rails", "~> 4.2.0" -gem "pg" +gem "pg", "0.21.0" gem "dalli", :platforms => :ruby gem "memcache-client", :platforms => [:mswin, :mingw, :x64_mingw] gem "tzinfo-data", :platforms => [:mswin, :mingw, :x64_mingw] @@ -67,6 +67,7 @@ group :development, :test do gem 'awesome_print' gem 'pry-byebug' gem 'ruby-prof' + gem 'foreman' end group :test do diff --git a/Gemfile.lock b/Gemfile.lock index b7c4ea5ab..5a9b1c022 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -130,6 +130,13 @@ GEM ffaker (2.8.1) ffi (1.9.18) ffi (1.9.18-x64-mingw32) + foreman (0.63.0) + dotenv (>= 0.7) + thor (>= 0.13.6) + foreman (0.63.0-mingw32) + dotenv (>= 0.7) + thor (>= 0.13.6) + win32console (~> 1.3.0) get_process_mem (0.2.1) globalid (0.4.1) activesupport (>= 4.2.0) @@ -233,8 +240,8 @@ GEM multi_xml (~> 0.5) rack (>= 1.2, < 3) os (0.9.6) - pg (0.19.0) - pg (0.19.0-x64-mingw32) + pg (0.21.0) + pg (0.21.0-x64-mingw32) protected_attributes (1.1.4) activemodel (>= 4.0.1, < 5.0) pry (0.11.3) @@ -396,6 +403,7 @@ GEM webrobots (0.1.2) whenever (0.10.0) chronic (>= 0.6.3) + win32console (1.3.2) PLATFORMS ruby @@ -423,6 +431,7 @@ DEPENDENCIES dtext_rb! 
factory_girl ffaker + foreman google-api-client highline httparty @@ -434,7 +443,7 @@ DEPENDENCIES net-sftp newrelic_rpm oauth2 - pg + pg (= 0.21.0) protected_attributes pry-byebug radix62 (~> 1.0.1)
diff --git a/app/helpers/delayed_jobs_helper.rb b/app/helpers/delayed_jobs_helper.rb
index 1c0c298db..a94f37f29 100644
--- a/app/helpers/delayed_jobs_helper.rb
+++ b/app/helpers/delayed_jobs_helper.rb
@@ -1,6 +1,9 @@
 module DelayedJobsHelper
   def print_name(job)
     case job.name
+    when "PostKeeperManager.check_and_update"
+      "update post tagger"
+
     when "Tag.increment_post_counts"
       "increment post counts"
 
@@ -68,6 +71,9 @@ module DelayedJobsHelper
 
   def print_handler(job)
     case job.name
+    when "PostKeeperManager.check_and_update"
+      ""
+
     when "Tag.increment_post_counts", "Tag.decrement_post_counts"
       ""
 
diff --git a/app/logical/google_big_query/post_version.rb b/app/logical/google_big_query/post_version.rb
index e3e670466..e430841f0 100644
--- a/app/logical/google_big_query/post_version.rb
+++ b/app/logical/google_big_query/post_version.rb
@@ -20,6 +20,15 @@ module GoogleBigQuery
       "regexp_match(removed_tags, \"(?:^| )#{es}(?:$| )\")"
     end
 
+    # Builds the query listing who added which tag to a post, restricted to
+    # the partitions from the day of created_at through the following day.
+    def find_for_post(post_id, created_at)
+      post_id = post_id.to_i
+      btime = created_at.strftime("%Y-%m-%d 00:00:00")
+      etime = (created_at + 1.day).strftime("%Y-%m-%d 00:00:00")
+      "select updater_id, added_tag from [danbooru_#{Rails.env}].post_versions_flat_part where _partitiontime >= timestamp('#{btime}') and _partitiontime <= timestamp('#{etime}') and post_id = #{post_id}"
+    end
+
     def find(user_id, added_tags, removed_tags, min_version_id, max_version_id, limit = 1_000)
       constraints = []
 
diff --git a/app/logical/post_keeper_manager.rb b/app/logical/post_keeper_manager.rb
new file mode 100644
index 000000000..32c023983
--- /dev/null
+++ b/app/logical/post_keeper_manager.rb
@@ -0,0 +1,148 @@
+# Decides which user is the "keeper" of a post: the user credited with most
+# of the post's current tags (plus its current source), judging by the
+# post's archived versions.
+class PostKeeperManager
+  # key under which a source edit is credited in the tag => user id mapping
+  SOURCE_KEY = " source"
+
+  def self.enabled?
+    PostArchive.enabled?
+  end
+
+  # these are all class methods to simplify interaction with delayedjob
+
+  # in general we want to call these methods synchronously because updating
+  # the keeper data with a delay defeats the purpose. but this relies on
+  # archive db being up; we don't want to block updates in case it goes down.
+  # so we need to permit async updates also.
+
+  # Enqueues check_and_update on the "default" delayed job queue. The job
+  # passes enable_async = false, so it cannot enqueue itself again.
+  def self.queue_check(post_id, updater_id, increment_tags)
+    delay(queue: "default").check_and_update(post_id, updater_id, increment_tags, false)
+  end
+
+  # Works out the keeper of a post and assigns it to post.keeper_data.
+  # post is a Post or a post id; the other arguments are handed to check.
+  # keeper_data is left untouched when check returns nil (the work was
+  # deferred to a job, or nobody could be credited).
+  # NOTE(review): the post is not saved here, so a run started from a bare
+  # post id (the delayed job) appears to discard its result -- confirm.
+  def self.check_and_update(post, updater_id = nil, increment_tags = nil, enable_async = true)
+    post = Post.find(post) unless post.is_a?(Post)
+    keeper_id = check(post, updater_id, increment_tags, enable_async)
+    # string key, matching the keeper_data["uid"] lookup in Post#keeper_id
+    post.keeper_data = {"uid" => keeper_id} if keeper_id
+  end
+
+  # because post archives might get delayed, we need to pass along the most
+  # recently added tags inside the job. downside: this doesn't keep track of
+  # source or rating changes. this method does not modify the post.
+  #
+  # Returns the id of the user credited with the most changes, or nil when
+  # the check was queued for later or no change could be attributed.
+  def self.check(post, updater_id = nil, increment_tags = nil, enable_async = true)
+    if enable_async && !PostArchive.test_connection
+      # if archive is down, just queue this work and do it later
+      queue_check(post.id, updater_id, increment_tags)
+      return
+    end
+
+    changes = first_adders(post)
+
+    # tags from the edit in progress may not have reached the archive yet
+    if updater_id && increment_tags.present?
+      increment_tags.each do |tag|
+        changes[tag] = updater_id unless changes.has_key?(tag)
+      end
+    end
+
+    # add up how many changes each user has made
+    ranking = changes.values.each_with_object(Hash.new(0)) do |user_id, h|
+      h[user_id] += 1
+    end
+
+    ranking.max_by {|k, v| v}.try(:first)
+  end
+
+  # these methods are for reporting and are not used
+
+  # in general, unweighted changes attribution 5% of the time,
+  # weighted changes attribution 12% of the time at w=1000,
+  # up to 17% of the time at w=100.
+  def self.evaluate(post_ids)
+    total = 0
+    changed = 0
+    weighted_changed = 0
+
+    Post.where(id: post_ids).find_each do |post|
+      total += 1
+      # enable_async = false: a report should never enqueue keeper jobs
+      changed += 1 if check(post, nil, nil, false) != post.uploader_id
+      if check_weighted(post) != post.uploader_id
+        puts post.id
+        weighted_changed += 1
+      end
+    end
+
+    puts "total: #{total}"
+    puts "unweighted changes: #{changed}"
+    puts "weighted changes: #{weighted_changed}"
+  end
+
+  # Logs (debug level) each credited user's weighted score and raw change
+  # count, lowest score first.
+  def self.print_weighted(post, w = 1000)
+    changes = first_adders(post)
+    counts = changes.values.each_with_object(Hash.new(0)) {|user_id, h| h[user_id] += 1}
+
+    weighted_ranking(changes, w).sort_by {|k, v| v}.each do |user_id, score|
+      user = User.find(user_id)
+      Rails.logger.debug "#{user.name}: %.4f (%d)" % [score, counts[user_id]]
+    end
+  end
+
+  # Like check, but ranks users by their weighted score (weighted_ranking) and
+  # never queues a job. Returns a user id, or nil when nothing could be
+  # attributed.
+  def self.check_weighted(post, w = 1000)
+    weighted_ranking(first_adders(post), w).max_by {|k, v| v}.try(:first)
+  end
+
+  # Builds the mapping of who added a tag first: tag => updater id, for the
+  # tags still on the post, walking its archived versions in updated_at
+  # order. The updater of the latest version that set the post's current source
+  # is recorded under SOURCE_KEY. Versions without an updater are skipped.
+  def self.first_adders(post)
+    changes = {}
+    final_tags = Set.new(post.tag_array)
+
+    PostArchive.where(post_id: post.id).order("updated_at").each do |pa|
+      next unless pa.updater_id
+
+      pa.added_tags.each do |at|
+        if !changes.has_key?(at) && final_tags.include?(at)
+          changes[at] = pa.updater_id
+        end
+      end
+
+      # checked once per version, so source-only edits are credited too
+      if pa.source_changed? && pa.source == post.source
+        changes[SOURCE_KEY] = pa.updater_id
+      end
+    end
+
+    changes
+  end
+
+  # Sums, per user, 1 / (w + post_count) over the entries credited to them,
+  # so rarely used tags weigh more. Tags that cannot be found count as 0.
+  def self.weighted_ranking(changes, w)
+    changes.each_with_object(Hash.new(0.0)) do |(tag, user_id), h|
+      count = Tag.find_by_name(tag).try(:post_count) || 0
+      h[user_id] += 1.0 / (w + count)
+    end
+  end
+
+  private_class_method :first_adders, :weighted_ranking
+end
diff --git a/app/models/application_record.rb b/app/models/application_record.rb
index bb7dfbed3..3b61dcff4 100644
--- a/app/models/application_record.rb
+++ b/app/models/application_record.rb
@@ -131,6 +131,16 @@ class ApplicationRecord < ActiveRecord::Base
       def columns(*params)
         super.reject {|x| x.sql_type == "tsvector"}
       end
+
+      # Reports whether this model's database can currently be queried.
+      def test_connection
+        # pluck runs the query right away; a bare relation is lazy and would
+        # never reach the database
+        limit(1).pluck(:id)
+        true
+      rescue ActiveRecord::StatementInvalid, PG::Error
+        false
+      end
     end
   end
 
diff --git a/app/models/post.rb b/app/models/post.rb
index 29985c556..a89866f77 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -57,15 +57,41 @@ class Post < ApplicationRecord
   has_many :favorites
   has_many :replacements, class_name: "PostReplacement", :dependent => :destroy
 
+  serialize :keeper_data, JSON
+
   if PostArchive.enabled?
     has_many :versions, lambda {order("post_versions.updated_at ASC")}, :class_name => "PostArchive", :dependent => :destroy
   end
 
   attr_accessible :source, :rating, :tag_string, :old_tag_string, :old_parent_id, :old_source, :old_rating, :parent_id, :has_embedded_notes, :as => [:member, :builder, :gold, :platinum, :moderator, :admin, :default]
-  attr_accessible :is_rating_locked, :is_note_locked, :has_cropped, :as => [:builder, :moderator, :admin]
+  attr_accessible :is_rating_locked, :is_note_locked, :has_cropped, :keeper_data, :as => [:builder, :moderator, :admin]
   attr_accessible :is_status_locked, :as => [:admin]
   attr_accessor :old_tag_string, :old_parent_id, :old_source, :old_rating, :has_constraints, :disable_versioning, :view_count
 
+  concerning :KeeperMethods do
+    included do
+      before_create :initialize_keeper
+    end
+
+    # Id of the user credited as the post's keeper. Falls back to the
+    # uploader when keeper tracking is disabled or no keeper is recorded.
+    def keeper_id
+      # tolerate symbol keys: a hash assigned in memory keeps them until it
+      # has been through the JSON column
+      uid = keeper_data["uid"] || keeper_data[:uid] if PostKeeperManager.enabled? && keeper_data
+      uid || uploader_id
+    end
+
+    def keeper
+      User.find(keeper_id)
+    end
+
+    # before_create hook: a new post starts out kept by its uploader.
+    def initialize_keeper
+      self.keeper_data = {"uid" => uploader_id}
+    end
+  end
+
   module FileMethods
     extend ActiveSupport::Concern
 
@@ -639,6 +665,11 @@ class Post < ApplicationRecord
       if decrement_tags.any?
         Tag.decrement_post_counts(decrement_tags)
       end
+
+      if PostKeeperManager.enabled? && persisted?
+        # no need to do this check on the initial create
+        PostKeeperManager.check_and_update(self, CurrentUser.id, increment_tags)
+      end
     end
 
     def set_tag_count(category,tagcount)
diff --git a/app/presenters/post_set_presenters/wiki_page.rb b/app/presenters/post_set_presenters/wiki_page.rb
index 1299b2d6e..932eb2c3b 100644
--- a/app/presenters/post_set_presenters/wiki_page.rb
+++ b/app/presenters/post_set_presenters/wiki_page.rb
@@ -2,7 +2,7 @@ module PostSetPresenters
   class WikiPage < PostSetPresenters::Post
     def posts
       @post_set.posts
-    rescue ActiveRecord::StatementInvalid, PGError
+    rescue ActiveRecord::StatementInvalid, PG::Error
       []
     end
 
diff --git a/app/views/posts/partials/show/_information.html.erb b/app/views/posts/partials/show/_information.html.erb
index 5a6ab8ade..f6fad113f 100644
--- a/app/views/posts/partials/show/_information.html.erb
+++ b/app/views/posts/partials/show/_information.html.erb
@@ -1,6 +1,9 @@