* post keeper is calculated synchronously

* add fallback mechanism in case archive service is not up
* pass along most recently added tags to any keeper calculation
This commit is contained in:
r888888888
2018-02-26 17:48:16 -08:00
parent 0c5dcbbf68
commit 3657cacd17
9 changed files with 332 additions and 293 deletions

View File

@@ -8,7 +8,7 @@ gem "sprockets-rails", :require => "sprockets/railtie"
gem "uglifier" gem "uglifier"
gem "therubyracer", :platforms => :ruby gem "therubyracer", :platforms => :ruby
gem "rails", "~> 4.2.0" gem "rails", "~> 4.2.0"
gem "pg" gem "pg", "0.21.0"
gem "dalli", :platforms => :ruby gem "dalli", :platforms => :ruby
gem "memcache-client", :platforms => [:mswin, :mingw, :x64_mingw] gem "memcache-client", :platforms => [:mswin, :mingw, :x64_mingw]
gem "tzinfo-data", :platforms => [:mswin, :mingw, :x64_mingw] gem "tzinfo-data", :platforms => [:mswin, :mingw, :x64_mingw]
@@ -67,6 +67,7 @@ group :development, :test do
gem 'awesome_print' gem 'awesome_print'
gem 'pry-byebug' gem 'pry-byebug'
gem 'ruby-prof' gem 'ruby-prof'
gem 'foreman'
end end
group :test do group :test do

View File

@@ -130,6 +130,13 @@ GEM
ffaker (2.8.1) ffaker (2.8.1)
ffi (1.9.18) ffi (1.9.18)
ffi (1.9.18-x64-mingw32) ffi (1.9.18-x64-mingw32)
foreman (0.63.0)
dotenv (>= 0.7)
thor (>= 0.13.6)
foreman (0.63.0-mingw32)
dotenv (>= 0.7)
thor (>= 0.13.6)
win32console (~> 1.3.0)
get_process_mem (0.2.1) get_process_mem (0.2.1)
globalid (0.4.1) globalid (0.4.1)
activesupport (>= 4.2.0) activesupport (>= 4.2.0)
@@ -233,8 +240,8 @@ GEM
multi_xml (~> 0.5) multi_xml (~> 0.5)
rack (>= 1.2, < 3) rack (>= 1.2, < 3)
os (0.9.6) os (0.9.6)
pg (0.19.0) pg (0.21.0)
pg (0.19.0-x64-mingw32) pg (0.21.0-x64-mingw32)
protected_attributes (1.1.4) protected_attributes (1.1.4)
activemodel (>= 4.0.1, < 5.0) activemodel (>= 4.0.1, < 5.0)
pry (0.11.3) pry (0.11.3)
@@ -396,6 +403,7 @@ GEM
webrobots (0.1.2) webrobots (0.1.2)
whenever (0.10.0) whenever (0.10.0)
chronic (>= 0.6.3) chronic (>= 0.6.3)
win32console (1.3.2)
PLATFORMS PLATFORMS
ruby ruby
@@ -423,6 +431,7 @@ DEPENDENCIES
dtext_rb! dtext_rb!
factory_girl factory_girl
ffaker ffaker
foreman
google-api-client google-api-client
highline highline
httparty httparty
@@ -434,7 +443,7 @@ DEPENDENCIES
net-sftp net-sftp
newrelic_rpm newrelic_rpm
oauth2 oauth2
pg pg (= 0.21.0)
protected_attributes protected_attributes
pry-byebug pry-byebug
radix62 (~> 1.0.1) radix62 (~> 1.0.1)

View File

@@ -1,6 +1,9 @@
module DelayedJobsHelper module DelayedJobsHelper
def print_name(job) def print_name(job)
case job.name case job.name
when "PostKeeperManager.check_and_update"
"<strong>update post tagger</strong>"
when "Tag.increment_post_counts" when "Tag.increment_post_counts"
"<strong>increment post counts</strong>" "<strong>increment post counts</strong>"
@@ -68,6 +71,9 @@ module DelayedJobsHelper
def print_handler(job) def print_handler(job)
case job.name case job.name
when "PostKeeperManager.check_and_update"
""
when "Tag.increment_post_counts", "Tag.decrement_post_counts" when "Tag.increment_post_counts", "Tag.decrement_post_counts"
"" ""

View File

@@ -3,10 +3,71 @@ class PostKeeperManager
PostArchive.enabled? PostArchive.enabled?
end end
def self.queue_check(post_id) # these are all class methods to simplify interaction with delayedjob
delay(queue: "default").check_and_update(post_id)
# in general we want to call these methods synchronously because updating
# the keeper data with a delay defeats the purpose. but this relies on
# archive db being up; we don't want to block updates in case it goes down.
# so we need to permit async updates also.
def self.queue_check(post_id, updater_id, increment_tags)
delay(queue: "default").check_and_update(post_id, updater_id, increment_tags, false)
end end
# Recomputes the keeper of a post (the user credited with the most changes,
# as determined by check) and records it in the post's keeper data.
#
# post           - a Post, or a post id (the form used by the queued job).
# updater_id     - id of the user making the current edit, if any.
# increment_tags - tags added by the current edit; forwarded to check.
# enable_async   - forwarded to check. queue_check passes false so that the
#                  queued job computes the keeper instead of queueing again.
#
# Returns the keeper data hash, or nil when check produced no keeper id
# (nobody could be credited, or the work was deferred to a job). In that
# case the post's existing keeper data is left untouched rather than being
# overwritten with a nil uid.
def self.check_and_update(post, updater_id = nil, increment_tags = nil, enable_async = true)
  loaded_here = !post.is_a?(Post)
  post = Post.find(post) if loaded_here

  keeper_id = check(post, updater_id, increment_tags, enable_async)
  return if keeper_id.nil?

  # string key: this is what the hash looks like after a JSON round trip, so
  # readers using keeper_data["uid"] see the value before and after a reload
  keeper_data = {"uid" => keeper_id}

  if loaded_here
    # this instance was loaded here and is not visible to any caller, so an
    # in-memory assignment would be lost; write the column directly
    post.update_column(:keeper_data, keeper_data)
  else
    # the caller passed its own record; presumably it saves the record
    # itself, so only assign here
    post.keeper_data = keeper_data
  end

  keeper_data
end
# Works out which user should be credited as the keeper of the post. The
# post itself is never modified.
#
# Post archives can lag behind the edit that is being saved, so the caller
# may hand over the tags it has just added (increment_tags) together with
# the editing user (updater_id). Source and rating changes made by that same
# edit are not covered by this shortcut.
#
# Returns the id of the user with the most credited changes, or nil when
# nobody can be credited or when the calculation was handed to a job.
def self.check(post, updater_id = nil, increment_tags = nil, enable_async = true)
  archive_usable = !enable_async || PostArchive.test_connection
  unless archive_usable
    # archive is down: queue the work for later instead of blocking
    queue_check(post.id, updater_id, increment_tags)
    return
  end

  credited = {}
  current_tags = Set.new(post.tag_array)

  # the first user to add a tag that is still on the post gets the credit
  PostArchive.where(post_id: post.id).order("updated_at").each do |version|
    version.added_tags.each do |tag|
      next unless version.updater_id

      first_credit = !credited.key?(tag) && current_tags.include?(tag)
      credited[tag] = version.updater_id if first_credit

      source_kept = version.source_changed? && version.source == post.source
      credited[" source"] = version.updater_id if source_kept
    end
  end

  # tags from the edit in progress that no archive has accounted for yet
  if updater_id && increment_tags.present?
    increment_tags.each do |tag|
      credited[tag] = updater_id unless credited.key?(tag)
    end
  end

  # number of credited changes per user, in order of first appearance
  totals = credited.each_value.each_with_object(Hash.new(0)) do |user_id, counts|
    counts[user_id] += 1
  end

  totals.max_by { |_user_id, count| count }.try(:first)
end
# these methods are for reporting and are not used
# in general, unweighted changes attribution 5% of the time, # in general, unweighted changes attribution 5% of the time,
# weighted changes attribution 12% of the time at w=1000, # weighted changes attribution 12% of the time at w=1000,
# up to 17% of the time at w=100. # up to 17% of the time at w=100.
@@ -45,14 +106,6 @@ class PostKeeperManager
# end # end
end end
# Looks up the post, computes its keeper with check and writes the result
# straight to the keeper_data column while acting as the system user.
def self.check_and_update(post_id)
  record = Post.find(post_id)
  keeper_data = {uid: check(record)}
  CurrentUser.as_system { record.update_column(:keeper_data, keeper_data) }
end
def self.print_weighted(post, w = 1000) def self.print_weighted(post, w = 1000)
changes = {} changes = {}
final_tags = Set.new(post.tag_array) final_tags = Set.new(post.tag_array)
@@ -119,50 +172,4 @@ class PostKeeperManager
ranking.max_by {|k, v| v}.first ranking.max_by {|k, v| v}.first
end end
# Determines which user contributed the most to the post as it stands now.
#
# Walks the post's archives ordered by updated_at and credits every tag that
# is still on the post to the first user who added it. While going through
# an archive's added tags, a source change matching the post's current
# source is credited to that archive's user (a later archive overrides an
# earlier one). Archives without an updater are ignored.
#
# Removed tags are deliberately not counted: it is easy to double count
# trivial changes when a user is just fixing mistakes.
#
# Returns the id of the user with the most credited changes. Raises
# NoMethodError when no change can be credited to anyone.
def self.check(post)
  credited = {}
  current_tags = Set.new(post.tag_array)

  PostArchive.where(post_id: post.id).order("updated_at").each do |version|
    version.added_tags.each do |tag|
      next unless version.updater_id

      first_credit = !credited.key?(tag) && current_tags.include?(tag)
      credited[tag] = version.updater_id if first_credit

      source_kept = version.source_changed? && version.source == post.source
      credited[" source"] = version.updater_id if source_kept
    end
  end

  # number of credited changes per user, in order of first appearance
  totals = credited.each_value.each_with_object(Hash.new(0)) do |user_id, counts|
    counts[user_id] += 1
  end

  totals.max_by { |_user_id, count| count }.first
end
end end

View File

@@ -131,6 +131,13 @@ class ApplicationRecord < ActiveRecord::Base
def columns(*params) def columns(*params)
super.reject {|x| x.sql_type == "tsvector"} super.reject {|x| x.sql_type == "tsvector"}
end end
# Reports whether the database backing this model can currently be queried.
#
# Returns true when a trivial query succeeds, false when connecting or
# running the statement fails.
def test_connection
  # relations are lazy: without to_a no SQL is sent, so nothing could ever
  # fail here and the method would always report true
  limit(1).select(:id).to_a
  true
rescue PG::Error, ActiveRecord::StatementInvalid
  # a failed connect can surface as a raw PG::Error, while a failed
  # statement is wrapped by ActiveRecord in StatementInvalid
  false
end
end end
end end

View File

@@ -57,9 +57,7 @@ class Post < ApplicationRecord
has_many :favorites has_many :favorites
has_many :replacements, class_name: "PostReplacement", :dependent => :destroy has_many :replacements, class_name: "PostReplacement", :dependent => :destroy
if PostKeeperManager.enabled? serialize :keeper_data, JSON
serialize :keeper_data, JSON
end
if PostArchive.enabled? if PostArchive.enabled?
has_many :versions, lambda {order("post_versions.updated_at ASC")}, :class_name => "PostArchive", :dependent => :destroy has_many :versions, lambda {order("post_versions.updated_at ASC")}, :class_name => "PostArchive", :dependent => :destroy
@@ -76,7 +74,11 @@ class Post < ApplicationRecord
end end
def keeper_id def keeper_id
keeper_data ? keeper_data[:uid] : uploader_id if PostKeeperManager.enabled?
keeper_data ? keeper_data["uid"] : uploader_id
else
uploader_id
end
end end
def keeper def keeper
@@ -664,7 +666,7 @@ class Post < ApplicationRecord
if PostKeeperManager.enabled? && persisted? if PostKeeperManager.enabled? && persisted?
# no need to do this check on the initial create # no need to do this check on the initial create
PostKeeperManager.queue_check(id) PostKeeperManager.check_and_update(self, CurrentUser.id, increment_tags)
end end
end end

View File

@@ -2,7 +2,7 @@ module PostSetPresenters
class WikiPage < PostSetPresenters::Post class WikiPage < PostSetPresenters::Post
def posts def posts
@post_set.posts @post_set.posts
rescue ActiveRecord::StatementInvalid, PGError rescue ActiveRecord::StatementInvalid, PG::Error
[] []
end end

File diff suppressed because it is too large Load Diff

View File

@@ -6,8 +6,6 @@ class PostKeeperManagerTest < ActiveSupport::TestCase
context "#check_and_update" do context "#check_and_update" do
setup do setup do
Timecop.travel(1.month.ago) do Timecop.travel(1.month.ago) do
@system = FactoryGirl.create(:user)
User.stubs(:system).returns(@system)
@alice = FactoryGirl.create(:user) @alice = FactoryGirl.create(:user)
@bob = FactoryGirl.create(:user) @bob = FactoryGirl.create(:user)
@carol = FactoryGirl.create(:user) @carol = FactoryGirl.create(:user)
@@ -39,8 +37,6 @@ class PostKeeperManagerTest < ActiveSupport::TestCase
context "#check" do context "#check" do
setup do setup do
Timecop.travel(1.month.ago) do Timecop.travel(1.month.ago) do
@system = FactoryGirl.create(:user)
User.stubs(:system).returns(@system)
@alice = FactoryGirl.create(:user) @alice = FactoryGirl.create(:user)
@bob = FactoryGirl.create(:user) @bob = FactoryGirl.create(:user)
@carol = FactoryGirl.create(:user) @carol = FactoryGirl.create(:user)
@@ -62,15 +58,10 @@ class PostKeeperManagerTest < ActiveSupport::TestCase
@post.update_attributes(tag_string: "aaa bbb ccc") @post.update_attributes(tag_string: "aaa bbb ccc")
end end
end end
CurrentUser.scoped(@carol) do
Timecop.travel(4.hours.from_now) do
@post.update_attributes(tag_string: "ccc ddd eee fff ggg")
end
end
end end
should "find the most frequent tagger for a post" do should "find the most frequent tagger for a post" do
assert_equal(@carol.id, subject.check(@post)) assert_equal(@carol.id, subject.check(@post, @carol.id, %w(ddd eee fff ggg)))
end end
end end
end end