diff --git a/app/logical/related_tag_calculator.rb b/app/logical/related_tag_calculator.rb
new file mode 100644
index 000000000..94b0a237f
--- /dev/null
+++ b/app/logical/related_tag_calculator.rb
@@ -0,0 +1,43 @@
+# Estimates which tags co-occur with a given tag by counting them across a
+# sample of matching posts.
+class RelatedTagCalculator
+  # Returns the tag_string column of up to +limit+ posts matching +tag+,
+  # highest id first.
+  # NOTE(review): select_values_sql is not defined in this change; presumably a
+  # project helper that binds the ? placeholders -- confirm it exists on the
+  # connection.
+  def find_tags(tag, limit)
+    ActiveRecord::Base.connection.select_values_sql("SELECT tag_string FROM posts WHERE tag_index @@ to_tsquery('danbooru', ?) ORDER BY id DESC LIMIT ?", tag, limit)
+  end
+
+  # Counts how often every other tag appears alongside +name+ in the sample.
+  #
+  # name::                tag being analysed; it is never counted itself
+  # limit::               maximum number of posts to sample
+  # category_constraint:: when given, only tags in this category are counted
+  #
+  # Returns a hash of tag name => number of sampled posts containing it.
+  def calculate_from_sample(name, limit, category_constraint = nil)
+    counts = Hash.new {|h, k| h[k] = 0}
+
+    # find_tags selects only tag_string, so each element is a String of tag
+    # names, not a Post; split it instead of calling tag_array on it.
+    find_tags(name, limit).each do |tag_string|
+      tag_array = tag_string.to_s.split
+      categories = Tag.categories_for(tag_array) if category_constraint
+
+      tag_array.each do |tag|
+        next if tag == name
+        next if category_constraint && categories[tag] != category_constraint
+        counts[tag] += 1
+      end
+    end
+
+    counts
+  end
+
+  # Serialises a counts hash as "tag count tag count ...", highest count first.
+  def convert_hash_to_string(hash)
+    hash.to_a.sort_by {|x| -x[1]}.flatten.join(" ")
+  end
+end
diff --git a/app/models/job.rb b/app/models/job.rb
new file mode 100644
index 000000000..df6f1896c
--- /dev/null
+++ b/app/models/job.rb
@@ -0,0 +1,215 @@
+# A unit of background work stored in the jobs table. The category selects the
+# execute_* method to run and data_as_json holds its JSON-encoded arguments.
+class Job < ActiveRecord::Base
+  CATEGORIES = %w(mass_tag_edit approve_tag_alias approve_tag_implication calculate_tag_subscriptions calculate_related_tags calculate_post_count calculate_uploaded_tags s3_backup)
+  STATUSES = %w(pending processing finished error)
+
+  validates_inclusion_of :category, :in => CATEGORIES
+  validates_inclusion_of :status, :in => STATUSES
+
+  # Decoded job arguments.
+  def data
+    JSON.parse(data_as_json)
+  end
+
+  # Stores +text+ (any object responding to to_json) as the job arguments.
+  def data=(text)
+    self.data_as_json = text.to_json
+  end
+
+  # Runs the job once and records the outcome in status. A positive
+  # repeat_count is decremented per run; the job finishes when the remaining
+  # count is zero and is re-queued as pending otherwise.
+  def execute!
+    if repeat_count > 0
+      count = repeat_count - 1
+    else
+      count = repeat_count
+    end
+
+    begin
+      execute_sql("SET statement_timeout = 0")
+      update_attribute(:status, "processing")
+      # The jobs table has no task_type column; the handler name is derived from
+      # category, which validation restricts to CATEGORIES.
+      __send__("execute_#{category}")
+
+      if count == 0
+        update_attribute(:status, "finished")
+      else
+        update_attributes(:status => "pending", :repeat_count => count)
+      end
+    rescue SystemExit
+      # An exit was requested while the job ran: put it back in the queue.
+      update_attribute(:status, "pending")
+    rescue Exception => x
+      # Deliberately broad so that any failure is recorded on the job. The
+      # column is named message in the jobs table.
+      update_attributes(:status => "error", :message => "#{x.class}: #{x}")
+    end
+  end
+
+  def execute_mass_tag_edit
+    start_tags = data["start_tags"]
+    result_tags = data["result_tags"]
+    updater_id = data["updater_id"]
+    updater_ip_addr = data["updater_ip_addr"]
+    Tag.mass_edit(start_tags, result_tags, updater_id, updater_ip_addr)
+  end
+
+  def execute_approve_tag_alias
+    ta = TagAlias.find(data["id"])
+    updater_id = data["updater_id"]
+    updater_ip_addr = data["updater_ip_addr"]
+    ta.approve(updater_id, updater_ip_addr)
+  end
+
+  def execute_approve_tag_implication
+    ti = TagImplication.find(data["id"])
+    updater_id = data["updater_id"]
+    updater_ip_addr = data["updater_ip_addr"]
+    ti.approve(updater_id, updater_ip_addr)
+  end
+
+  # Processes all tag subscriptions unless that happened within the last 20
+  # minutes, then records the time of this run.
+  def execute_calculate_tag_subscriptions
+    # Time.parse raises on nil, so only parse when a previous run was recorded.
+    last_run = data["last_run"] && Time.parse(data["last_run"])
+    if last_run.nil? || last_run < 20.minutes.ago
+      TagSubscription.process_all
+      update_attributes(:data => {:last_run => Time.now.strftime("%Y-%m-%d %H:%M")})
+    end
+  end
+
+  def execute_calculate_related_tags
+    tag_id = data["id"].to_i
+    tag = Tag.find_by_id(tag_id)
+    if tag
+      tag.commit_related(Tag.calculate_related(tag.name))
+    end
+  end
+
+  def execute_calculate_post_count
+    Tag.recalculate_post_count(data["tag_name"])
+  end
+
+  def execute_calculate_uploaded_tags
+    tags = []
+    user = User.find(data["id"])
+    CONFIG["tag_types"].values.uniq.each do |tag_type|
+      tags += user.calculate_uploaded_tags(tag_type)
+    end
+
+    user.update_attribute(:uploaded_tags, tags.join("\n"))
+  end
+
+  # Reads the ninth whitespace-separated field of the eth1 line in
+  # /proc/net/dev and caches its change since the previous run under "db-bw".
+  # NOTE(review): bandwidth_throttle is not listed in CATEGORIES, so a job with
+  # this category cannot pass validation -- confirm whether that is intended.
+  def execute_bandwidth_throttle
+    bw = File.read("/proc/net/dev").split(/\n/).grep(/eth1/).first.scan(/\S+/)[8].to_i
+    if $danbooru_bandwidth_previous
+      diff = bw - $danbooru_bandwidth_previous
+    else
+      diff = 0
+    end
+    $danbooru_bandwidth_previous = bw
+    Cache.put("db-bw", diff)
+  end
+
+  # Uploads the files of up to 200 posts with id greater than data["last_id"]
+  # to S3, saving the id of each processed post so the next run resumes there.
+  def execute_s3_backup
+    last_id = data["last_id"].to_i
+
+    begin
+      Post.find(:all, :conditions => ["id > ?", last_id], :limit => 200, :order => "id").each do |post|
+        AWS::S3::Base.establish_connection!(:access_key_id => CONFIG["amazon_s3_access_key_id"], :secret_access_key => CONFIG["amazon_s3_secret_access_key"])
+        if File.exists?(post.file_path)
+          base64_md5 = Base64.encode64(Digest::MD5.digest(File.read(post.file_path)))
+          AWS::S3::S3Object.store(post.file_name, open(post.file_path, "rb"), CONFIG["amazon_s3_bucket_name"], "Content-MD5" => base64_md5)
+        end
+
+        if post.image? && File.exists?(post.preview_path)
+          AWS::S3::S3Object.store("preview/#{post.md5}.jpg", open(post.preview_path, "rb"), CONFIG["amazon_s3_bucket_name"])
+        end
+
+        if File.exists?(post.sample_path)
+          AWS::S3::S3Object.store("sample/" + CONFIG["sample_filename_prefix"] + "#{post.md5}.jpg", open(post.sample_path, "rb"), CONFIG["amazon_s3_bucket_name"])
+        end
+
+        update_attributes(:data => {:last_id => post.id})
+        base64_md5 = nil
+      end
+
+    rescue Exception
+      # probably some network error, retry next time
+    end
+  end
+
+  # Short human-readable summary of the job arguments, or "ERROR" when the
+  # summary cannot be built.
+  def pretty_data
+    begin
+      case category
+      when "mass_tag_edit"
+        start = data["start_tags"]
+        result = data["result_tags"]
+        user = User.find_name(data["updater_id"])
+        "start:#{start} result:#{result} user:#{user}"
+
+      when "approve_tag_alias"
+        ta = TagAlias.find(data["id"])
+        "start:#{ta.name} result:#{ta.alias_name}"
+
+      when "approve_tag_implication"
+        ti = TagImplication.find(data["id"])
+        "start:#{ti.predicate.name} result:#{ti.consequent.name}"
+
+      when "calculate_tag_subscriptions"
+        last_run = data["last_run"]
+        "last run:#{last_run}"
+
+      when "calculate_related_tags"
+        tag = Tag.find_by_id(data["id"])
+        if tag
+          "tag:#{tag.name}"
+        else
+          "tag:UNKNOWN"
+        end
+
+      when "calculate_post_count"
+        "tag:" + data["tag_name"]
+
+      when "calculate_uploaded_tags"
+        # User.name(id) would hit Module#name, which takes no arguments (unless
+        # User overrides it); look the name up as the mass_tag_edit branch does.
+        "user:" + User.find_name(data["id"])
+
+      when "bandwidth_throttle"
+        ""
+
+      when "s3_backup"
+        "last_id:" + data["last_id"].to_s
+
+      end
+    rescue Exception
+      "ERROR"
+    end
+  end
+
+  # Number of pending jobs whose category is +task_type+.
+  def self.pending_count(task_type)
+    count(:conditions => ["category = ? and status = 'pending'", task_type])
+  end
+
+  # Executes every pending job, highest id first, sleeping 1 second after each.
+  def self.execute_once
+    find(:all, :conditions => ["status = ?", "pending"], :order => "id desc").each do |task|
+      task.execute!
+      sleep 1
+    end
+  end
+end
diff --git a/app/models/post.rb b/app/models/post.rb
index 34b365888..5a44d639c 100644
--- a/app/models/post.rb
+++ b/app/models/post.rb
@@ -11,6 +11,7 @@ class Post < ActiveRecord::Base
   belongs_to :updater, :class_name => "User"
   has_one :unapproval, :dependent => :destroy
   has_one :upload, :dependent => :destroy
+  has_one :moderation_detail, :class_name => "PostModerationDetail", :dependent => :destroy
   has_many :versions, :class_name => "PostVersion", :dependent => :destroy
   has_many :votes, :class_name => "PostVote", :dependent => :destroy
   attr_accessible :source, :rating, :tag_string, :old_tag_string, :updater_id, :updater_ip_addr
diff --git a/app/models/post_moderation_detail.rb b/app/models/post_moderation_detail.rb
new file mode 100644
index 000000000..0f001287c
--- /dev/null
+++ b/app/models/post_moderation_detail.rb
@@ -0,0 +1,27 @@
+# Join record between a user and a post that the user has hidden.
+class PostModerationDetail < ActiveRecord::Base
+  belongs_to :post
+  belongs_to :user
+
+  # Splits +posts+ by whether +user+ has a detail record for them. Returns the
+  # posts without one by default, or only those with one when +select_hidden+
+  # is true.
+  def self.filter(posts, user, select_hidden = false)
+    hidden = where(:user_id => user.id).select("post_id").map(&:post_id)
+    if select_hidden
+      posts.select {|x| hidden.include?(x.id)}
+    else
+      posts.reject {|x| hidden.include?(x.id)}
+    end
+  end
+
+  # Destroys the details of posts that are neither pending nor flagged.
+  def self.prune!
+    # Select only this table's columns and clear the read-only flag: records
+    # loaded through a join can otherwise refuse to be destroyed.
+    stale = joins(:post).where("posts.is_pending = FALSE AND posts.is_flagged = FALSE")
+    stale.select("post_moderation_details.*").readonly(false).each do |hidden_post|
+      hidden_post.destroy
+    end
+  end
+end
diff --git a/app/models/tag.rb b/app/models/tag.rb
index 5751cc7fe..62a05fded 100644
--- a/app/models/tag.rb
+++ b/app/models/tag.rb
@@ -305,6 +305,37 @@ class Tag < ActiveRecord::Base
     end
   end
 
+  module RelationMethods
+    # Recalculates related_tags from a sample of posts and records when the
+    # calculation happened so the cache expiry check has a timestamp.
+    def update_related
+      calculator = RelatedTagCalculator.new
+      # calculate_from_sample expects the tag name first, then the sample size.
+      counts = calculator.calculate_from_sample(name, Danbooru.config.post_sample_size)
+      self.related_tags = calculator.convert_hash_to_string(counts)
+      self.related_tags_updated_at = Time.now
+    end
+
+    def update_related_if_outdated
+      update_related if should_update_related?
+    end
+
+    # Hours before related_tags is considered stale: sqrt(post_count), capped
+    # at 24.
+    def related_cache_expiry
+      [Math.sqrt(post_count), 24].min
+    end
+
+    def should_update_related?
+      related_tags.blank? || related_tags_updated_at.nil? || related_tags_updated_at < related_cache_expiry.hours.ago
+    end
+
+    # related_tags as [name, count] string pairs; empty when nothing is cached.
+    def related_tag_array
+      related_tags.to_s.split(/ /).in_groups_of(2)
+    end
+  end
+
   extend ViewCountMethods
   include CategoryMethods
   extend StatisticsMethods
diff --git a/db/development_structure.sql b/db/development_structure.sql
index 352a12a03..d9f3b0a67 100644
--- a/db/development_structure.sql
+++ b/db/development_structure.sql
@@ -762,6 +762,41 @@ CREATE SEQUENCE forum_topics_id_seq
 ALTER SEQUENCE forum_topics_id_seq OWNED BY forum_topics.id;
 
 
+--
+-- Name: jobs; Type: TABLE; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE TABLE jobs (
+    id integer NOT NULL,
+    category character varying(255) NOT NULL,
+    status character varying(255) NOT NULL,
+    message text NOT NULL,
+    data_as_json text NOT NULL,
+    repeat_count integer NOT NULL,
+    created_at timestamp without time zone,
+    updated_at timestamp without time zone
+);
+
+
+--
+-- Name: jobs_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+--
+
+CREATE SEQUENCE jobs_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MAXVALUE
+    NO MINVALUE
+    CACHE 1;
+
+
+--
+-- Name: jobs_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+--
+
+ALTER SEQUENCE jobs_id_seq OWNED BY jobs.id;
+
+
 --
 -- Name: pool_versions; Type: TABLE; Schema: public; Owner: -; Tablespace: 
 --
@@ -832,6 +867,38 @@ CREATE SEQUENCE pools_id_seq
 ALTER SEQUENCE pools_id_seq OWNED BY pools.id;
 
 
+--
+-- Name: post_moderation_details; Type: TABLE; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE TABLE post_moderation_details (
+    id integer NOT NULL,
+    user_id integer NOT NULL,
+    post_id integer NOT NULL,
+    created_at timestamp without time zone,
+    updated_at timestamp without time zone
+);
+
+
+--
+-- Name: post_moderation_details_id_seq; Type: SEQUENCE; Schema: public; Owner: -
+--
+
+CREATE SEQUENCE post_moderation_details_id_seq
+    START WITH 1
+    INCREMENT BY 1
+    NO MAXVALUE
+    NO MINVALUE
+    CACHE 1;
+
+
+--
+-- Name: post_moderation_details_id_seq; Type: SEQUENCE OWNED BY; Schema: public; Owner: -
+--
+
+ALTER SEQUENCE post_moderation_details_id_seq OWNED BY post_moderation_details.id;
+
+
 --
 -- Name: post_versions; Type: TABLE; Schema: public; Owner: -; Tablespace: 
 --
@@ -1047,6 +1114,7 @@ CREATE TABLE tags (
     view_count integer DEFAULT 0 NOT NULL,
     category integer DEFAULT 0 NOT NULL,
     related_tags text,
+    related_tags_updated_at timestamp without time zone,
     created_at timestamp without time zone,
     updated_at timestamp without time zone
 );
@@ -1445,6 +1513,13 @@ ALTER TABLE forum_posts ALTER COLUMN id SET DEFAULT nextval('forum_posts_id_seq'
 ALTER TABLE forum_topics ALTER COLUMN id SET DEFAULT nextval('forum_topics_id_seq'::regclass);
 
 
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: -
+--
+
+ALTER TABLE jobs ALTER COLUMN id SET DEFAULT nextval('jobs_id_seq'::regclass);
+
+
 --
 -- Name: id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -1459,6 +1534,13 @@ ALTER TABLE pool_versions ALTER COLUMN id SET DEFAULT nextval('pool_versions_id_
 ALTER TABLE pools ALTER COLUMN id SET DEFAULT nextval('pools_id_seq'::regclass);
 
 
+--
+-- Name: id; Type: DEFAULT; Schema: public; Owner: -
+--
+
+ALTER TABLE post_moderation_details ALTER COLUMN id SET DEFAULT nextval('post_moderation_details_id_seq'::regclass);
+
+
 --
 -- Name: id; Type: DEFAULT; Schema: public; Owner: -
 --
@@ -1711,6 +1793,14 @@ ALTER TABLE ONLY forum_topics
     ADD CONSTRAINT forum_topics_pkey PRIMARY KEY (id);
 
 
+--
+-- Name: jobs_pkey; Type: CONSTRAINT; Schema: public; Owner: -; Tablespace: 
+--
+
+ALTER TABLE ONLY jobs
+    ADD CONSTRAINT jobs_pkey PRIMARY KEY (id);
+
+
 --
 -- Name: pool_versions_pkey; Type: CONSTRAINT; Schema: public; Owner: -; Tablespace: 
 --
@@ -1727,6 +1817,14 @@ ALTER TABLE ONLY pools
     ADD CONSTRAINT pools_pkey PRIMARY KEY (id);
 
 
+--
+-- Name: post_moderation_details_pkey; Type: CONSTRAINT; Schema: public; Owner: -; Tablespace: 
+--
+
+ALTER TABLE ONLY post_moderation_details
+    ADD CONSTRAINT post_moderation_details_pkey PRIMARY KEY (id);
+
+
 --
 -- Name: post_versions_pkey; Type: CONSTRAINT; Schema: public; Owner: -; Tablespace: 
 --
@@ -2159,6 +2257,20 @@ CREATE INDEX index_pools_on_creator_id ON pools USING btree (creator_id);
 CREATE INDEX index_pools_on_name ON pools USING btree (name);
 
 
+--
+-- Name: index_post_moderation_details_on_post_id; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE INDEX index_post_moderation_details_on_post_id ON post_moderation_details USING btree (post_id);
+
+
+--
+-- Name: index_post_moderation_details_on_user_id; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE INDEX index_post_moderation_details_on_user_id ON post_moderation_details USING btree (user_id);
+
+
 --
 -- Name: index_post_versions_on_post_id; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
@@ -2463,4 +2575,8 @@ INSERT INTO schema_migrations (version) VALUES ('20100219230537');
 
 INSERT INTO schema_migrations (version) VALUES ('20100221003655');
 
-INSERT INTO schema_migrations (version) VALUES ('20100221005812');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20100221005812');
+
+INSERT INTO schema_migrations (version) VALUES ('20100221012656');
+
+INSERT INTO schema_migrations (version) VALUES ('20100223001012');
\ No newline at end of file
diff --git a/db/migrate/20100205162521_create_tags.rb b/db/migrate/20100205162521_create_tags.rb
index 5b2f93ded..a82e82f43 100644
--- a/db/migrate/20100205162521_create_tags.rb
+++ b/db/migrate/20100205162521_create_tags.rb
@@ -6,6 +6,7 @@ class CreateTags < ActiveRecord::Migration
       t.column :view_count, :integer, :null => false, :default => 0
       t.column :category, :integer, :null => false, :default => 0
       t.column :related_tags, :text
+      t.column :related_tags_updated_at, :datetime
       t.timestamps
     end
 
diff --git a/db/migrate/20100221012656_create_jobs.rb b/db/migrate/20100221012656_create_jobs.rb
new file mode 100644
index 000000000..0edc8fe55
--- /dev/null
+++ b/db/migrate/20100221012656_create_jobs.rb
@@ -0,0 +1,17 @@
+# Creates the jobs table; every column except the timestamps is NOT NULL.
+class CreateJobs < ActiveRecord::Migration
+  def self.up
+    create_table :jobs do |t|
+      t.column :category, :string, :null => false
+      t.column :status, :string, :null => false
+      t.column :message, :text, :null => false
+      t.column :data_as_json, :text, :null => false
+      t.column :repeat_count, :integer, :null => false
+      t.timestamps
+    end
+  end
+
+  def self.down
+    drop_table :jobs
+  end
+end
diff --git a/db/migrate/20100223001012_create_post_moderation_details.rb b/db/migrate/20100223001012_create_post_moderation_details.rb
new file mode 100644
index 000000000..f6a33f96b
--- /dev/null
+++ b/db/migrate/20100223001012_create_post_moderation_details.rb
@@ -0,0 +1,17 @@
+# Creates post_moderation_details with indexes on user_id and post_id.
+class CreatePostModerationDetails < ActiveRecord::Migration
+  def self.up
+    create_table :post_moderation_details do |t|
+      t.column :user_id, :integer, :null => false
+      t.column :post_id, :integer, :null => false
+      t.timestamps
+    end
+
+    add_index :post_moderation_details, :user_id
+    add_index :post_moderation_details, :post_id
+  end
+
+  def self.down
+    drop_table :post_moderation_details
+  end
+end
diff --git a/test/fixtures/hidden_posts.yml b/test/fixtures/hidden_posts.yml
new file mode 100644
index 000000000..289334110
--- /dev/null
+++ b/test/fixtures/hidden_posts.yml
@@ -0,0 +1,11 @@
+# Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
+
+# This model initially had no columns defined. If you add columns to the
+# model remove the '{}' from the fixture names and add the columns immediately
+# below each fixture, per the syntax in the comments below
+#
+one: {}
+# column: value
+#
+two: {}
+# column: value
diff --git a/test/fixtures/jobs.yml b/test/fixtures/jobs.yml
new file mode 100644
index 000000000..289334110
--- /dev/null
+++ b/test/fixtures/jobs.yml
@@ -0,0 +1,11 @@
+# Read about fixtures at http://ar.rubyonrails.org/classes/Fixtures.html
+
+# This model initially had no columns defined. If you add columns to the
+# model remove the '{}' from the fixture names and add the columns immediately
+# below each fixture, per the syntax in the comments below
+#
+one: {}
+# column: value
+#
+two: {}
+# column: value
diff --git a/test/unit/job_test.rb b/test/unit/job_test.rb
new file mode 100644
index 000000000..24442ff1f
--- /dev/null
+++ b/test/unit/job_test.rb
@@ -0,0 +1,8 @@
+require 'test_helper'
+
+class JobTest < ActiveSupport::TestCase
+  # Replace this with your real tests.
+  test "the truth" do
+    assert true
+  end
+end
diff --git a/test/unit/post_moderation_detail_test.rb b/test/unit/post_moderation_detail_test.rb
new file mode 100644
index 000000000..eaf09e721
--- /dev/null
+++ b/test/unit/post_moderation_detail_test.rb
@@ -0,0 +1,42 @@
+require File.dirname(__FILE__) + '/../test_helper'
+
+class PostModerationDetailTest < ActiveSupport::TestCase
+  context "A post moderation detail" do
+    should "hide posts" do
+      posts = []
+      posts << Factory.create(:post)
+      posts << Factory.create(:post)
+      posts << Factory.create(:post)
+      user = Factory.create(:user)
+      detail = PostModerationDetail.create(:user => user, :post => posts[0])
+      results = PostModerationDetail.filter(posts, user)
+      assert_equal(2, results.size)
+      assert(results.all? {|x| x.id != posts[0].id})
+      results = PostModerationDetail.filter(posts, user, true)
+      assert_equal(1, results.size)
+      assert_equal(posts[0].id, results[0].id)
+      user = Factory.create(:user)
+      results = PostModerationDetail.filter(posts, user)
+      assert_equal(3, results.size)
+      results = PostModerationDetail.filter(posts, user, true)
+      assert_equal(0, results.size)
+    end
+
+    should "prune itself" do
+      post = Factory.create(:post, :is_flagged => true)
+      user = Factory.create(:user)
+      detail = PostModerationDetail.create(:user => user, :post => post)
+      assert_difference("PostModerationDetail.count", 0) do
+        PostModerationDetail.prune!
+      end
+      post.is_flagged = false
+      post.updater_id = user.id
+      post.updater_ip_addr = "127.0.0.1"
+      post.save
+      assert(post.errors.empty?)
+      assert_difference("PostModerationDetail.count", -1) do
+        PostModerationDetail.prune!
+      end
+    end
+  end
+end
diff --git a/test/unit/related_tag_calculator_test.rb b/test/unit/related_tag_calculator_test.rb
new file mode 100644
index 000000000..a9bdfe60c
--- /dev/null
+++ b/test/unit/related_tag_calculator_test.rb
@@ -0,0 +1,41 @@
+require File.dirname(__FILE__) + '/../test_helper'
+
+class RelatedTagCalculatorTest < ActiveSupport::TestCase
+  context "A related tag calculator" do
+    should "calculate related tags for a tag" do
+      posts = []
+      posts << Factory.create(:post, :tag_string => "aaa bbb ccc ddd")
+      posts << Factory.create(:post, :tag_string => "aaa bbb ccc")
+      posts << Factory.create(:post, :tag_string => "aaa bbb")
+
+      tag = Tag.find_by_name("aaa")
+      calculator = RelatedTagCalculator.new
+      assert_equal({"bbb" => 3, "ccc" => 2, "ddd" => 1}, calculator.calculate_from_sample("aaa", 10))
+    end
+
+    should "calculate related tags for a tag within a category" do
+      posts = []
+      posts << Factory.create(:post, :tag_string => "aaa bbb art:ccc copy:ddd")
+      posts << Factory.create(:post, :tag_string => "aaa bbb art:ccc")
+      posts << Factory.create(:post, :tag_string => "aaa bbb")
+
+      tag = Tag.find_by_name("aaa")
+      calculator = RelatedTagCalculator.new
+      assert_equal({"ccc" => 2}, calculator.calculate_from_sample("aaa", 10, Tag.categories.artist))
+      calculator = RelatedTagCalculator.new
+      assert_equal({"ddd" => 1}, calculator.calculate_from_sample("aaa", 10, Tag.categories.copyright))
+    end
+
+    should "convert a hash into string format" do
+      posts = []
+      posts << Factory.create(:post, :tag_string => "aaa bbb ccc ddd")
+      posts << Factory.create(:post, :tag_string => "aaa bbb ccc")
+      posts << Factory.create(:post, :tag_string => "aaa bbb")
+
+      tag = Tag.find_by_name("aaa")
+      calculator = RelatedTagCalculator.new
+      counts = calculator.calculate_from_sample("aaa", 10)
+      assert_equal("bbb 3 ccc 2 ddd 1", calculator.convert_hash_to_string(counts))
+    end
+  end
+end