From 844a077599886798f2f220992045a1868d8613fb Mon Sep 17 00:00:00 2001 From: Coconut Date: Fri, 29 Mar 2013 10:27:55 -0400 Subject: [PATCH] Strip junk from Pixiv URLs before indexing, #726 --- app/logical/post_query_builder.rb | 9 ++++--- ...39_change_source_pattern_index_on_posts.rb | 24 +++++++++++++++++++ 2 files changed, 30 insertions(+), 3 deletions(-) create mode 100644 db/migrate/20130328092739_change_source_pattern_index_on_posts.rb diff --git a/app/logical/post_query_builder.rb b/app/logical/post_query_builder.rb index 5f2dfd8ec..cc03c74ba 100644 --- a/app/logical/post_query_builder.rb +++ b/app/logical/post_query_builder.rb @@ -150,14 +150,17 @@ class PostQueryBuilder relation = relation.where("posts.is_deleted = FALSE") end + # The SourcePattern SQL function replaces Pixiv sources with "pixiv/[suffix]", where + # [suffix] is everything past the second-to-last slash in the URL. It leaves non-Pixiv + # URLs unchanged. This is to ease database load for Pixiv source searches. if q[:source] if q[:source] == "none%" relation = relation.where("(posts.source = '' OR posts.source IS NULL)") - elsif q[:source] =~ /pixiv/ - raise ::Post::SearchError.new("pixiv source searches temporarily disabled") + elsif q[:source] =~ /^%\.?pixiv(\.net(\/img)?)?(%\/|(?=%$))(.+)$/ + relation = relation.where("SourcePattern(posts.source) LIKE ? ESCAPE E'\\\\'", "pixiv/" + $5) has_constraints! else - relation = relation.where("posts.source LIKE ? ESCAPE E'\\\\'", q[:source]) + relation = relation.where("SourcePattern(posts.source) LIKE SourcePattern(?) ESCAPE E'\\\\'", q[:source]) has_constraints! end end diff --git a/db/migrate/20130328092739_change_source_pattern_index_on_posts.rb b/db/migrate/20130328092739_change_source_pattern_index_on_posts.rb new file mode 100644 index 000000000..523070ce0 --- /dev/null +++ b/db/migrate/20130328092739_change_source_pattern_index_on_posts.rb @@ -0,0 +1,24 @@ +class ChangeSourcePatternIndexOnPosts < ActiveRecord::Migration + def up + execute "set statement_timeout = 0" + execute "DROP INDEX index_posts_on_pixiv_suffix;" + execute "DROP INDEX index_posts_on_source_pattern;" + execute "CREATE FUNCTION SourcePattern(src text) RETURNS text AS $$ + BEGIN + RETURN regexp_replace(src, '^[^/]*(//)?[^/]*\.pixiv\.net/img.*(/[^/]*/[^/]*)$', 'pixiv\2'); + END; + $$ LANGUAGE plpgsql IMMUTABLE RETURNS NULL ON NULL INPUT;" + execute "CREATE INDEX index_posts_on_source_pattern ON posts USING btree + ((SourcePattern(source)) text_pattern_ops);" + end + + def down + execute "set statement_timeout = 0" + execute "DROP INDEX index_posts_on_source_pattern;" + execute "DROP FUNCTION SourcePattern(text);" + execute "CREATE INDEX index_posts_on_source_pattern ON posts USING btree + (source text_pattern_ops);" + execute "CREATE INDEX index_posts_on_pixiv_suffix ON posts USING btree + ((substring(source, 'pixiv.net/img.*/([^/]*/[^/]*)$')) text_pattern_ops);" + end +end