Parse and log user agent type to NewRelic.
Parse the user agent and log whether it seems like a known bot or a human to NewRelic under the `user.bot` request attribute. This is so that known bots can be filtered out of search traffic analytics. Bots and search crawlers make up a significant portion of search traffic.
This commit is contained in:
@@ -74,7 +74,8 @@ class DanbooruLogger
|
||||
level: user&.level_string,
|
||||
ip: request.remote_ip,
|
||||
country: CurrentUser.country,
|
||||
safe_mode: CurrentUser.safe_mode?
|
||||
safe_mode: CurrentUser.safe_mode?,
|
||||
bot: UserAgent.new(request.headers["HTTP_USER_AGENT"]).is_bot?,
|
||||
}
|
||||
end
|
||||
|
||||
|
||||
223
app/logical/user_agent.rb
Normal file
223
app/logical/user_agent.rb
Normal file
@@ -0,0 +1,223 @@
|
||||
# Parses a user agent string and tries to determine whether it's a known bot or a known web browser.
#
# Classification is purely pattern-based: the raw string is compared against a fixed, ordered list
# of regular expressions and the first matching rule decides the name.
#
# @see https://developers.whatismybrowser.com/
# @see https://developers.whatismybrowser.com/api/features/user-agent-checks/weird/
class UserAgent
  # Names returned by {#name} that are treated as web browsers. Any other name counts as a bot.
  BROWSER_NAMES = %w[chrome firefox safari opera edge msie unknown-browser].freeze

  attr_reader :user_agent

  # Initialize a user agent
  #
  # @param user_agent [String, nil] the user agent string; anything else is coerced with `to_s`,
  #   so a missing header (nil) becomes the empty string
  def initialize(user_agent)
    @user_agent = user_agent.to_s
  end

  # Classifies the user agent. Rules are tried top to bottom and the first match wins, so the more
  # specific rules (e.g. iMessage spoofing Facebook/Twitter) must stay above the generic ones.
  #
  # @return [String] the name of the user agent. One of: "blank" for an empty or whitespace-only
  #   string, a fixed name for a recognized bot or browser, the downcased product token for
  #   generic "name/version" agents, the first URL found in the string, "unknown-bot", or
  #   "unknown-browser"
  def name
    if user_agent.blank?
      "blank"

    # Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
    # Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.90 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html
    # Googlebot-Image/1.0
    elsif user_agent =~ %r{http://www\.google\.com/bot\.html} || user_agent =~ %r{Googlebot}
      "googlebot"

    # Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)
    # Mozilla/5.0 (compatible; YandexImages/3.0; +http://yandex.com/bots)
    elsif user_agent =~ %r{http://yandex\.com/bots}
      "yandexbot"

    # Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)
    elsif user_agent =~ %r{http://www\.bing\.com/bingbot\.htm}
      "bingbot"

    # Mozilla/5.0 (Linux; Android 7.0;) AppleWebKit/537.36 (KHTML, like Gecko) Mobile Safari/537.36 (compatible; PetalBot;+https://webmaster.petalsearch.com/site/petalbot)
    elsif user_agent =~ %r{PetalBot}
      "petalbot"

    # Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Grabber/7.5.1
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:52.0) Gecko/20100101 Firefox/52.0 Grabber/7.3.2
    # (see https://github.com/Bionus/imgbrd-grabber)
    elsif user_agent =~ %r{Firefox/52\.0 Grabber/[^ ]+\z}
      "imgbrd-grabber"

    # Mozilla/5.0 (Linux; U; Android 11; en_GB; M2101K7AG; Build/RKQ1.201022.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 onlymash.flexbooru.play/2.7.5.c1180 Mobile Safari/537.36
    # (see https://github.com/flexbooru/flexbooru)
    elsif user_agent =~ %r{flexbooru}
      "flexbooru"

    # Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_1) AppleWebKit/601.2.4 (KHTML, like Gecko) Version/9.0.1 Safari/601.2.4 facebookexternalhit/1.1 Facebot Twitterbot/1.0
    # (spoofed by iMessage; see https://stackoverflow.com/questions/41499402 and https://www.reddit.com/r/iOSProgramming/comments/4wcake)
    elsif user_agent =~ %r{\AMozilla/5\.0.*facebookexternalhit.*Twitterbot}
      "imessage"

    # http.rb/3.2.0 (Mastodon/2.4.4; +https://queer.af/)
    # http.rb/4.4.1 (Mastodon/3.4.0; +https://mastodon.social/) Bot
    # http.rb/4.4.1 (Mastodon/3.4.1; +https://raru.re/) Bot
    elsif user_agent =~ %r{Mastodon}
      "mastodon"

    # facebookexternalhit/1.1 (+http://www.facebook.com/externalhit_uatext.php)
    # facebookexternalhit/1.1;line-poker/1.0
    elsif user_agent =~ %r{\Afacebookexternalhit}
      "facebook"

    # Slackbot 1.0 (+https://api.slack.com/robots)
    elsif user_agent =~ %r{Slackbot}
      "slackbot"

    # TelegramBot (like TwitterBot)
    elsif user_agent =~ %r{\ATelegramBot}
      "telegrambot"

    # Python/3.9 aiohttp/3.6.3
    # Python/3.6 aiohttp/3.7.4.post0
    elsif user_agent =~ %r{aiohttp}
      "aiohttp"

    # Mozilla/4.0 (compatible; MSIE 5.01; Windows NT 5.0)
    # Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 10.0; WOW64; Trident/7.0; Sleipnir6/6.4.12; SleipnirSiteUpdates/6.4.12)
    # Mozilla/5.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/3.0),gzip(gfe)
    # Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/3.0),gzip(gfe)
    elsif user_agent =~ %r{\AMozilla/[^ ]+ \(compatible; MSIE}
      "msie"

    # Mozilla/5.0 (compatible; Hydrus Client)
    # Mozilla/5.0 (compatible; SauceNAO Booru Aggregator 1.0)
    # Mozilla/5.0 (compatible; inoreader.com; 1 subscribers)
    # Mozilla/5.0 (compatible; AhrefsBot/7.0; +http://ahrefs.com/robot/)
    # Mozilla/5.0 (compatible; coccocbot-web/1.0; +http://help.coccoc.com/searchengine)
    # Mozilla/5.0 (compatible; SemrushBot/7~bl; +http://www.semrush.com/bot.html)
    # Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)
    # Mozilla/5.0 (compatible; DotBot/1.2; +https://opensiteexplorer.org/dotbot; help@moz.com)
    # Mozilla/5.0 (compatible; Pinterestbot/1.0; +http://www.pinterest.com/bot.html)
    # Mozilla/5.0 (compatible; MegaIndex.ru/2.0; +http://megaindex.com/crawler)
    # Mozilla/5.0 (compatible; MJ12bot/v1.4.8; http://mj12bot.com/)
    elsif user_agent =~ %r{\AMozilla/5\.0 \(compatible; ([^/;)]+)}
      # The capture is the token after "compatible; ", up to the first "/", ";" or ")".
      $1.downcase

    # Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.17
    # Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14
    # Opera/9.80 (Android; Opera Mini/57.0.2254/191.236; U; en) Presto/2.12.423 Version/12.16
    # Opera/9.80 (X11; Fedora; Linux x64) Presto/2.12.388 Version/12.18
    elsif user_agent =~ %r{\AOpera/9\.80}
      "opera"

    # Blogtrottr/2.0
    # BooruNav.Android/1.0.0.296b
    # com.apple.Safari.SearchHelper/16611.2.7.1.4 CFNetwork/1240.0.4 Darwin/20.5.0
    # CUMZONATORBOT/1.0 kotb
    # curl/7.61.1
    # feedparser/6.0.2 +https://github.com/kurtmckee/feedparser/
    # FeedDemon/4.5 (http://www.feeddemon.com/; Microsoft Windows)
    # FreshRSS/1.18.0 (Linux; https://freshrss.org)
    # Go-http-client/2.0
    # Illustail/38 CFNetwork/1240.0.4 Darwin/20.5.0
    # ImageBoardAPI/https://github.com/Kodehawa/imageboard-api
    # Irvine/1.3.0
    # iqdb/0.1 (+http://iqdb.org/)
    # LoliSnatcher_Droid/1.8.1
    # Mantaro/6.2.7/JDA-DiscordBot (https://github.com/Mantaro/MantaroBot)
    # MetalBrowser/5.0.0.0
    # multibooru/0.1beta (Linux)
    # node-fetch/1.0 (+https://github.com/bitinn/node-fetch)
    # Node/RssFeedEmitter (https://github.com/filipedeschamps/rss-feed-emitter)
    # nori/3.5.0
    # okhttp/4.9.1
    # PassiveCrawler.green-hill/1.0.0.1
    # Pybooru/4.2.2
    # python-requests/2.22.0
    # Python-urllib/3.8
    # RSSOwlnix/2.8.0.202006031646 (Windows; U; en)
    # Twitterbot/1.0
    # Valve/Steam HTTP Client 1.0
    # Wget/1.21.1
    elsif user_agent !~ %r{\AMozilla/5\.0} && user_agent =~ %r{\A([^ /]*)/([^ ]+)}i
      # Generic "product/version" agent that doesn't claim to be Mozilla/5.0: use the product token.
      $1.downcase

    # booru (https://github.com/AtlasTheBot/booru)
    # got (https://github.com/sindresorhus/got)
    # Kaori, a npm module for boorus. v2 (https://github.com/iCrawl/kaori/)
    # Tiny Tiny RSS/21.06-7bd9572aa (http://tt-rss.org/)
    elsif user_agent =~ %r{(https?://[^ )]+)}
      # No usable product token, so the first URL in the string is used as the name.
      $1

    # Booru v1.2.0, a node package for booru searching (by AtlasTheBot)
    # Danbooru-Aggregator
    # EcchiBot / PrivateGER Discord Bot / Contact: <privateger@privateger.me>
    # Ktor client
    # MobileSafari/604.1 CFNetwork/1240.0.4 Darwin/20.5.0
    # Universal Booru Wrapper (Alejandro Akbal)
    # Mozilla/5.0
    # Mozilla/5.0 BooruSharp
    elsif user_agent !~ %r{\AMozilla/5\.0 \([^)]+\)}
      "unknown-bot"

    # Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0
    # Mozilla/5.0 (Windows NT 6.1; WOW64; rv:89.0) Gecko/20100101 Firefox/89.0
    # Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0
    # Mozilla/5.0 (Windows NT 6.3; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0
    # Mozilla/5.0 (Windows NT 10.0; rv:78.0) Gecko/20100101 Firefox/78.0
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0
    # Mozilla/5.0 (Android 11; Mobile; rv:89.0) Gecko/89.0 Firefox/89.0
    # Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:82.0) Gecko/20100101 Firefox/82.0
    # Mozilla/5.0 (X11; Linux x86_64; rv:89.0) Gecko/20100101 Firefox/89.0
    elsif user_agent =~ %r{Gecko/[^ ]+ Firefox/[^ ]+}
      "firefox"

    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.106 Safari/537.36
    # Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36
    elsif user_agent =~ %r{AppleWebKit/[^ ]+ \(KHTML, like Gecko\) Chrome/[^ ]+ Safari/[^ ]+\z}
      "chrome"

    # Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1
    # Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.80 Mobile/15E148 Safari/604.1
    # Mozilla/5.0 (iPhone; CPU iPhone OS 14_4_2 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.0.3 Mobile/15E148 Safari/604.1
    # Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Safari/605.1.15
    # Mozilla/5.0 (iPad; CPU OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) CriOS/91.0.4472.80 Mobile/15E148 Safari/604.1
    # Mozilla/5.0 (PlayStation; PlayStation 4/8.52) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/12.0 Safari/605.1.15
    elsif user_agent =~ %r{AppleWebKit/[^ ]+ \(KHTML, like Gecko\).*Safari/[^ ]+\z} && user_agent !~ %r{Chrome}
      "safari"

    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.54
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.59
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64; Xbox; Xbox One) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19041
    # Mozilla/5.0 (Linux; Android 10; SM-A102U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.116 Mobile Safari/537.36 EdgA/46.04.2.5157
    elsif user_agent =~ %r{AppleWebKit/[^ ]+ \(KHTML, like Gecko\).*Edg[eA]?/[^ ]+\z}
      "edge"

    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36 OPR/75.0.3969.285
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 OPR/77.0.4054.90
    # Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 OPR/77.0.4054.90
    # Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36 OPR/77.0.4054.90 (Edition Yx 05)
    elsif user_agent =~ %r{AppleWebKit/[^ ]+ \(KHTML, like Gecko\) Chrome/[^ ]+ Safari/[^ ]+ OPR/[^ ]+}
      "opera"

    # Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko
    # Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko
    # Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko
    # Mozilla/5.0 (Windows NT 6.1; Trident/7.0; rv:11.0) like Gecko
    # Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Geck; GreenBrowser)
    # Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; Touch; rv:11.0) like Gecko
    elsif user_agent =~ %r{Trident/[^ ;)]+}
      "msie"

    # Mozilla/5.0 (Nintendo WiiU) AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.13 NintendoBrowser/4.3.2.11274.US
    # Mozilla/5.0 (PLAYSTATION 3 4.87) AppleWebKit/531.22.8 (KHTML, like Gecko)
    # Mozilla/5.0 (PlayStation Vita 3.73) AppleWebKit/537.73 (KHTML, like Gecko) Silk/3.2
    # Mozilla/5.0 (Linux; NetCast; U) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36 SmartTV/10.0 Colt/2.0
    # Mozilla/5.0 (Nintendo 3DS; U; ; en) Version/1.7639.EU
    # Mozilla/5.0 (Nintendo Switch; WifiWebAuthApplet) AppleWebKit/606.4 (KHTML, like Gecko) NF/6.0.1.16.11 NintendoBrowser/5.1.0.20935
    else
      "unknown-browser"
    end
  end

  # Returns true if the agent is a known bot (or a human pretending to be a bot), or false if the agent
  # is a known web browser (or a bot pretending to be a known browser).
  #
  # @return [Boolean] false when {#name} is one of {BROWSER_NAMES}, true for every other name
  #   (including "blank" and "unknown-bot")
  def is_bot?
    !name.in?(BROWSER_NAMES)
  end
end
|
||||
30
test/unit/user_agent_test.rb
Normal file
30
test/unit/user_agent_test.rb
Normal file
@@ -0,0 +1,30 @@
|
||||
require 'test_helper'
|
||||
|
||||
# Checks that UserAgent#name maps representative user agent strings to the expected names.
class UserAgentTest < ActiveSupport::TestCase
  # Pairs of [expected name, raw user agent string], checked in order: known bots and tools
  # first, then regular browsers.
  EXPECTED_NAMES = [
    ["googlebot", "Mozilla/5.0 (Linux; Android 6.0.1; Nexus 5X Build/MMB29P) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.90 Mobile Safari/537.36 (compatible; Googlebot/2.1; +http://www.google.com/bot.html"],
    ["yandexbot", "Mozilla/5.0 (compatible; YandexBot/3.0; +http://yandex.com/bots)"],
    ["bingbot", "Mozilla/5.0 (compatible; bingbot/2.0; +http://www.bing.com/bingbot.htm)"],
    ["discordbot", "Mozilla/5.0 (compatible; Discordbot/2.0; +https://discordapp.com)"],
    ["twitterbot", "Twitterbot/1.0"],
    ["curl", "curl/7.61.1"],
    ["https://github.com/AtlasTheBot/booru", "booru (https://github.com/AtlasTheBot/booru)"],
    ["http://tt-rss.org/", "Tiny Tiny RSS/21.06-7bd9572aa (http://tt-rss.org/)"],
    ["unknown-bot", "Booru v1.2.0, a node package for booru searching (by AtlasTheBot)"],

    ["chrome", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36"],
    ["firefox", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:89.0) Gecko/20100101 Firefox/89.0"],
    ["safari", "Mozilla/5.0 (iPhone; CPU iPhone OS 14_6 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/14.1.1 Mobile/15E148 Safari/604.1"],
    ["edge", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36 Edg/91.0.864.54"],
    ["opera", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36 OPR/75.0.3969.285"],
    ["opera", "Opera/9.80 (Windows NT 6.1; WOW64) Presto/2.12.388 Version/12.17"],
    ["msie", "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"],
    ["unknown-browser", "Mozilla/5.0 (Nintendo WiiU) AppleWebKit/536.30 (KHTML, like Gecko) NX/3.0.4.2.13 NintendoBrowser/4.3.2.11274.US"],
  ].freeze

  # Asserts that parsing `user_agent` yields `name`.
  #
  # @param name [String] the expected result of UserAgent#name
  # @param user_agent [String] the raw user agent string to parse
  def assert_name_equals(name, user_agent)
    parsed = UserAgent.new(user_agent)
    assert_equal(name, parsed.name)
  end

  context "UserAgent#name" do
    should "parse the user agent name correctly" do
      EXPECTED_NAMES.each do |expected, raw|
        assert_name_equals(expected, raw)
      end
    end
  end
end
|
||||
Reference in New Issue
Block a user