zeek/policy.old/http-anon-useragent.bro
2011-03-01 10:51:44 -05:00

111 lines
3.1 KiB
Text

# $Id:$
# Filter-in known "USER-AGENT:" values.
@load anon
@load http-anon-utils
# ---------------------------------------------------------------------
# Mozilla (and friends)
# ---------------------------------------------------------------------
# Full Mozilla-style user-agent: "mozilla/N.N (", any run of whitelisted
# tokens (platform, locale, browser add-ons, separators), ")", then an
# optional "gecko/<digits>" and an optional "firefox/<dotted version>".
# All literals are lower-case -- presumably the caller lower-cases the
# header value first; TODO confirm.
# The Firefox version separator is an escaped dot so that only a literal
# "." (not an arbitrary character) is accepted between version components.
const mozilla_full_pat =
	/mozilla\/[0-9]\.[0-9] \(( *|;|iebar| freebsd i[0-9]{1,4}|fr|-|windows|windows 98|sunos sun4u|compatible|msie [0-9]\.[0-9]|windows nt [0-9]\.[0-9]|google-tr-1|sv1|\.net clr ([0-9]\.)*[0-9]+|x11|en|ppc mac os x|macintosh|u|linux i[0-9]{1,4}|en-us|rv\:([0-9]+\.)*[0-9]+|aol [0-9]\.[0-9]|gnotify ([0-9]+\.)*[0-9]+)*\) *(gecko\/[0-9]+)? *(firefox\/([0-9]+\.)*[0-9]+)?/;

# Only the leading "mozilla/N.N" product token.
const mozilla_head_pat = /mozilla\/[0-9]\.[0-9]/;
# Simple non-Mozilla agents whose strings are treated as safe as-is.
# The pattern is the union of the alternatives below, listed alphabetically.
const misc_user_pat =
	  /crawler[0-9]\.[0-9]/
	| /davkit\/[0-9]\.[0-9]/
	| /freecorder/
	| /headdump/
	| /industry update control/
	| /java([0-9]+\.)*[0-9]+(_[0-9]+)?/
	| /java\/([0-9]+\.)*[0-9]+(_[0-9]+)?/
	| /microsoft-cryptoapi\/([0-9]+\.)*[0-9]+/
	| /ruriko\/([0-9]+\.)*[0-9]+/
	| /spiderman/
	| /symantec liveupdate/
	| /w3m\/([0-9]+\.)*[0-9]+/
	| /w3search/
	| /winamp\/([0-9]+\.)*[0-9]+/
	| /windows-media-player\/([0-9]+\.)*[0-9]+/;
# Agents that append extra detail after the product token.  Every
# alternative is the product token (usually with a version) followed by
# ".*", so a match also covers whatever text follows the token.
# Alternatives are listed alphabetically.
const misc_cmplx_user_pat =
	  /aim\/[0-9]+.*/
	| /aipbot\/([0-9]+\.)*[0-9]+.*/
	| /fast-webcrawler\/([0-9]+\.)*[0-9]+.*/
	| /googlebot\/([0-9]+\.)*[0-9]+.*/
	| /ichiro\/([0-9]+\.)*[0-9]+.*/
	| /irlbot\/[0-9]\.[0-9]+.*/
	| /lynx\/([0-9]+\.)*[0-9]+.*/
	| /mac os x; webservicescore\.framework.*/
	| /msnbot\/([0-9]+\.)*[0-9]+.*/
	| /netnewswire\/([0-9]+\.)*[0-9]+.*/
	| /nsplayer\/([0-9]+\.)*[0-9]+.*/
	| /opera\/([0-9]+\.)*[0-9]+.*/
	| /rma\/([0-9]+\.)*[0-9]+.*/
	| /skype.*/
	| /unchaos.*/
	| /wget\/([0-9]+\.)*[0-9]+.*/
	| /yahooseeker\/([0-9]+\.)*[0-9]+.*/;
# Leading product tokens of the detail-carrying agents: the same product
# names as misc_cmplx_user_pat, but without the trailing ".*", so a match
# stops at the end of the token/version.
# Alternatives are listed alphabetically.
const misc_cmplx_user_start =
	  /aim\/[0-9]+/
	| /aipbot\/([0-9]+\.)*[0-9]+/
	| /fast-webcrawler\/([0-9]+\.)*[0-9]+/
	| /googlebot\/([0-9]+\.)*[0-9]+/
	| /ichiro\/([0-9]+\.)*[0-9]+/
	| /irlbot\/[0-9]\.[0-9]+/
	| /lynx\/([0-9]+\.)*[0-9]+/
	| /mac os x; webservicescore\.framework/
	| /msnbot\/([0-9]+\.)*[0-9]+/
	| /netnewswire\/([0-9]+\.)*[0-9]+/
	| /nsplayer\/([0-9]+\.)*[0-9]+/
	| /opera\/([0-9]+\.)*[0-9]+/
	| /rma\/([0-9]+\.)*[0-9]+/
	| /skype/
	| /unchaos/
	| /wget\/([0-9]+\.)*[0-9]+/
	| /yahooseeker\/([0-9]+\.)*[0-9]+/;
# Reduce a User-Agent header value to a whitelisted ("filtered-in") form.
#
# user: the User-Agent value to filter.  The patterns used here are all
#       lower-case -- presumably the caller lower-cases it; TODO confirm.
#
# Returns, in order of precedence:
#   - the text matched by mozilla_full_pat, if it occurs in "user";
#   - "<mozilla/N.N> <crawler tag>" for other Mozilla-headed agents
#     (unrecognized ones are also reported to http_anon_log);
#   - "user" unchanged if it contains a misc_user_pat agent;
#   - only the leading product token for misc_cmplx_user_start agents;
#   - otherwise "(bro: unknown) " followed by anonymize_arg()'s result,
#     after reporting the value to http_anon_log.
function filter_in_http_useragent(user: string): string
	{
	# Known Mozilla-style agent found inside "user" (embedded match, not
	# an anchored one): keep only the text the whitelist pattern matched.
	# Element [2] of split_all()'s result is the first matched piece.
	if ( mozilla_full_pat in user )
		return split_all(user, mozilla_full_pat)[2];

	# Look for popular Mozilla-compatible crawlers.
	if ( mozilla_head_pat in user )
		{
		local crawler = "(bro: unknown)";

		if ( /.*yahoo\! slurp/ in user )
			crawler = "(yahoo! slurp)";
		else if ( /.*ask jeeves/ in user )
			crawler = "(ask jeeves)";
		else
			print http_anon_log,
				fmt("*** unknown Mozilla user-agent %s\n", user);

		return fmt("%s %s", split_all(user, mozilla_head_pat)[2],
				crawler);
		}

	# Some simple, common user names.
	if ( misc_user_pat in user )
		return user;

	# Require some info removal: keep only the product token/version.
	# Splitting on misc_cmplx_user_pat would not remove anything after
	# the token, because each of its alternatives ends in ".*" and so
	# the matched piece would include all of the trailing detail.
	if ( misc_cmplx_user_start in user )
		return split_all(user, misc_cmplx_user_start)[2];

	print http_anon_log,fmt("*** unknown user agent %s\n", user);

	return fmt("(bro: unknown) %s", anonymize_arg("user-agent", user));
	}