# Mirror of https://github.com/zeek/zeek.git (synced 2025-10-02 06:38:20 +00:00).
# 111 lines, 3.1 KiB, Text.
#
# $Id:$

# Filter-in known "USER-AGENT:" values.

@load anon
|
|
@load http-anon-utils
|
|
|
|
# ---------------------------------------------------------------------
# Mozilla (and friends)
# ---------------------------------------------------------------------

# A complete, well-understood Mozilla-style User-Agent value:
#
#	mozilla/<d>.<d> (<known tokens>) [gecko/<digits>] [firefox/<version>]
#
# The parenthesized section may only contain the tokens enumerated in the
# alternation (platform names, locales, "msie x.y", "rv:...", separators,
# and so on).  All literals are lower-case.
#
# The Firefox version uses an escaped dot, like every other dotted version
# in this file; an unescaped "." would accept any character between the
# digit runs (e.g. "firefox/1a0").
const mozilla_full_pat =
	/mozilla\/[0-9]\.[0-9] \(( *|;|iebar| freebsd i[0-9]{1,4}|fr|-|windows|windows 98|sunos sun4u|compatible|msie [0-9]\.[0-9]|windows nt [0-9]\.[0-9]|google-tr-1|sv1|\.net clr ([0-9]\.)*[0-9]+|x11|en|ppc mac os x|macintosh|u|linux i[0-9]{1,4}|en-us|rv\:([0-9]+\.)*[0-9]+|aol [0-9]\.[0-9]|gnotify ([0-9]+\.)*[0-9]+)*\) *(gecko\/[0-9]+)? *(firefox\/([0-9]+\.)*[0-9]+)?/;
|
|
|
|
# Just the leading "mozilla/<digit>.<digit>" product token.
# filter_in_http_useragent uses it to recognize Mozilla-style agents that
# did not match mozilla_full_pat and to keep only this token from them.
const mozilla_head_pat = /mozilla\/[0-9]\.[0-9]/;
|
|
|
|
# Simple, well-known User-Agent values: a fixed name, optionally followed
# by a dotted version number.  filter_in_http_useragent returns the input
# string as-is when this pattern is found in it.  All literals are
# lower-case.
const misc_user_pat =
	  /spiderman/
	| /w3m\/([0-9]+\.)*[0-9]+/
	| /java([0-9]+\.)*[0-9]+(_[0-9]+)?/
	| /java\/([0-9]+\.)*[0-9]+(_[0-9]+)?/
	| /freecorder/
	| /industry update control/
	| /microsoft-cryptoapi\/([0-9]+\.)*[0-9]+/
	| /ruriko\/([0-9]+\.)*[0-9]+/
	| /crawler[0-9]\.[0-9]/
	| /w3search/
	| /symantec liveupdate/
	| /davkit\/[0-9]\.[0-9]/
	| /windows-media-player\/([0-9]+\.)*[0-9]+/
	| /winamp\/([0-9]+\.)*[0-9]+/
	| /headdump/
	;
|
|
|
|
# Known User-Agent product tokens that are usually followed by extra,
# free-form detail.  Each alternative ends in ".*", so a match extends
# from the product token over the rest of the string.  All literals are
# lower-case.  misc_cmplx_user_start lists the same products without the
# trailing ".*".
const misc_cmplx_user_pat =
	  /lynx\/([0-9]+\.)*[0-9]+.*/
	| /wget\/([0-9]+\.)*[0-9]+.*/
	| /yahooseeker\/([0-9]+\.)*[0-9]+.*/
	| /rma\/([0-9]+\.)*[0-9]+.*/
	| /aim\/[0-9]+.*/
	| /ichiro\/([0-9]+\.)*[0-9]+.*/
	| /unchaos.*/
	| /irlbot\/[0-9]\.[0-9]+.*/
	| /msnbot\/([0-9]+\.)*[0-9]+.*/
	| /opera\/([0-9]+\.)*[0-9]+.*/
	| /netnewswire\/([0-9]+\.)*[0-9]+.*/
	| /nsplayer\/([0-9]+\.)*[0-9]+.*/
	| /aipbot\/([0-9]+\.)*[0-9]+.*/
	| /mac os x; webservicescore\.framework.*/
	| /fast-webcrawler\/([0-9]+\.)*[0-9]+.*/
	| /skype.*/
	| /googlebot\/([0-9]+\.)*[0-9]+.*/
	;
|
|
|
|
# The product tokens of misc_cmplx_user_pat without the trailing ".*":
# a match covers only the product name and, where present, its version.
# All literals are lower-case.
const misc_cmplx_user_start =
	  /lynx\/([0-9]+\.)*[0-9]+/
	| /wget\/([0-9]+\.)*[0-9]+/
	| /yahooseeker\/([0-9]+\.)*[0-9]+/
	| /rma\/([0-9]+\.)*[0-9]+/
	| /aim\/[0-9]+/
	| /ichiro\/([0-9]+\.)*[0-9]+/
	| /unchaos/
	| /irlbot\/[0-9]\.[0-9]+/
	| /opera\/([0-9]+\.)*[0-9]+/
	| /msnbot\/([0-9]+\.)*[0-9]+/
	| /netnewswire\/([0-9]+\.)*[0-9]+/
	| /nsplayer\/([0-9]+\.)*[0-9]+/
	| /aipbot\/([0-9]+\.)*[0-9]+/
	| /mac os x; webservicescore\.framework/
	| /fast-webcrawler\/([0-9]+\.)*[0-9]+/
	| /skype/
	| /googlebot\/([0-9]+\.)*[0-9]+/
	;
|
|
|
|
# Reduce a User-Agent value to a form that only exposes known text.
#
# user: the User-Agent value to filter.  Every pattern used here is
#       lower-case, so the caller presumably lower-cases the value first
#       (NOTE(review): confirm against the caller).
#
# Returns, in order of precedence:
#   - the portion of "user" matched by mozilla_full_pat;
#   - "<mozilla/x.y> <crawler>", where <crawler> is "(yahoo! slurp)",
#     "(ask jeeves)" or "(bro: unknown)", for other Mozilla-style values;
#   - "user" unchanged if misc_user_pat occurs in it;
#   - only the product/version token matched by misc_cmplx_user_start;
#   - otherwise "(bro: unknown) " followed by the result of
#     anonymize_arg("user-agent", user).
#
# Unrecognized values are also printed to http_anon_log.
function filter_in_http_useragent(user: string): string
	{
	# split_all() returns the unmatched and matched pieces alternately;
	# element 2 is the text of the first match.

	# A fully recognized Mozilla value: keep just the matched text.
	if ( mozilla_full_pat in user )
		return split_all(user, mozilla_full_pat)[2];

	# Look for popular Mozilla-compatible crawlers.
	if ( mozilla_head_pat in user )
		{
		local crawler = "(bro: unknown)";

		if ( /.*yahoo\! slurp/ in user )
			crawler = "(yahoo! slurp)";
		else if ( /.*ask jeeves/ in user )
			crawler = "(ask jeeves)";
		else
			print http_anon_log,
				fmt("*** unknown Mozilla user-agent %s\n", user);

		# Only the "mozilla/x.y" token survives, plus the crawler tag.
		return fmt("%s %s", split_all(user, mozilla_head_pat)[2],
				crawler);
		}

	# Some simple, common user names.
	# NOTE(review): this is a substring test, yet the whole input is
	# returned, so any text surrounding the known name passes through
	# unfiltered.  Confirm that this is intended.
	if ( misc_user_pat in user )
		return user;

	# Require some info removal.  Split on the ".*"-free pattern: every
	# alternative of misc_cmplx_user_pat ends in ".*", so its match runs
	# to the end of the string and would keep the very detail this
	# branch is supposed to strip.
	if ( misc_cmplx_user_start in user )
		return split_all(user, misc_cmplx_user_start)[2];

	print http_anon_log, fmt("*** unknown user agent %s\n", user);

	return fmt("(bro: unknown) %s", anonymize_arg("user-agent", user));
	}
|