zeek/scripts/base/utils/addrs.bro
Seth Hall 597a4d6704 Hopefully the last major script reorganization.
- policy/ renamed to scripts/

- By default BROPATH now contains:
	- scripts/
	- scripts/policy
	- scripts/site

- *Nearly* all tests pass.

- All of scripts/base/ is loaded by main.cc
	- Can be disabled by setting $BRO_NO_BASE_SCRIPTS
	- Scripts in scripts/base/ don't use relative path loading to ease use of BRO_NO_BASE_SCRIPTS (to copy and paste that script).

- The scripts in scripts/base/protocols/ only (or soon will only) do logging and state building.

- The scripts in scripts/base/frameworks/ add functionality without causing any additional overhead.

- All "detection" activity happens through scripts in scripts/policy/.

- Communications framework modified temporarily to need an environment variable to actually enable (ENABLE_COMMUNICATION=1)
	- This is so the communications framework can be loaded as part
	  of the base without causing trouble when it's not needed.
	- This will be removed once a resolution to ticket #540 is reached.
2011-08-05 23:09:53 -04:00

100 lines
3.7 KiB
Text

##! Functions for parsing and manipulating IP addresses.
# Regular expressions for matching IP addresses in strings.
const ipv4_addr_regex = /[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}/;
const ipv6_8hex_regex = /([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}/;
const ipv6_compressed_hex_regex = /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)/;
const ipv6_hex4dec_regex = /(([0-9A-Fa-f]{1,4}:){6,6})([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/;
const ipv6_compressed_hex4dec_regex = /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}:)*)([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/;
# These are commented out until patterns can be constructed this way at init time.
#const ipv6_addr_regex = ipv6_8hex_regex |
# ipv6_compressed_hex_regex |
# ipv6_hex4dec_regex |
# ipv6_compressed_hex4dec_regex;
#const ip_addr_regex = ipv4_addr_regex | ipv6_addr_regex;
const ipv6_addr_regex =
/([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}/ |
/(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)/ | # IPv6 Compressed Hex
/(([0-9A-Fa-f]{1,4}:){6,6})([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/ | # 6Hex4Dec
/(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}:)*)([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/; # CompressedHex4Dec
const ip_addr_regex =
/[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}/ |
/([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}/ |
/(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)/ | # IPv6 Compressed Hex
/(([0-9A-Fa-f]{1,4}:){6,6})([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/ | # 6Hex4Dec
/(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}:)*)([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/; # CompressedHex4Dec
## Checks if all elements of a string array are a valid octet value.
## octets: an array of strings to check for valid octet values.
## Returns: T if every element is between 0 and 255, inclusive, else F.
function has_valid_octets(octets: string_array): bool
{
local num = 0;
for ( i in octets )
{
num = to_count(octets[i]);
if ( num < 0 || 255 < num )
return F;
}
return T;
}
## Checks if a string appears to be a valid IPv4 or IPv6 address.
## ip_str: the string to check for valid IP formatting.
## Returns: T if the string is a valid IPv4 or IPv6 address format.
function is_valid_ip(ip_str: string): bool
{
local octets: string_array;
if ( ip_str == ipv4_addr_regex )
{
octets = split(ip_str, /\./);
if ( |octets| != 4 )
return F;
return has_valid_octets(octets);
}
else if ( ip_str == ipv6_addr_regex )
{
if ( ip_str == ipv6_hex4dec_regex ||
ip_str == ipv6_compressed_hex4dec_regex )
{
# the regexes for hybrid IPv6-IPv4 address formats don't for valid
# octets within the IPv4 part, so do that now
octets = split(ip_str, /\./);
if ( |octets| != 4 )
return F;
# get rid of remaining IPv6 stuff in first octet
local tmp = split(octets[1], /:/);
octets[1] = tmp[|tmp|];
return has_valid_octets(octets);
}
else
{
# pure IPv6 address formats that only use hex digits don't need
# any additional checks -- the regexes should be complete
return T;
}
}
return F;
}
## Extracts all IP (v4 or v6) address strings from a given string.
## input: a string that may contain an IP address anywhere within it.
## Returns: an array containing all valid IP address strings found in input.
function find_ip_addresses(input: string): string_array
{
local parts = split_all(input, ip_addr_regex);
local output: string_array;
for ( i in parts )
{
if ( i % 2 == 0 && is_valid_ip(parts[i]) )
output[|output|] = parts[i];
}
return output;
}