diff --git a/doc b/doc index 9b556e5e71..34e9f9add9 160000 --- a/doc +++ b/doc @@ -1 +1 @@ -Subproject commit 9b556e5e71d0d8a5c2e7a1d4be4b308d887310f1 +Subproject commit 34e9f9add97e67c9768540433cdccf221b592a4e diff --git a/scripts/base/utils/addrs.zeek b/scripts/base/utils/addrs.zeek index 9d165936ef..070b60ed04 100644 --- a/scripts/base/utils/addrs.zeek +++ b/scripts/base/utils/addrs.zeek @@ -1,31 +1,67 @@ ##! Functions for parsing and manipulating IP and MAC addresses. # Regular expressions for matching IP addresses in strings. -const ipv4_addr_regex = /[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}/; -const ipv6_8hex_regex = /([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}/; -const ipv6_compressed_hex_regex = /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)/; -const ipv6_hex4dec_regex = /(([0-9A-Fa-f]{1,4}:){6,6})([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/; -const ipv6_compressed_hex4dec_regex = /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}:)*)([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/; -# These are commented out until patterns can be constructed this way at init time. -#const ipv6_addr_regex = ipv6_8hex_regex | -# ipv6_compressed_hex_regex | -# ipv6_hex4dec_regex | -# ipv6_compressed_hex4dec_regex; -#const ip_addr_regex = ipv4_addr_regex | ipv6_addr_regex; +const ipv4_decim = /[0-9]{1}|[0-9]{2}|0[0-9]{2}|1[0-9]{2}|2[0-4][0-9]|25[0-5]/; -const ipv6_addr_regex = - /([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}/ | - /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)/ | # IPv6 Compressed Hex - /(([0-9A-Fa-f]{1,4}:){6,6})([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/ | # 6Hex4Dec - /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}:)*)([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/; # CompressedHex4Dec +const ipv4_addr_regex = ipv4_decim & /\./ & ipv4_decim & /\./ & ipv4_decim & /\./ & ipv4_decim; -const ip_addr_regex = - /[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}\.[[:digit:]]{1,3}/ | - /([0-9A-Fa-f]{1,4}:){7}[0-9A-Fa-f]{1,4}/ | - /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)/ | # IPv6 Compressed Hex - /(([0-9A-Fa-f]{1,4}:){6,6})([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/ | # 6Hex4Dec - /(([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4})*)?)::(([0-9A-Fa-f]{1,4}:)*)([0-9]+)\.([0-9]+)\.([0-9]+)\.([0-9]+)/; # CompressedHex4Dec +const ipv6_hextet = /[0-9A-Fa-f]{1,4}/; + +const ipv6_8hex_regex = /([0-9A-Fa-f]{1,4}:){7}/ & ipv6_hextet; + +const ipv6_hex4dec_regex = /([0-9A-Fa-f]{1,4}:){6}/ & ipv4_addr_regex; + +const ipv6_compressed_lead_hextets0 = /::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,6})?/; + +const ipv6_compressed_lead_hextets1 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,5})?/; + +const ipv6_compressed_lead_hextets2 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){1}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,4})?/; + +const ipv6_compressed_lead_hextets3 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){2}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,3})?/; + +const ipv6_compressed_lead_hextets4 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){3}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,2})?/; + +const ipv6_compressed_lead_hextets5 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){4}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,1})?/; + +const ipv6_compressed_lead_hextets6 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){5}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,0})?/; + +const ipv6_compressed_lead_hextets7 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){6}::/; + +const ipv6_compressed_hex_regex = ipv6_compressed_lead_hextets0 | + ipv6_compressed_lead_hextets1 | + ipv6_compressed_lead_hextets2 | + ipv6_compressed_lead_hextets3 | + ipv6_compressed_lead_hextets4 | + ipv6_compressed_lead_hextets5 | + ipv6_compressed_lead_hextets6 | + ipv6_compressed_lead_hextets7; + +const ipv6_compressed_hext4dec_lead_hextets0 = /::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,4})?/ & ipv4_addr_regex; + +const ipv6_compressed_hext4dec_lead_hextets1 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,3})?/ & ipv4_addr_regex; + +const ipv6_compressed_hext4dec_lead_hextets2 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){1}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,2})?/ & ipv4_addr_regex; + +const ipv6_compressed_hext4dec_lead_hextets3 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){2}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,1})?/ & ipv4_addr_regex; + +const ipv6_compressed_hext4dec_lead_hextets4 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){3}::([0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){0,0})?/ & ipv4_addr_regex; + +const ipv6_compressed_hext4dec_lead_hextets5 = /[0-9A-Fa-f]{1,4}(:[0-9A-Fa-f]{1,4}){4}::/ & ipv4_addr_regex; + +const ipv6_compressed_hex4dec_regex = ipv6_compressed_hext4dec_lead_hextets0 | + ipv6_compressed_hext4dec_lead_hextets1 | + ipv6_compressed_hext4dec_lead_hextets2 | + ipv6_compressed_hext4dec_lead_hextets3 | + ipv6_compressed_hext4dec_lead_hextets4 | + ipv6_compressed_hext4dec_lead_hextets5; + +const ipv6_addr_regex = ipv6_8hex_regex | + ipv6_compressed_hex_regex | + ipv6_hex4dec_regex | + ipv6_compressed_hex4dec_regex; + +const ip_addr_regex = ipv4_addr_regex | ipv6_addr_regex; ## Checks if all elements of a string array are a valid octet value. ## @@ -44,49 +80,6 @@ function has_valid_octets(octets: string_vec): bool return T; } -## Checks if a string appears to be a valid IPv4 or IPv6 address. -## -## ip_str: the string to check for valid IP formatting. -## -## Returns: T if the string is a valid IPv4 or IPv6 address format. -function is_valid_ip(ip_str: string): bool - { - local octets: string_vec; - if ( ip_str == ipv4_addr_regex ) - { - octets = split_string(ip_str, /\./); - if ( |octets| != 4 ) - return F; - - return has_valid_octets(octets); - } - else if ( ip_str == ipv6_addr_regex ) - { - if ( ip_str == ipv6_hex4dec_regex || - ip_str == ipv6_compressed_hex4dec_regex ) - { - # the regexes for hybrid IPv6-IPv4 address formats don't for valid - # octets within the IPv4 part, so do that now - octets = split_string(ip_str, /\./); - if ( |octets| != 4 ) - return F; - - # get rid of remaining IPv6 stuff in first octet - local tmp = split_string(octets[0], /:/); - octets[0] = tmp[|tmp| - 1]; - - return has_valid_octets(octets); - } - else - { - # pure IPv6 address formats that only use hex digits don't need - # any additional checks -- the regexes should be complete - return T; - } - } - return F; - } - ## Extracts all IP (v4 or v6) address strings from a given string. ## ## input: a string that may contain an IP address anywhere within it. diff --git a/src/IPAddr.cc b/src/IPAddr.cc index 7917e82c29..c215b463b9 100644 --- a/src/IPAddr.cc +++ b/src/IPAddr.cc @@ -101,38 +101,44 @@ void IPAddr::ReverseMask(int top_bits_to_chop) p[i] &= mask_bits[i]; } -void IPAddr::Init(const std::string& s) +bool IPAddr::ConvertString(const char* s, in6_addr* result) { - if ( s.find(':') == std::string::npos ) // IPv4. + for ( auto p = s; *p; ++p ) + if ( *p == ':' ) + // IPv6 + return (inet_pton(AF_INET6, s, result->s6_addr) == 1); + + // IPv4 + // Parse the address directly instead of using inet_pton since + // some platforms have more sensitive implementations than others + // that can't e.g. handle leading zeroes. + int a[4]; + int n = 0; + int match_count = sscanf(s, "%d.%d.%d.%d%n", a+0, a+1, a+2, a+3, &n); + + if ( match_count != 4 ) + return false; + + if ( s[n] != '\0' ) + return false; + + for ( auto i = 0; i < 4; ++i ) + if ( a[i] < 0 || a[i] > 255 ) + return false; + + uint32_t addr = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]; + addr = htonl(addr); + memcpy(result->s6_addr, v4_mapped_prefix, sizeof(v4_mapped_prefix)); + memcpy(&result->s6_addr[12], &addr, sizeof(uint32_t)); + return true; + } + +void IPAddr::Init(const char* s) + { + if ( ! ConvertString(s, &in6) ) { - memcpy(in6.s6_addr, v4_mapped_prefix, sizeof(v4_mapped_prefix)); - - // Parse the address directly instead of using inet_pton since - // some platforms have more sensitive implementations than others - // that can't e.g. handle leading zeroes. - int a[4]; - int n = sscanf(s.c_str(), "%d.%d.%d.%d", a+0, a+1, a+2, a+3); - - if ( n != 4 || a[0] < 0 || a[1] < 0 || a[2] < 0 || a[3] < 0 || - a[0] > 255 || a[1] > 255 || a[2] > 255 || a[3] > 255 ) - { - reporter->Error("Bad IP address: %s", s.c_str()); - memset(in6.s6_addr, 0, sizeof(in6.s6_addr)); - return; - } - - uint32_t addr = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]; - addr = htonl(addr); - memcpy(&in6.s6_addr[12], &addr, sizeof(uint32_t)); - } - - else - { - if ( inet_pton(AF_INET6, s.c_str(), in6.s6_addr) <=0 ) - { - reporter->Error("Bad IP address: %s", s.c_str()); - memset(in6.s6_addr, 0, sizeof(in6.s6_addr)); - } + reporter->Error("Bad IP address: %s", s); + memset(in6.s6_addr, 0, sizeof(in6.s6_addr)); } } diff --git a/src/IPAddr.h b/src/IPAddr.h index 8ff258a860..1fdff9d979 100644 --- a/src/IPAddr.h +++ b/src/IPAddr.h @@ -68,7 +68,7 @@ public: */ IPAddr(const std::string& s) { - Init(s); + Init(s.data()); } /** @@ -366,6 +366,29 @@ public: unsigned int MemoryAllocation() const { return padded_sizeof(*this); } + /** + * Converts an IPv4 or IPv6 string into a network address structure + * (IPv6 or v4-to-v6-mapping in network bytes order). + * + * @param s the IPv4 or IPv6 string to convert (ASCII, NUL-terminated). + * + * @param result buffer that the caller supplies to store the result. + * + * @return whether the conversion was successful. + */ + static bool ConvertString(const char* s, in6_addr* result); + + /** + * @param s the IPv4 or IPv6 string to convert (ASCII, NUL-terminated). + * + * @return whether the string is a valid IP address + */ + static bool IsValid(const char* s) + { + in6_addr tmp; + return ConvertString(s, &tmp); + } + private: friend class IPPrefix; @@ -373,9 +396,9 @@ private: * Initializes an address instance from a string representation. * * @param s String containing an IP address as either a dotted IPv4 - * address or a hex IPv6 address. + * address or a hex IPv6 address (ASCII, NUL-terminated). */ - void Init(const std::string& s); + void Init(const char* s); in6_addr in6; // IPv6 or v4-to-v6-mapped address diff --git a/src/bro.bif b/src/bro.bif index 96419ab83d..7ecb688841 100644 --- a/src/bro.bif +++ b/src/bro.bif @@ -2409,6 +2409,19 @@ function to_addr%(ip: string%): addr return ret; %} +## Checks if a string is a valid IPv4 or IPv6 address. +## +## ip: the string to check for valid IP formatting. +## +## Returns: T if the string is a valid IPv4 or IPv6 address format. +function is_valid_ip%(ip: string%): bool + %{ + char* s = ip->AsString()->Render(); + auto rval = IPAddr::IsValid(s); + delete [] s; + return val_mgr->GetBool(rval); + %} + ## Converts a :bro:type:`string` to a :bro:type:`subnet`. ## ## sn: The subnet to convert. diff --git a/src/util.cc b/src/util.cc index 0367700ffb..8a8f733223 100644 --- a/src/util.cc +++ b/src/util.cc @@ -53,11 +53,13 @@ #include "iosource/Manager.h" /** - * Return IP address without enclosing brackets and any leading 0x. + * Return IP address without enclosing brackets and any leading 0x. Also + * trims leading/trailing whitespace. */ std::string extract_ip(const std::string& i) { - std::string s(skip_whitespace(i.c_str())); + std::string s(strstrip(i)); + if ( s.size() > 0 && s[0] == '[' ) s.erase(0, 1); diff --git a/testing/btest/Baseline/scripts.base.utils.addrs/output b/testing/btest/Baseline/scripts.base.utils.addrs/output index 37afcb4719..37cd37bbb2 100644 --- a/testing/btest/Baseline/scripts.base.utils.addrs/output +++ b/testing/btest/Baseline/scripts.base.utils.addrs/output @@ -1,4 +1,4 @@ -============ test ipv4 regex +============ test ipv4 regex (good strings) T T T @@ -6,9 +6,24 @@ T T T T +T +T +T +T +T +T +T +============ bad ipv4 decimals F F F +F +F +F +============ too many ipv4 decimals +F +F +============ typical looking ipv4 T T ============ test ipv6 regex @@ -30,6 +45,9 @@ T F F F +F +F ============ test extract_ip_addresses() [1.1.1.1, 2.2.2.2, 3.3.3.3] [1.1.1.1, 0:0:0:0:0:0:0:0, 3.3.3.3] +[6:1:2::3:4:5:6] diff --git a/testing/btest/scripts/base/utils/addrs.test b/testing/btest/scripts/base/utils/addrs.test index 224fd9dc62..869d27aab5 100644 --- a/testing/btest/scripts/base/utils/addrs.test +++ b/testing/btest/scripts/base/utils/addrs.test @@ -5,23 +5,54 @@ event bro_init() { + print "============ test ipv4 regex (good strings)"; local ip = "0.0.0.0"; - - print "============ test ipv4 regex"; print ip == ipv4_addr_regex; print is_valid_ip(ip); + ip = "1.1.1.1"; print ip == ipv4_addr_regex; print is_valid_ip(ip); + + ip = "9.9.9.9"; + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + + ip = "99.99.99.99"; + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + + ip = "09.99.99.99"; + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + + ip = "009.99.99.99"; + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + ip = "255.255.255.255"; print ip == ipv4_addr_regex; print is_valid_ip(ip); + + print "============ bad ipv4 decimals"; ip = "255.255.255.256"; - print ip == ipv4_addr_regex; # the regex doesn't check for 0-255 - print is_valid_ip(ip); # but is_valid_ip() will + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + + ip = "255.255.255.295"; + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + + ip = "255.255.255.300"; + print ip == ipv4_addr_regex; + print is_valid_ip(ip); + + print "============ too many ipv4 decimals"; ip = "255.255.255.255.255"; print ip == ipv4_addr_regex; print is_valid_ip(ip); + + print "============ typical looking ipv4"; ip = "192.168.1.100"; print ip == ipv4_addr_regex; print is_valid_ip(ip); @@ -97,8 +128,16 @@ event bro_init() ip = "2001:db8:0:0:0:FFFF:192.168.0.256"; print is_valid_ip(ip); + # These have too many hextets ("::" must expand to at least one hextet) + print is_valid_ip("6:1:2::3:4:5:6:7"); + print is_valid_ip("6:1:2::3:4:5:6:7:8"); + print "============ test extract_ip_addresses()"; print extract_ip_addresses("this is 1.1.1.1 a test 2.2.2.2 string with ip addresses 3.3.3.3"); print extract_ip_addresses("this is 1.1.1.1 a test 0:0:0:0:0:0:0:0 string with ip addresses 3.3.3.3"); + # This will use the leading 6 from "IPv6" (maybe that's not intended + # by a person trying to parse such a string, but that's just what's going + # to happen; it's on them to deal). + print extract_ip_addresses("IPv6:1:2::3:4:5:6:7"); }