Conn/net_utils/fmt_mac: Avoid snprintf(), memcpy() and allocation

The fmt_mac() function returning a std::string means the resulting mac
is copied at least once upon returning. Then, the Assign() in GetVal()
taking a std::string internally allocates a new zeek::String which
hits a malloc (no short-string optimization for zeek::String) and then
also copies the content from the std::string into the malloced memory.

Save a few cycles by directly using the allocated memory with the
String instance. This change improves runtime for a SYN-only pcap
with just base/protocols/conn loaded by some 1-2%.
This commit is contained in:
Arne Welzel 2024-02-08 11:32:06 +01:00
parent c41977057a
commit 29f5b507b6
3 changed files with 87 additions and 16 deletions

View file

@ -196,16 +196,20 @@ const RecordValPtr& Connection::GetVal() {
const int l2_len = sizeof(orig_l2_addr); const int l2_len = sizeof(orig_l2_addr);
char null[l2_len]{}; char null[l2_len]{};
if ( memcmp(&orig_l2_addr, &null, l2_len) != 0 ) if ( memcmp(&orig_l2_addr, &null, l2_len) != 0 ) {
orig_endp->Assign(5, fmt_mac(orig_l2_addr, l2_len)); auto [mac_bytes, mac_len] = fmt_mac_bytes(orig_l2_addr, l2_len);
orig_endp->Assign(5, new String(true, mac_bytes.release(), mac_len));
}
auto resp_endp = make_intrusive<RecordVal>(id::endpoint); auto resp_endp = make_intrusive<RecordVal>(id::endpoint);
resp_endp->Assign(0, 0); resp_endp->Assign(0, 0);
resp_endp->Assign(1, 0); resp_endp->Assign(1, 0);
resp_endp->Assign(4, resp_flow_label); resp_endp->Assign(4, resp_flow_label);
if ( memcmp(&resp_l2_addr, &null, l2_len) != 0 ) if ( memcmp(&resp_l2_addr, &null, l2_len) != 0 ) {
resp_endp->Assign(5, fmt_mac(resp_l2_addr, l2_len)); auto [mac_bytes, mac_len] = fmt_mac_bytes(resp_l2_addr, l2_len);
resp_endp->Assign(5, new String(true, mac_bytes.release(), mac_len));
}
conn_val->Assign(0, std::move(id_val)); conn_val->Assign(0, std::move(id_val));
conn_val->Assign(1, std::move(orig_endp)); conn_val->Assign(1, std::move(orig_endp));

View file

@ -8,7 +8,9 @@
#include <netinet/in.h> #include <netinet/in.h>
#include <sys/socket.h> #include <sys/socket.h>
#include <sys/types.h> #include <sys/types.h>
#include <memory>
#include "zeek/3rdparty/doctest.h"
#include "zeek/IP.h" #include "zeek/IP.h"
#include "zeek/IPAddr.h" #include "zeek/IPAddr.h"
#include "zeek/Reporter.h" #include "zeek/Reporter.h"
@ -157,21 +159,69 @@ const char* fmt_conn_id(const uint32_t* src_addr, uint32_t src_port, const uint3
return fmt_conn_id(src, src_port, dst, dst_port); return fmt_conn_id(src, src_port, dst, dst_port);
} }
std::string fmt_mac(const unsigned char* m, int len) { TEST_CASE("fmt_mac") {
static char buf[25]; auto my_fmt_mac = [](const char* m, int len) {
// allow working with literal strings
return fmt_mac(reinterpret_cast<const unsigned char*>(m), len);
};
CHECK(my_fmt_mac("", 0) == "");
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06", 4) == "");
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06", 6) == "01:02:03:04:05:06");
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06\x00\x00", 8) == "01:02:03:04:05:06");
CHECK(my_fmt_mac("\x01\x02\x03\x04\x05\x06\x07\x08", 8) == "01:02:03:04:05:06:07:08");
CHECK(my_fmt_mac("\x08\x07\x06\x05\x04\x03\x02\x01", 8) == "08:07:06:05:04:03:02:01");
}
std::string fmt_mac(const unsigned char* m, int len) {
auto [mac_bytes, slen] = fmt_mac_bytes(m, len);
return reinterpret_cast<char*>(mac_bytes.get());
}
TEST_CASE("fmt_mac_bytes") {
auto my_fmt_mac_bytes = [](const char* m, int len) {
// allow working with literal strings
return fmt_mac_bytes(reinterpret_cast<const unsigned char*>(m), len);
};
auto [buf1, len1] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06", 4);
CHECK(len1 == 0);
CHECK(memcmp(buf1.get(), "", 1) == 0); // still null terminated
auto [buf2, len2] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06", 6);
CHECK(len2 == 2 * 6 + 5);
CHECK(memcmp(buf2.get(), "01:02:03:04:05:06", len2 + 1) == 0);
auto [buf3, len3] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06\x00\x00", 8);
CHECK(len3 == 2 * 6 + 5);
CHECK(memcmp(buf3.get(), "01:02:03:04:05:06", len3 + 1) == 0);
// Check for no memory overreads
auto [buf4, len4] = my_fmt_mac_bytes("\x01\x02\x03\x04\x05\x06\x07\x08\xff\xff", 42);
CHECK(len4 == 2 * 8 + 7);
CHECK(memcmp(buf4.get(), "01:02:03:04:05:06:07:08", len4 + 1) == 0);
}
std::pair<std::unique_ptr<uint8_t[]>, int> fmt_mac_bytes(const unsigned char* m, int len) {
if ( len < 8 && len != 6 ) { if ( len < 8 && len != 6 ) {
*buf = '\0'; auto buf = std::make_unique<uint8_t[]>(1);
return buf; buf[0] = '\0';
return {std::move(buf), 0};
} }
if ( (len == 6) || (m[6] == 0 && m[7] == 0) ) // EUI-48 int elen = (len == 6 || (m[6] == 0 && m[7] == 0)) ? 6 : 8;
snprintf(buf, sizeof(buf), "%02x:%02x:%02x:%02x:%02x:%02x", m[0], m[1], m[2], m[3], m[4], m[5]); auto slen = 2 * elen + (elen - 1);
else auto buf = std::make_unique<uint8_t[]>(slen + 1);
snprintf(buf, sizeof(buf), "%02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x", m[0], m[1], m[2], m[3], m[4], m[5], m[6], auto* bufp = reinterpret_cast<char*>(buf.get());
m[7]);
return buf; for ( int i = 0; i < elen; i++ ) {
zeek::util::bytetohex(m[i], &bufp[i * 2 + i]);
if ( i < elen - 1 )
bufp[i * 2 + i + 2] = ':';
}
bufp[slen] = '\0';
return {std::move(buf), slen};
} }
uint32_t extract_uint32(const u_char* data) { uint32_t extract_uint32(const u_char* data) {

View file

@ -207,12 +207,29 @@ extern const char* fmt_conn_id(const uint32_t* src_addr, uint32_t src_port, cons
* an empty string. * an empty string.
* *
* @param m EUI-48 or EUI-64 MAC address to format, as a char array * @param m EUI-48 or EUI-64 MAC address to format, as a char array
* @param len Number of bytes valid starting at *n*. This must be at * @param len Number of bytes valid starting at *m*. This must be at
* least 8 for a valid address. * least 6 for a valid address.
* @return A string of the formatted MAC. Passes ownership to caller. * @return A string of the formatted MAC. Passes ownership to caller.
*/ */
extern std::string fmt_mac(const unsigned char* m, int len); extern std::string fmt_mac(const unsigned char* m, int len);
/**
* Given a MAC address, formats it in hex as 00:de:ad:be:ef.
* Supports both EUI-48 and EUI-64. If it's neither, returns
* an empty buffer.
*
* This method returns a unique pointer to a buffer that is
* null terminated. It can, for example, be directly passed
* to a zeek::String instance.
*
* @param m EUI-48 or EUI-64 MAC address to format, as a char array
* @param len Number of bytes valid starting at *m*. This must be at
* least 6 for a valid address.
* @return A pair consisting of a uint8_t buffer and its string length.
* The buffer is null terminated and its actual length is + 1.
*/
extern std::pair<std::unique_ptr<uint8_t[]>, int> fmt_mac_bytes(const unsigned char* m, int len);
// Read 4 bytes from data and return in network order. // Read 4 bytes from data and return in network order.
extern uint32_t extract_uint32(const u_char* data); extern uint32_t extract_uint32(const u_char* data);