specialized ZAM operators for speeding up cat() operations

This commit is contained in:
Vern Paxson 2022-09-16 09:45:05 -07:00
parent 5fe4eb27a8
commit 048e524fbf
10 changed files with 619 additions and 2 deletions

@ -1 +1 @@
Subproject commit 0ea23440be639ae0d3f4269cdb55ef8a1494ed94 Subproject commit 76eb27d2f5bea28e8e60ed0a4a29fc1ec2ec6b4e

View file

@ -404,6 +404,7 @@ set(MAIN_SRCS
script_opt/ZAM/AM-Opt.cc script_opt/ZAM/AM-Opt.cc
script_opt/ZAM/Branches.cc script_opt/ZAM/Branches.cc
script_opt/ZAM/BuiltIn.cc script_opt/ZAM/BuiltIn.cc
script_opt/ZAM/BuiltInSupport.cc
script_opt/ZAM/Driver.cc script_opt/ZAM/Driver.cc
script_opt/ZAM/Expr.cc script_opt/ZAM/Expr.cc
script_opt/ZAM/Inst-Gen.cc script_opt/ZAM/Inst-Gen.cc

View file

@ -49,6 +49,7 @@ bool ZAMCompiler::IsZAM_BuiltIn(const Expr* e)
{"Files::__enable_reassembly", &ZAMCompiler::BuiltIn_Files__enable_reassembly}, {"Files::__enable_reassembly", &ZAMCompiler::BuiltIn_Files__enable_reassembly},
{"Files::__set_reassembly_buffer", &ZAMCompiler::BuiltIn_Files__set_reassembly_buffer}, {"Files::__set_reassembly_buffer", &ZAMCompiler::BuiltIn_Files__set_reassembly_buffer},
{"Log::__write", &ZAMCompiler::BuiltIn_Log__write}, {"Log::__write", &ZAMCompiler::BuiltIn_Log__write},
{"cat", &ZAMCompiler::BuiltIn_cat},
{"current_time", &ZAMCompiler::BuiltIn_current_time}, {"current_time", &ZAMCompiler::BuiltIn_current_time},
{"get_port_transport_proto", &ZAMCompiler::BuiltIn_get_port_etc}, {"get_port_transport_proto", &ZAMCompiler::BuiltIn_get_port_etc},
{"network_time", &ZAMCompiler::BuiltIn_network_time}, {"network_time", &ZAMCompiler::BuiltIn_network_time},
@ -76,7 +77,7 @@ bool ZAMCompiler::BuiltIn_Analyzer__name(const NameExpr* n, const ExprPList& arg
if ( args[0]->Tag() == EXPR_CONST ) if ( args[0]->Tag() == EXPR_CONST )
// Doesn't seem worth developing a variant for this weird // Doesn't seem worth developing a variant for this weird
// usage cast. // usage case.
return false; return false;
int nslot = Frame1Slot(n, OP1_WRITE); int nslot = Frame1Slot(n, OP1_WRITE);
@ -199,6 +200,151 @@ bool ZAMCompiler::BuiltIn_Log__write(const NameExpr* n, const ExprPList& args)
return true; return true;
} }
// Compiles a call to the cat() built-in into a specialized ZAM instruction.
// "n" names where the result goes (nil if the call's value is discarded)
// and "args" holds the call's arguments.  Always returns true, i.e., the
// call is always treated as handled here.
bool ZAMCompiler::BuiltIn_cat(const NameExpr* n, const ExprPList& args)
	{
	if ( ! n )
		{
		reporter->Warning("return value from built-in function ignored");
		return true;
		}

	int nslot = Frame1Slot(n, OP1_WRITE);
	ZInstI z;

	if ( args.empty() )
		{
		// Weird, but easy enough to support.
		z = ZInstI(OP_CAT1_VC, nslot);
		z.t = n->GetType();
		z.c = ZVal(val_mgr->EmptyString());
		}

	else if ( args.size() > 1 )
		{
		// Arities 2 through 8 each have a dedicated instruction;
		// anything larger uses the general N-ary one.
		switch ( args.size() )
			{
			case 2:
				z = GenInst(OP_CAT2_V, n);
				break;

			case 3:
				z = GenInst(OP_CAT3_V, n);
				break;

			case 4:
				z = GenInst(OP_CAT4_V, n);
				break;

			case 5:
				z = GenInst(OP_CAT5_V, n);
				break;

			case 6:
				z = GenInst(OP_CAT6_V, n);
				break;

			case 7:
				z = GenInst(OP_CAT7_V, n);
				break;

			case 8:
				z = GenInst(OP_CAT8_V, n);
				break;

			default:
				z = GenInst(OP_CATN_V, n);
				break;
			}

		z.aux = BuildCatAux(args);
		}

	else
		{
		// Exactly one argument.  Only index into "args" here, where
		// it's known to be non-empty.
		auto& a0 = args[0];

		if ( a0->GetType()->Tag() != TYPE_STRING )
			{
			if ( a0->Tag() == EXPR_NAME )
				{
				// Non-string variable: leave the conversion
				// to the Cat1Full instruction.
				z = GenInst(OP_CAT1FULL_VV, n, a0->AsNameExpr());
				z.t = a0->GetType();
				}
			else
				{
				// Non-string constant: convert it right here
				// and emit a plain string-constant assignment.
				z = ZInstI(OP_CAT1_VC, nslot);
				z.t = n->GetType();
				z.c = ZVal(ZAM_val_cat(a0->AsConstExpr()->ValuePtr()));
				}
			}

		else if ( a0->Tag() == EXPR_CONST )
			{
			z = GenInst(OP_CAT1_VC, n, a0->AsConstExpr());
			z.t = n->GetType();
			}

		else
			z = GenInst(OP_CAT1_VV, n, a0->AsNameExpr());
		}

	AddInst(z);

	return true;
	}
// Builds the auxiliary information for a multi-argument cat(): for each
// argument, records its constant or frame slot in the ZInstAux and pairs
// it with a CatArg object that knows how to size and render it.
// The caller takes over the returned object.
ZInstAux* ZAMCompiler::BuildCatAux(const ExprPList& args)
	{
	auto n = args.size();
	auto aux = new ZInstAux(n);
	aux->cat_args = new std::unique_ptr<CatArg>[n];

	for ( size_t i = 0; i < n; ++i )
		{
		auto& a_i = args[i];
		auto& t = a_i->GetType();

		std::unique_ptr<CatArg> ca;

		if ( a_i->Tag() == EXPR_CONST )
			{
			// Constants are rendered once, here, and stored as
			// plain text in the CatArg.
			auto c = a_i->AsConstExpr()->ValuePtr();
			aux->Add(i, c); // it will be ignored
			auto sv = ZAM_val_cat(c);
			auto s = sv->AsString();
			auto b = reinterpret_cast<char*>(s->Bytes());
			ca = std::make_unique<CatArg>(std::string(b, s->Len()));
			}

		else
			{
			auto slot = FrameSlot(a_i->AsNameExpr());
			aux->Add(i, slot, t);

			// Pick the renderer matching the argument's type.  These
			// need to be genuine "case" labels: written as bare
			// "TYPE_XXX:" they're goto labels, and every type would
			// wind up at "default".
			switch ( t->Tag() )
				{
				case TYPE_BOOL:
				case TYPE_INT:
				case TYPE_COUNT:
				case TYPE_DOUBLE:
				case TYPE_TIME:
				case TYPE_ENUM:
				case TYPE_PORT:
				case TYPE_ADDR:
				case TYPE_SUBNET:
					ca = std::make_unique<FixedCatArg>(t);
					break;

				case TYPE_STRING:
					ca = std::make_unique<StringCatArg>();
					break;

				case TYPE_PATTERN:
					ca = std::make_unique<PatternCatArg>();
					break;

				default:
					ca = std::make_unique<DescCatArg>(t);
					break;
				}
			}

		aux->cat_args[i] = std::move(ca);
		}

	return aux;
	}
bool ZAMCompiler::BuiltIn_current_time(const NameExpr* n, const ExprPList& args) bool ZAMCompiler::BuiltIn_current_time(const NameExpr* n, const ExprPList& args)
{ {
if ( ! n ) if ( ! n )

View file

@ -17,6 +17,8 @@ bool BuiltIn_Broker__flush_logs(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Files__enable_reassembly(const NameExpr* n, const ExprPList& args); bool BuiltIn_Files__enable_reassembly(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Files__set_reassembly_buffer(const NameExpr* n, const ExprPList& args); bool BuiltIn_Files__set_reassembly_buffer(const NameExpr* n, const ExprPList& args);
bool BuiltIn_Log__write(const NameExpr* n, const ExprPList& args); bool BuiltIn_Log__write(const NameExpr* n, const ExprPList& args);
bool BuiltIn_cat(const NameExpr* n, const ExprPList& args);
ZInstAux* BuildCatAux(const ExprPList& args);
bool BuiltIn_current_time(const NameExpr* n, const ExprPList& args); bool BuiltIn_current_time(const NameExpr* n, const ExprPList& args);
bool BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args); bool BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args);
bool BuiltIn_network_time(const NameExpr* n, const ExprPList& args); bool BuiltIn_network_time(const NameExpr* n, const ExprPList& args);

View file

@ -0,0 +1,159 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/ZAM/BuiltInSupport.h"
#include "zeek/IPAddr.h"
#include "zeek/RE.h"
namespace zeek::detail
{
// Sets max_size to an upper bound on the rendered size of any value of
// type "_t".  Types not listed here are reported as an internal error.
FixedCatArg::FixedCatArg(const TypePtr& _t) : t(_t)
	{
	// Note, these need to be genuine "case" labels: written as bare
	// "TYPE_XXX:" they're goto labels, and every type would wind up
	// at "default".
	switch ( t->Tag() )
		{
		case TYPE_BOOL:
			max_size = 1;
			break;

		case TYPE_INT:
			max_size = 20; // sufficient for 64 bits
			break;

		case TYPE_COUNT:
			max_size = 20; // sufficient for 64 bits
			break;

		case TYPE_DOUBLE:
		case TYPE_TIME:
			max_size = 32; // from modp_dtoa2 documentation
			break;

		case TYPE_ENUM:
			{
			// Summing the lengths of all of the names yields a
			// (loose) upper bound on the length of any one of them.
			size_t n = 0;

			for ( const auto& e : t->AsEnumType()->Names() )
				n += e.first.size();

			max_size = n;
			break;
			}

		case TYPE_PORT:
			max_size = 5 + 1 + 7; // <number> + / + "unknown"
			break;

		case TYPE_ADDR:
			max_size = 39; // for IPv6
			break;

		case TYPE_SUBNET:
			max_size = 39 + 1 + 3; // for IPv6 + / + <3-digits>
			break;

		default:
			reporter->InternalError("bad type in FixedCatArg constructor");
		}
	}
// Writes the textual form of the value in zframe[slot] at "res" and
// advances "res" past the bytes written.  Some of the helpers used here
// also write a terminating NUL just past the text; "res" is left pointing
// at it, so whatever gets written next overwrites it.
void FixedCatArg::RenderInto(ZVal* zframe, int slot, char*& res)
	{
	auto& z = zframe[slot];
	int n;
	const char* text;
	std::string str;

	// Note, these need to be genuine "case" labels: written as bare
	// "TYPE_XXX:" they're goto labels, and every type would wind up
	// at "default".
	switch ( t->Tag() )
		{
		case TYPE_BOOL:
			*(res++) = z.AsInt() ? 'T' : 'F';
			break;

		case TYPE_INT:
			n = modp_litoa10(z.AsInt(), res);
			res += n;
			break;

		case TYPE_COUNT:
			n = modp_ulitoa10(z.AsCount(), res);
			res += n;
			break;

		case TYPE_DOUBLE:
		case TYPE_TIME:
			n = modp_dtoa2(z.AsDouble(), res, 6);
			res += n;
			break;

		case TYPE_PATTERN:
			// NOTE(review): the constructor rejects TYPE_PATTERN, so
			// this looks unreachable - confirm.
			text = z.AsPattern()->AsPattern()->PatternText();
			*(res++) = '/';
			strcpy(res, text);
			res += strlen(text);
			*(res++) = '/';
			break;

		case TYPE_ENUM:
			// NOTE(review): presumably Lookup() can't come back nil
			// for a value of this enum type - confirm.
			text = t->AsEnumType()->Lookup(z.AsInt());
			strcpy(res, text);
			res += strlen(text);
			break;

		case TYPE_PORT:
			{
			uint32_t full_p = static_cast<uint32_t>(z.AsCount());
			zeek_uint_t p = full_p & ~PORT_SPACE_MASK;
			n = modp_ulitoa10(p, res);
			res += n;

			if ( (full_p & TCP_PORT_MASK) == TCP_PORT_MASK )
				{
				strcpy(res, "/tcp");
				res += 4;
				}

			else if ( (full_p & UDP_PORT_MASK) == UDP_PORT_MASK )
				{
				strcpy(res, "/udp");
				res += 4;
				}

			else if ( (full_p & ICMP_PORT_MASK) == ICMP_PORT_MASK )
				{
				strcpy(res, "/icmp");
				res += 5;
				}

			else
				{
				strcpy(res, "/unknown");
				res += 8;
				}

			break;
			}

		case TYPE_ADDR:
			str = z.AsAddr()->Get().AsString();
			strcpy(res, str.c_str());
			res += strlen(str.c_str());
			break;

		case TYPE_SUBNET:
			str = z.AsSubNet()->Get().AsString();
			strcpy(res, str.c_str());
			res += strlen(str.c_str());
			break;

		default:
			reporter->InternalError("bad type in FixedCatArg::RenderInto");
		}
	}
// Caches the text (and its length) of the pattern in zframe[slot] for use
// by RenderInto(), and returns the number of bytes rendering it produces.
size_t PatternCatArg::ComputeMaxSize(ZVal* zframe, int slot)
	{
	text = zframe[slot].AsPattern()->AsPattern()->PatternText();
	n = strlen(text);

	// RenderInto() wraps the text in a pair of '/' delimiters, so those
	// two bytes need to be part of the reported size; otherwise a cat()
	// with several pattern arguments can overrun its result buffer.
	return n + 2;
	}
} // zeek::detail

View file

@ -0,0 +1,119 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/Desc.h"
#include "zeek/Expr.h"
namespace zeek::detail
{
// Describes one argument to cat().  Used directly, it represents a constant
// argument whose text is already known; subclasses specialize the sizing
// and rendering for arguments whose values live in the ZAM frame.
class CatArg
	{
public:
	// For a constant argument: "_s" is its already-rendered text.
	CatArg(std::string _s) : max_size(_s.size()), s(std::move(_s)) { }

	virtual ~CatArg() = default;

	// Returns an upper bound on the number of bytes RenderInto() will
	// produce for the value in zframe[slot].  Uses the precomputed bound
	// if there is one, otherwise asks the subclass.
	size_t MaxSize(ZVal* zframe, int slot)
		{
		if ( max_size )
			return *max_size;

		return ComputeMaxSize(zframe, slot);
		}

	// Copies the argument's text to "res" and advances "res" past it.
	// This default version covers constants, and ignores the frame.
	virtual void RenderInto(ZVal* zframe, int slot, char*& res)
		{
		const auto len = *max_size;
		memcpy(res, s->data(), len);
		res += len;
		}

protected:
	// For subclasses that need to size each value dynamically.
	CatArg() { }

	// For subclasses that know their bound up front.
	CatArg(size_t _max_size) : max_size(_max_size) { }

	// Subclasses without a precomputed bound override this.
	virtual size_t ComputeMaxSize(ZVal* zframe, int slot) { return 0; }

	// Set if the maximum size is known ahead of time.
	std::optional<size_t> max_size;

	// Set if the argument is a constant.
	std::optional<std::string> s;
	};
// A cat() argument whose type allows bounding its rendered size ahead of
// time; the bound is computed once, in the constructor.
class FixedCatArg : public CatArg
	{
public:
	FixedCatArg(const TypePtr& t);

	void RenderInto(ZVal* zframe, int slot, char*& res) override;

protected:
	// Held by value (as DescCatArg does) rather than as a reference to
	// the constructor's argument, so that it can't be left dangling if
	// the caller's TypePtr goes away before this object does.
	TypePtr t;

	// NOTE(review): no code visible here uses this buffer - confirm
	// whether it can be removed.
	char tmp[256];
	};
// A cat() argument that's a string variable: its bytes are copied as-is,
// and its size is simply the string's current length.
class StringCatArg : public CatArg
	{
public:
	StringCatArg() : CatArg() { }

	// Copies the bytes of the string in zframe[slot] to "res" and
	// advances "res" past them.
	void RenderInto(ZVal* zframe, int slot, char*& res) override
		{
		auto str = zframe[slot].AsString();
		auto len = str->Len();
		memcpy(res, str->Bytes(), len);
		res += len;
		}

protected:
	// The length of the string presently in zframe[slot].
	size_t ComputeMaxSize(ZVal* zframe, int slot) override
		{
		auto str = zframe[slot].AsString();
		return str->Len();
		}
	};
// A cat() argument that's a pattern variable, rendered as the pattern's
// text enclosed in '/' delimiters.  ComputeMaxSize() caches the text and
// its length; RenderInto() then uses those cached values.
class PatternCatArg : public CatArg
	{
public:
	PatternCatArg() : CatArg() { }

	// Writes '/', the cached pattern text, and another '/' to "res",
	// advancing "res" past them.
	void RenderInto(ZVal* zframe, int slot, char*& res) override
		{
		*(res++) = '/';

		// Nothing's cached until ComputeMaxSize() has run, so don't
		// touch "text" in that case.
		if ( text )
			{
			memcpy(res, text, n);
			res += n;
			}

		*(res++) = '/';
		}

protected:
	size_t ComputeMaxSize(ZVal* zframe, int slot) override;

	// Cached by ComputeMaxSize().  Start out nil rather than
	// uninitialized.
	const char* text = nullptr;
	size_t n = 0;
	};
// A cat() argument of a type without a specialized renderer.  Sizing the
// value describes it into an internal ODesc (in RAW_STYLE); rendering then
// copies out the ODesc's contents and clears it for the next use.
class DescCatArg : public CatArg
	{
public:
	DescCatArg(const TypePtr& _t) : CatArg(), t(_t) { d.SetStyle(RAW_STYLE); }

	// Copies whatever "d" currently holds to "res", advances "res" past
	// it, and then empties "d".
	void RenderInto(ZVal* zframe, int slot, char*& res) override
		{
		auto len = d.Len();
		memcpy(res, d.Bytes(), len);
		res += len;

		d.Clear();
		}

protected:
	// Describes the value in zframe[slot] into "d" and returns the
	// resulting length.
	size_t ComputeMaxSize(ZVal* zframe, int slot) override
		{
		auto v = zframe[slot].ToVal(t);
		v->Describe(&d);
		return d.Len();
		}

	ODesc d;
	TypePtr t;
	};
} // namespace zeek::detail

View file

@ -2173,6 +2173,174 @@ internal-op StrStr
type VVC type VVC
eval EvalStrStr(frame[z.v2], z.c) eval EvalStrStr(frame[z.v2], z.c)
# Assigns "val" to frame[z.v1]: releases the slot's previous managed value,
# stores "val", and then adds a reference to the newly stored string.
# NOTE(review): the old value is released before "val" is evaluated - if
# "val" can alias frame[z.v1], confirm that's safe.
# NOTE(review): the Ref is unconditional - confirm it's also what's wanted
# when "val" is a freshly created value.
macro Cat1Op(val)
auto& v1 = frame[z.v1];
ZVal::DeleteManagedType(v1);
v1 = val;
zeek::Ref(v1.string_val);
# Single-argument cat() where the result is the instruction's constant:
# assigns z.c to frame[z.v1].
internal-op Cat1
type VC
eval Cat1Op(z.c)
# Single-argument cat() of a frame variable: assigns frame[z.v2] to
# frame[z.v1].
internal-op Cat1
type VV
eval Cat1Op(frame[z.v2])
# Converts "val" (of type z.t) to a Val, renders it to a string using
# ZAM_val_cat(), and assigns the result to frame[z.v1].
macro Cat1FullVal(val)
Cat1Op(ZVal(ZAM_val_cat(val.ToVal(z.t))))
# Single-argument cat() that renders the instruction's constant at run-time.
internal-op Cat1Full
type VC
eval Cat1FullVal(z.c)
# Single-argument cat() that renders the frame variable frame[z.v2] at
# run-time.
internal-op Cat1Full
type VV
eval Cat1FullVal(frame[z.v2])
# General N-argument cat().  Sums each argument's MaxSize() to size the
# result buffer (plus n + 1 bytes of slop), has each argument render itself
# into the buffer in order, NUL-terminates it, and assigns the resulting
# string to frame[z.v1].
internal-op CatN
type V
eval auto aux = z.aux;
auto slots = z.aux->slots;
auto& ca = aux->cat_args;
int n = aux->n;
size_t max_size = 0;
for ( int i = 0; i < n; ++i )
max_size += ca[i]->MaxSize(frame, slots[i]);
auto res = new char[max_size + /* slop */ n + 1];
auto res_p = res;
for ( int i = 0; i < n; ++i )
ca[i]->RenderInto(frame, slots[i], res_p);
*res_p = '\0';
auto s = new String(true, reinterpret_cast<byte_vec>(res), res_p - res);
Cat1Op(ZVal(new StringVal(s)))
# Shared prologue for the fixed-arity cat() operations: binds "aux", its
# "slots", and "ca" (the per-argument CatArg array).
macro CatNPre()
auto aux = z.aux;
auto slots = z.aux->slots;
auto& ca = aux->cat_args;
# Allocates the result buffer "res" (with write cursor "res_p"), sized as
# "max_size" - which the user of the macro needs to have defined - plus
# 10 bytes of slop.
macro CatNMid()
auto res = new char[max_size + /* slop */ 10];
auto res_p = res;
# Shared epilogue: NUL-terminates the buffer, wraps the rendered bytes in
# a String and then a StringVal, and assigns that to frame[z.v1].
macro CatNPost()
*res_p = '\0';
auto s = new String(true, reinterpret_cast<byte_vec>(res), res_p - res);
Cat1Op(ZVal(new StringVal(s)))
# cat() of exactly 2 arguments, with the sizing and rendering unrolled.
internal-op Cat2
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
CatNPost()
# cat() of exactly 3 arguments, with the sizing and rendering unrolled.
internal-op Cat3
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
max_size += ca[2]->MaxSize(frame, slots[2]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
ca[2]->RenderInto(frame, slots[2], res_p);
CatNPost()
# cat() of exactly 4 arguments, with the sizing and rendering unrolled.
internal-op Cat4
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
max_size += ca[2]->MaxSize(frame, slots[2]);
max_size += ca[3]->MaxSize(frame, slots[3]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
ca[2]->RenderInto(frame, slots[2], res_p);
ca[3]->RenderInto(frame, slots[3], res_p);
CatNPost()
# cat() of exactly 5 arguments, with the sizing and rendering unrolled.
internal-op Cat5
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
max_size += ca[2]->MaxSize(frame, slots[2]);
max_size += ca[3]->MaxSize(frame, slots[3]);
max_size += ca[4]->MaxSize(frame, slots[4]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
ca[2]->RenderInto(frame, slots[2], res_p);
ca[3]->RenderInto(frame, slots[3], res_p);
ca[4]->RenderInto(frame, slots[4], res_p);
CatNPost()
# cat() of exactly 6 arguments, with the sizing and rendering unrolled.
internal-op Cat6
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
max_size += ca[2]->MaxSize(frame, slots[2]);
max_size += ca[3]->MaxSize(frame, slots[3]);
max_size += ca[4]->MaxSize(frame, slots[4]);
max_size += ca[5]->MaxSize(frame, slots[5]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
ca[2]->RenderInto(frame, slots[2], res_p);
ca[3]->RenderInto(frame, slots[3], res_p);
ca[4]->RenderInto(frame, slots[4], res_p);
ca[5]->RenderInto(frame, slots[5], res_p);
CatNPost()
# cat() of exactly 7 arguments, with the sizing and rendering unrolled.
internal-op Cat7
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
max_size += ca[2]->MaxSize(frame, slots[2]);
max_size += ca[3]->MaxSize(frame, slots[3]);
max_size += ca[4]->MaxSize(frame, slots[4]);
max_size += ca[5]->MaxSize(frame, slots[5]);
max_size += ca[6]->MaxSize(frame, slots[6]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
ca[2]->RenderInto(frame, slots[2], res_p);
ca[3]->RenderInto(frame, slots[3], res_p);
ca[4]->RenderInto(frame, slots[4], res_p);
ca[5]->RenderInto(frame, slots[5], res_p);
ca[6]->RenderInto(frame, slots[6], res_p);
CatNPost()
# cat() of exactly 8 arguments, with the sizing and rendering unrolled.
internal-op Cat8
type V
eval CatNPre()
size_t max_size = ca[0]->MaxSize(frame, slots[0]);
max_size += ca[1]->MaxSize(frame, slots[1]);
max_size += ca[2]->MaxSize(frame, slots[2]);
max_size += ca[3]->MaxSize(frame, slots[3]);
max_size += ca[4]->MaxSize(frame, slots[4]);
max_size += ca[5]->MaxSize(frame, slots[5]);
max_size += ca[6]->MaxSize(frame, slots[6]);
max_size += ca[7]->MaxSize(frame, slots[7]);
CatNMid()
ca[0]->RenderInto(frame, slots[0], res_p);
ca[1]->RenderInto(frame, slots[1], res_p);
ca[2]->RenderInto(frame, slots[2], res_p);
ca[3]->RenderInto(frame, slots[3], res_p);
ca[4]->RenderInto(frame, slots[4], res_p);
ca[5]->RenderInto(frame, slots[5], res_p);
ca[6]->RenderInto(frame, slots[6], res_p);
ca[7]->RenderInto(frame, slots[7], res_p);
CatNPost()
internal-op Analyzer--Name internal-op Analyzer--Name
type VV type VV
eval auto atype = frame[z.v2].ToVal(z.t); eval auto atype = frame[z.v2].ToVal(z.t);

View file

@ -72,6 +72,20 @@ StringVal* ZAM_sub_bytes(const StringVal* s, zeek_uint_t start, zeek_int_t n)
return new StringVal(ss ? ss : new String("")); return new StringVal(ss ? ss : new String(""));
} }
// Renders a single value the way cat() would, returning the text as a
// new StringVal.
StringValPtr ZAM_val_cat(const ValPtr& v)
	{
	zeek::ODesc desc;
	desc.SetStyle(RAW_STYLE);

	v->Describe(&desc);

	// The String takes over the description's buffer, and needs to
	// release it using free().
	auto* str = new String(true, desc.TakeBytes(), desc.Len());
	str->SetUseFreeToDelete(true);

	return make_intrusive<StringVal>(str);
	}
void ZAM_run_time_error(const char* msg) void ZAM_run_time_error(const char* msg)
{ {
fprintf(stderr, "%s\n", msg); fprintf(stderr, "%s\n", msg);

View file

@ -54,4 +54,6 @@ extern void ZAM_run_time_warning(const Location* loc, const char* msg);
extern StringVal* ZAM_to_lower(const StringVal* sv); extern StringVal* ZAM_to_lower(const StringVal* sv);
extern StringVal* ZAM_sub_bytes(const StringVal* s, zeek_uint_t start, zeek_int_t n); extern StringVal* ZAM_sub_bytes(const StringVal* s, zeek_uint_t start, zeek_int_t n);
extern StringValPtr ZAM_val_cat(const ValPtr& v);
} // namespace zeek::detail } // namespace zeek::detail

View file

@ -4,6 +4,8 @@
#pragma once #pragma once
#include "zeek/Desc.h"
#include "zeek/script_opt/ZAM/BuiltInSupport.h"
#include "zeek/script_opt/ZAM/Support.h" #include "zeek/script_opt/ZAM/Support.h"
#include "zeek/script_opt/ZAM/ZOp.h" #include "zeek/script_opt/ZAM/ZOp.h"
@ -319,6 +321,7 @@ public:
delete[] ints; delete[] ints;
delete[] constants; delete[] constants;
delete[] types; delete[] types;
delete[] cat_args;
} }
// Returns the i'th element of the parallel arrays as a ValPtr. // Returns the i'th element of the parallel arrays as a ValPtr.
@ -405,6 +408,9 @@ public:
ValPtr* constants = nullptr; ValPtr* constants = nullptr;
TypePtr* types = nullptr; TypePtr* types = nullptr;
// A parallel array for the cat() built-in replacement.
std::unique_ptr<CatArg>* cat_args = nullptr;
// Used for accessing function names. // Used for accessing function names.
const ID* id_val = nullptr; const ID* id_val = nullptr;