diff --git a/src/3rdparty b/src/3rdparty
index 0ea23440be..76eb27d2f5 160000
--- a/src/3rdparty
+++ b/src/3rdparty
@@ -1 +1 @@
-Subproject commit 0ea23440be639ae0d3f4269cdb55ef8a1494ed94
+Subproject commit 76eb27d2f5bea28e8e60ed0a4a29fc1ec2ec6b4e
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 2b17525860..b115055e08 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -404,6 +404,7 @@ set(MAIN_SRCS
     script_opt/ZAM/AM-Opt.cc
     script_opt/ZAM/Branches.cc
     script_opt/ZAM/BuiltIn.cc
+    script_opt/ZAM/BuiltInSupport.cc
     script_opt/ZAM/Driver.cc
     script_opt/ZAM/Expr.cc
     script_opt/ZAM/Inst-Gen.cc
diff --git a/src/script_opt/ZAM/BuiltIn.cc b/src/script_opt/ZAM/BuiltIn.cc
index 9f78df21d7..7cf0794d0a 100644
--- a/src/script_opt/ZAM/BuiltIn.cc
+++ b/src/script_opt/ZAM/BuiltIn.cc
@@ -49,6 +49,7 @@ bool ZAMCompiler::IsZAM_BuiltIn(const Expr* e)
 		{"Files::__enable_reassembly", &ZAMCompiler::BuiltIn_Files__enable_reassembly},
 		{"Files::__set_reassembly_buffer", &ZAMCompiler::BuiltIn_Files__set_reassembly_buffer},
 		{"Log::__write", &ZAMCompiler::BuiltIn_Log__write},
+		{"cat", &ZAMCompiler::BuiltIn_cat},
 		{"current_time", &ZAMCompiler::BuiltIn_current_time},
 		{"get_port_transport_proto", &ZAMCompiler::BuiltIn_get_port_etc},
 		{"network_time", &ZAMCompiler::BuiltIn_network_time},
@@ -76,7 +77,7 @@ bool ZAMCompiler::BuiltIn_Analyzer__name(const NameExpr* n, const ExprPList& arg
 
 	if ( args[0]->Tag() == EXPR_CONST )
 		// Doesn't seem worth developing a variant for this weird
-		// usage cast.
+		// usage case.
 		return false;
 
 	int nslot = Frame1Slot(n, OP1_WRITE);
@@ -199,6 +200,157 @@ bool ZAMCompiler::BuiltIn_Log__write(const NameExpr* n, const ExprPList& args)
 	return true;
 	}
 
+// Compiles a call to cat() into a dedicated ZAM instruction, specializing
+// on the number of arguments. Returns true once the instruction has been
+// added (or the call was dropped because its result is unused).
+bool ZAMCompiler::BuiltIn_cat(const NameExpr* n, const ExprPList& args)
+	{
+	if ( ! n )
+		{
+		reporter->Warning("return value from built-in function ignored");
+		return true;
+		}
+
+	int nslot = Frame1Slot(n, OP1_WRITE);
+	auto a0 = args.empty() ? nullptr : args[0]; // only index a non-empty list
+	ZInstI z;
+
+	if ( args.empty() )
+		{
+		// Weird, but easy enough to support.
+		z = ZInstI(OP_CAT1_VC, nslot);
+		z.t = n->GetType();
+		z.c = ZVal(val_mgr->EmptyString());
+		}
+
+	else if ( args.size() > 1 )
+		{
+		switch ( args.size() )
+			{
+			case 2:
+				z = GenInst(OP_CAT2_V, n);
+				break;
+			case 3:
+				z = GenInst(OP_CAT3_V, n);
+				break;
+			case 4:
+				z = GenInst(OP_CAT4_V, n);
+				break;
+			case 5:
+				z = GenInst(OP_CAT5_V, n);
+				break;
+			case 6:
+				z = GenInst(OP_CAT6_V, n);
+				break;
+			case 7:
+				z = GenInst(OP_CAT7_V, n);
+				break;
+			case 8:
+				z = GenInst(OP_CAT8_V, n);
+				break;
+
+			default:
+				z = GenInst(OP_CATN_V, n);
+				break;
+			}
+
+		z.aux = BuildCatAux(args);
+		}
+
+	else if ( a0->GetType()->Tag() != TYPE_STRING )
+		{
+		if ( a0->Tag() == EXPR_NAME )
+			{
+			z = GenInst(OP_CAT1FULL_VV, n, a0->AsNameExpr());
+			z.t = a0->GetType();
+			}
+		else
+			{
+			z = ZInstI(OP_CAT1_VC, nslot);
+			z.t = n->GetType();
+			z.c = ZVal(ZAM_val_cat(a0->AsConstExpr()->ValuePtr()));
+			}
+		}
+
+	else if ( a0->Tag() == EXPR_CONST )
+		{
+		z = GenInst(OP_CAT1_VC, n, a0->AsConstExpr());
+		z.t = n->GetType();
+		}
+
+	else
+		z = GenInst(OP_CAT1_VV, n, a0->AsNameExpr());
+
+	AddInst(z);
+
+	return true;
+	}
+
+// Builds the auxiliary per-argument information for a multi-argument cat():
+// one CatArg renderer per argument, chosen by whether the argument is a
+// constant and, for variables, by its type.
+ZInstAux* ZAMCompiler::BuildCatAux(const ExprPList& args)
+	{
+	auto n = args.size();
+	auto aux = new ZInstAux(n);
+	aux->cat_args = new std::unique_ptr<CatArg>[n];
+
+	for ( size_t i = 0; i < n; ++i )
+		{
+		auto& a_i = args[i];
+		auto& t = a_i->GetType();
+
+		std::unique_ptr<CatArg> ca;
+
+		if ( a_i->Tag() == EXPR_CONST )
+			{
+			auto c = a_i->AsConstExpr()->ValuePtr();
+			aux->Add(i, c); // it will be ignored
+			auto sv = ZAM_val_cat(c);
+			auto s = sv->AsString();
+			auto b = reinterpret_cast<const char*>(s->Bytes());
+			ca = std::make_unique<CatArg>(std::string(b, s->Len()));
+			}
+
+		else
+			{
+			auto slot = FrameSlot(a_i->AsNameExpr());
+			aux->Add(i, slot, t);
+
+			switch ( t->Tag() )
+				{
+				case TYPE_BOOL: // "case" is required: a bare "TYPE_X:" is only a goto label
+				case TYPE_INT:
+				case TYPE_COUNT:
+				case TYPE_DOUBLE:
+				case TYPE_TIME:
+				case TYPE_ENUM:
+				case TYPE_PORT:
+				case TYPE_ADDR:
+				case TYPE_SUBNET:
+					ca = std::make_unique<FixedCatArg>(t);
+					break;
+
+				case TYPE_STRING:
+					ca = std::make_unique<StringCatArg>();
+					break;
+
+				case TYPE_PATTERN:
+					ca = std::make_unique<PatternCatArg>();
+					break;
+
+				default:
+					ca = std::make_unique<DescCatArg>(t);
+					break;
+				}
+			}
+
+		aux->cat_args[i] = std::move(ca);
+		}
+
+	return aux;
+	}
+
 bool ZAMCompiler::BuiltIn_current_time(const NameExpr* n, const ExprPList& args)
 	{
 	if ( ! n )
diff --git a/src/script_opt/ZAM/BuiltIn.h b/src/script_opt/ZAM/BuiltIn.h
index 08467dcd95..3482a2cd7f 100644
--- a/src/script_opt/ZAM/BuiltIn.h
+++ b/src/script_opt/ZAM/BuiltIn.h
@@ -17,6 +17,8 @@ bool BuiltIn_Broker__flush_logs(const NameExpr* n, const ExprPList& args);
 bool BuiltIn_Files__enable_reassembly(const NameExpr* n, const ExprPList& args);
 bool BuiltIn_Files__set_reassembly_buffer(const NameExpr* n, const ExprPList& args);
 bool BuiltIn_Log__write(const NameExpr* n, const ExprPList& args);
+bool BuiltIn_cat(const NameExpr* n, const ExprPList& args);
+ZInstAux* BuildCatAux(const ExprPList& args);
 bool BuiltIn_current_time(const NameExpr* n, const ExprPList& args);
 bool BuiltIn_get_port_etc(const NameExpr* n, const ExprPList& args);
 bool BuiltIn_network_time(const NameExpr* n, const ExprPList& args);
diff --git a/src/script_opt/ZAM/BuiltInSupport.cc b/src/script_opt/ZAM/BuiltInSupport.cc
new file mode 100644
index 0000000000..7e26a3ff6c
--- /dev/null
+++ b/src/script_opt/ZAM/BuiltInSupport.cc
@@ -0,0 +1,167 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#include "zeek/script_opt/ZAM/BuiltInSupport.h"
+
+#include "zeek/IPAddr.h"
+#include "zeek/RE.h"
+
+namespace zeek::detail
+	{
+
+// Records the type and precomputes the maximum rendered length for values
+// of that type, so no per-call sizing is needed.
+FixedCatArg::FixedCatArg(const TypePtr& _t) : t(_t)
+	{
+	switch ( t->Tag() )
+		{
+		case TYPE_BOOL:
+			max_size = 1;
+			break;
+
+		case TYPE_INT:
+			max_size = 20; // sufficient for 64 bits
+			break;
+
+		case TYPE_COUNT:
+			max_size = 20; // sufficient for 64 bits
+			break;
+
+		case TYPE_DOUBLE:
+		case TYPE_TIME:
+			max_size = 32; // from modp_dtoa2 documentation
+			break;
+
+		case TYPE_ENUM:
+			{
+			size_t n = 0;
+			for ( const auto& e : t->AsEnumType()->Names() )
+				n += e.first.size();
+			max_size = n; // sum of all names: a (generous) upper bound
+			break;
+			}
+
+		case TYPE_PORT:
+			max_size = 5 + 1 + 7; // <port> + '/' + "unknown"
+			break;
+
+		case TYPE_ADDR:
+			max_size = 39; // for IPv6
+			break;
+
+		case TYPE_SUBNET:
+			max_size = 39 + 1 + 3; // for IPv6 + / + <3-digits>
+			break;
+
+		default:
+			reporter->InternalError("bad type in FixedCatArg constructor");
+		}
+	}
+
+// Writes the text form of the value in zframe[slot] at "res" and advances
+// "res" past it. The strcpy() calls also write a terminating NUL just past
+// the text, so callers need to leave room for one extra byte.
+void FixedCatArg::RenderInto(ZVal* zframe, int slot, char*& res)
+	{
+	auto& z = zframe[slot];
+	int n;
+	const char* text;
+	std::string str;
+
+	switch ( t->Tag() )
+		{
+		case TYPE_BOOL:
+			*(res++) = z.AsInt() ? 'T' : 'F';
+			break;
+
+		case TYPE_INT:
+			n = modp_litoa10(z.AsInt(), res);
+			res += n;
+			break;
+
+		case TYPE_COUNT:
+			n = modp_ulitoa10(z.AsCount(), res);
+			res += n;
+			break;
+
+		case TYPE_DOUBLE:
+		case TYPE_TIME:
+			n = modp_dtoa2(z.AsDouble(), res, 6);
+			res += n;
+			break;
+
+		case TYPE_PATTERN:
+			text = z.AsPattern()->AsPattern()->PatternText();
+			*(res++) = '/';
+			strcpy(res, text);
+			res += strlen(text);
+			*(res++) = '/';
+			break;
+
+		case TYPE_ENUM:
+			text = t->AsEnumType()->Lookup(z.AsInt()); // NOTE(review): assumes Lookup() never yields null here - confirm
+			strcpy(res, text);
+			res += strlen(text);
+			break;
+
+		case TYPE_PORT:
+			{
+			uint32_t full_p = static_cast<uint32_t>(z.AsCount());
+			zeek_uint_t p = full_p & ~PORT_SPACE_MASK;
+			n = modp_ulitoa10(p, res);
+			res += n;
+
+			// Match on the full protocol field so masks sharing bits can't be confused.
+			if ( (full_p & PORT_SPACE_MASK) == TCP_PORT_MASK )
+				{
+				strcpy(res, "/tcp");
+				res += 4;
+				}
+
+			else if ( (full_p & PORT_SPACE_MASK) == UDP_PORT_MASK )
+				{
+				strcpy(res, "/udp");
+				res += 4;
+				}
+
+			else if ( (full_p & PORT_SPACE_MASK) == ICMP_PORT_MASK )
+				{
+				strcpy(res, "/icmp");
+				res += 5;
+				}
+
+			else
+				{
+				strcpy(res, "/unknown");
+				res += 8;
+				}
+
+			break;
+			}
+
+		case TYPE_ADDR:
+			str = z.AsAddr()->Get().AsString();
+			strcpy(res, str.c_str());
+			res += strlen(str.c_str());
+			break;
+
+		case TYPE_SUBNET:
+			str = z.AsSubNet()->Get().AsString();
+			strcpy(res, str.c_str());
+			res += strlen(str.c_str());
+			break;
+
+		default:
+			reporter->InternalError("bad type in FixedCatArg::RenderInto");
+		}
+	}
+
+// Caches the pattern's text and length for the following RenderInto() call,
+// and reports the rendered size including the two enclosing '/' characters.
+size_t PatternCatArg::ComputeMaxSize(ZVal* zframe, int slot)
+	{
+	text = zframe[slot].AsPattern()->AsPattern()->PatternText();
+	n = strlen(text);
+	return n + 2;
+	}
+
+	} // zeek::detail
diff --git a/src/script_opt/ZAM/BuiltInSupport.h b/src/script_opt/ZAM/BuiltInSupport.h
new file mode 100644
index 0000000000..a3193eaa89
--- /dev/null
+++ b/src/script_opt/ZAM/BuiltInSupport.h
@@ -0,0 +1,130 @@
+// See the file "COPYING" in the main distribution directory for copyright.
+
+#pragma once
+
+#include "zeek/Desc.h"
+#include "zeek/Expr.h"
+
+namespace zeek::detail
+	{
+
+// Base class for tracking information about a single cat() argument, with
+// optimizations for some common cases.
+class CatArg
+	{
+public:
+	explicit CatArg(std::string _s) : s(std::move(_s)) { max_size = s->size(); }
+
+	virtual ~CatArg() = default;
+
+	// Upper bound on the bytes RenderInto() will produce for this argument.
+	size_t MaxSize(ZVal* zframe, int slot)
+		{
+		return max_size ? *max_size : ComputeMaxSize(zframe, slot);
+		}
+
+	// Appends the argument's text at "res" and advances "res" past it.
+	virtual void RenderInto(ZVal* zframe, int slot, char*& res)
+		{
+		auto n = *max_size;
+		memcpy(res, s->data(), n);
+		res += n;
+		}
+
+protected:
+	CatArg() = default;
+	explicit CatArg(size_t _max_size) : max_size(_max_size) { }
+
+	virtual size_t ComputeMaxSize(ZVal* zframe, int slot) { return 0; }
+
+	// Present if max size is known a priori.
+	std::optional<size_t> max_size;
+
+	// Present if the argument is a constant.
+	std::optional<std::string> s;
+	};
+
+// An argument whose type has a bounded textual size, computed once at
+// construction.
+class FixedCatArg : public CatArg
+	{
+public:
+	explicit FixedCatArg(const TypePtr& t);
+
+	void RenderInto(ZVal* zframe, int slot, char*& res) override;
+
+protected:
+	// Held by value so the type stays alive as long as this object.
+	TypePtr t;
+	};
+
+// A string-valued variable: copied verbatim, sized at run time.
+class StringCatArg : public CatArg
+	{
+public:
+	StringCatArg() : CatArg() { }
+
+	void RenderInto(ZVal* zframe, int slot, char*& res) override
+		{
+		auto s = zframe[slot].AsString();
+		auto n = s->Len();
+		memcpy(res, s->Bytes(), n);
+		res += n;
+		}
+
+protected:
+	size_t ComputeMaxSize(ZVal* zframe, int slot) override
+		{
+		return zframe[slot].AsString()->Len();
+		}
+	};
+
+// A pattern-valued variable, rendered as /text/. ComputeMaxSize() must run
+// before RenderInto(), as it caches the text and length used there.
+class PatternCatArg : public CatArg
+	{
+public:
+	PatternCatArg() : CatArg() { }
+
+	void RenderInto(ZVal* zframe, int slot, char*& res) override
+		{
+		*(res++) = '/';
+		strcpy(res, text);
+		res += n;
+		*(res++) = '/';
+		}
+
+protected:
+	size_t ComputeMaxSize(ZVal* zframe, int slot) override;
+
+	const char* text = nullptr;
+	size_t n = 0;
+	};
+
+// Any other variable: rendered via Describe() into a scratch ODesc during
+// ComputeMaxSize(), then copied out (and the scratch cleared) by RenderInto().
+class DescCatArg : public CatArg
+	{
+public:
+	explicit DescCatArg(const TypePtr& _t) : CatArg(), t(_t) { d.SetStyle(RAW_STYLE); }
+
+	void RenderInto(ZVal* zframe, int slot, char*& res) override
+		{
+		auto n = d.Len();
+		memcpy(res, d.Bytes(), n);
+		res += n;
+		d.Clear();
+		}
+
+protected:
+	size_t ComputeMaxSize(ZVal* zframe, int slot) override
+		{
+		zframe[slot].ToVal(t)->Describe(&d);
+		return d.Len();
+		}
+
+	ODesc d;
+	TypePtr t;
+	};
+
+	} // namespace zeek::detail
diff --git a/src/script_opt/ZAM/Ops.in b/src/script_opt/ZAM/Ops.in
index 5fcb63adf2..f07328b524 100644
--- a/src/script_opt/ZAM/Ops.in
+++ b/src/script_opt/ZAM/Ops.in
@@ -2173,6 +2173,174 @@ internal-op StrStr
 type VVC
 eval	EvalStrStr(frame[z.v2], z.c)
 
+macro Cat1Op(val)
+	auto& v1 = frame[z.v1];
+	ZVal::DeleteManagedType(v1);
+	v1 = val;
+	zeek::Ref(v1.string_val);
+
+internal-op Cat1
+type VC
+eval	Cat1Op(z.c)
+
+internal-op Cat1
+type VV
+eval	Cat1Op(frame[z.v2])
+
+macro Cat1FullVal(val)
+	Cat1Op(ZVal(ZAM_val_cat(val.ToVal(z.t))))
+
+internal-op Cat1Full
+type VC
+eval	Cat1FullVal(z.c)
+
+internal-op Cat1Full
+type VV
+eval	Cat1FullVal(frame[z.v2])
+
+internal-op CatN
+type V
+eval	auto aux = z.aux;
+	auto slots = z.aux->slots;
+	auto& ca = aux->cat_args;
+	int n = aux->n;
+	size_t max_size = 0;
+	for ( int i = 0; i < n; ++i )
+		max_size += ca[i]->MaxSize(frame, slots[i]);
+	auto res = new char[max_size + /* slop */ n + 1];
+	auto res_p = res;
+	for ( int i = 0; i < n; ++i )
+		ca[i]->RenderInto(frame, slots[i], res_p);
+	*res_p = '\0';
+	auto s = new String(true, reinterpret_cast<byte_vec>(res), res_p - res);
+	Cat1Op(ZVal(new StringVal(s)))
+
+macro CatNPre()
+	auto aux = z.aux;
+	auto slots = z.aux->slots;
+	auto& ca = aux->cat_args;
+
+macro CatNMid()
+	auto res = new char[max_size + /* slop */ 10];
+	auto res_p = res;
+
+macro CatNPost()
+	*res_p = '\0';
+	auto s = new String(true, reinterpret_cast<byte_vec>(res), res_p - res);
+	Cat1Op(ZVal(new StringVal(s)))
+
+internal-op Cat2
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	CatNPost()
+
+internal-op Cat3
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	max_size += ca[2]->MaxSize(frame, slots[2]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	ca[2]->RenderInto(frame, slots[2], res_p);
+	CatNPost()
+
+internal-op Cat4
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	max_size += ca[2]->MaxSize(frame, slots[2]);
+	max_size += ca[3]->MaxSize(frame, slots[3]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	ca[2]->RenderInto(frame, slots[2], res_p);
+	ca[3]->RenderInto(frame, slots[3], res_p);
+	CatNPost()
+
+internal-op Cat5
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	max_size += ca[2]->MaxSize(frame, slots[2]);
+	max_size += ca[3]->MaxSize(frame, slots[3]);
+	max_size += ca[4]->MaxSize(frame, slots[4]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	ca[2]->RenderInto(frame, slots[2], res_p);
+	ca[3]->RenderInto(frame, slots[3], res_p);
+	ca[4]->RenderInto(frame, slots[4], res_p);
+	CatNPost()
+
+internal-op Cat6
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	max_size += ca[2]->MaxSize(frame, slots[2]);
+	max_size += ca[3]->MaxSize(frame, slots[3]);
+	max_size += ca[4]->MaxSize(frame, slots[4]);
+	max_size += ca[5]->MaxSize(frame, slots[5]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	ca[2]->RenderInto(frame, slots[2], res_p);
+	ca[3]->RenderInto(frame, slots[3], res_p);
+	ca[4]->RenderInto(frame, slots[4], res_p);
+	ca[5]->RenderInto(frame, slots[5], res_p);
+	CatNPost()
+
+internal-op Cat7
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	max_size += ca[2]->MaxSize(frame, slots[2]);
+	max_size += ca[3]->MaxSize(frame, slots[3]);
+	max_size += ca[4]->MaxSize(frame, slots[4]);
+	max_size += ca[5]->MaxSize(frame, slots[5]);
+	max_size += ca[6]->MaxSize(frame, slots[6]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	ca[2]->RenderInto(frame, slots[2], res_p);
+	ca[3]->RenderInto(frame, slots[3], res_p);
+	ca[4]->RenderInto(frame, slots[4], res_p);
+	ca[5]->RenderInto(frame, slots[5], res_p);
+	ca[6]->RenderInto(frame, slots[6], res_p);
+	CatNPost()
+
+internal-op Cat8
+type V
+eval	CatNPre()
+	size_t max_size = ca[0]->MaxSize(frame, slots[0]);
+	max_size += ca[1]->MaxSize(frame, slots[1]);
+	max_size += ca[2]->MaxSize(frame, slots[2]);
+	max_size += ca[3]->MaxSize(frame, slots[3]);
+	max_size += ca[4]->MaxSize(frame, slots[4]);
+	max_size += ca[5]->MaxSize(frame, slots[5]);
+	max_size += ca[6]->MaxSize(frame, slots[6]);
+	max_size += ca[7]->MaxSize(frame, slots[7]);
+	CatNMid()
+	ca[0]->RenderInto(frame, slots[0], res_p);
+	ca[1]->RenderInto(frame, slots[1], res_p);
+	ca[2]->RenderInto(frame, slots[2], res_p);
+	ca[3]->RenderInto(frame, slots[3], res_p);
+	ca[4]->RenderInto(frame, slots[4], res_p);
+	ca[5]->RenderInto(frame, slots[5], res_p);
+	ca[6]->RenderInto(frame, slots[6], res_p);
+	ca[7]->RenderInto(frame, slots[7], res_p);
+	CatNPost()
+
 internal-op Analyzer--Name
 type VV
 eval	auto atype = frame[z.v2].ToVal(z.t);
diff --git a/src/script_opt/ZAM/Support.cc b/src/script_opt/ZAM/Support.cc
index 81aeaad340..a9c25b2b7d 100644
--- a/src/script_opt/ZAM/Support.cc
+++ b/src/script_opt/ZAM/Support.cc
@@ -72,6 +72,20 @@ StringVal* ZAM_sub_bytes(const StringVal* s, zeek_uint_t start, zeek_int_t n)
 	return new StringVal(ss ? ss : new String(""));
 	}
 
+StringValPtr ZAM_val_cat(const ValPtr& v)
+	{
+	// Quite similar to cat(), but for only one value: describes "v" in raw style and wraps the text in a StringVal.
+	zeek::ODesc d;
+	d.SetStyle(RAW_STYLE);
+
+	v->Describe(&d);
+	const auto len = d.Len(); // capture the length first; argument evaluation order is unspecified
+	String* s = new String(true, d.TakeBytes(), len);
+	s->SetUseFreeToDelete(true);
+
+	return make_intrusive<StringVal>(s);
+	}
+
 void ZAM_run_time_error(const char* msg)
 	{
 	fprintf(stderr, "%s\n", msg);
diff --git a/src/script_opt/ZAM/Support.h b/src/script_opt/ZAM/Support.h
index 1660ae6277..84c3114e74 100644
--- a/src/script_opt/ZAM/Support.h
+++ b/src/script_opt/ZAM/Support.h
@@ -54,4 +54,6 @@ extern void ZAM_run_time_warning(const Location* loc, const char* msg);
 extern StringVal* ZAM_to_lower(const StringVal* sv);
 extern StringVal* ZAM_sub_bytes(const StringVal* s, zeek_uint_t start, zeek_int_t n);
 
+extern StringValPtr ZAM_val_cat(const ValPtr& v);
+
 } // namespace zeek::detail
diff --git a/src/script_opt/ZAM/ZInst.h b/src/script_opt/ZAM/ZInst.h
index 9526511424..a27ba054c2 100644
--- a/src/script_opt/ZAM/ZInst.h
+++ b/src/script_opt/ZAM/ZInst.h
@@ -4,6 +4,8 @@
 
 #pragma once
 
+#include "zeek/Desc.h"
+#include "zeek/script_opt/ZAM/BuiltInSupport.h"
 #include "zeek/script_opt/ZAM/Support.h"
 #include "zeek/script_opt/ZAM/ZOp.h"
 
@@ -319,6 +321,7 @@ public:
 		delete[] ints;
 		delete[] constants;
 		delete[] types;
+		delete[] cat_args;
 		}
 
 	// Returns the i'th element of the parallel arrays as a ValPtr.
@@ -405,6 +408,9 @@ public:
 	ValPtr* constants = nullptr;
 	TypePtr* types = nullptr;
 
+	// A parallel array for the cat() built-in replacement.
+	std::unique_ptr<CatArg>* cat_args = nullptr;
+
 	// Used for accessing function names.
 	const ID* id_val = nullptr;
 