reintroduction of "-O add-C++" option

This commit is contained in:
Vern Paxson 2021-12-10 10:19:23 -08:00
parent 7a6a81c200
commit 8c59626eb9
12 changed files with 74 additions and 488 deletions

View file

@ -390,7 +390,6 @@ set(MAIN_SRCS
script_opt/CPP/Exprs.cc script_opt/CPP/Exprs.cc
script_opt/CPP/Func.cc script_opt/CPP/Func.cc
script_opt/CPP/GenFunc.cc script_opt/CPP/GenFunc.cc
script_opt/CPP/HashMgr.cc
script_opt/CPP/Inits.cc script_opt/CPP/Inits.cc
script_opt/CPP/InitsInfo.cc script_opt/CPP/InitsInfo.cc
script_opt/CPP/RuntimeInits.cc script_opt/CPP/RuntimeInits.cc

View file

@ -198,6 +198,7 @@ static void print_analysis_help()
fprintf(stderr, " xform transform scripts to \"reduced\" form\n"); fprintf(stderr, " xform transform scripts to \"reduced\" form\n");
fprintf(stderr, "\n--optimize options when generating C++:\n"); fprintf(stderr, "\n--optimize options when generating C++:\n");
fprintf(stderr, " add-C++ add C++ script bodies to existing generated code\n");
fprintf(stderr, " gen-C++ generate C++ script bodies\n"); fprintf(stderr, " gen-C++ generate C++ script bodies\n");
fprintf(stderr, " gen-standalone-C++ generate \"standalone\" C++ script bodies\n"); fprintf(stderr, " gen-standalone-C++ generate \"standalone\" C++ script bodies\n");
fprintf(stderr, " help print this list\n"); fprintf(stderr, " help print this list\n");
@ -230,6 +231,8 @@ static void set_analysis_option(const char* opt, Options& opts)
a_o.activate = a_o.dump_xform = true; a_o.activate = a_o.dump_xform = true;
else if ( util::streq(opt, "dump-ZAM") ) else if ( util::streq(opt, "dump-ZAM") )
a_o.activate = a_o.dump_ZAM = true; a_o.activate = a_o.dump_ZAM = true;
else if ( util::streq(opt, "add-C++") )
a_o.add_CPP = true;
else if ( util::streq(opt, "gen-C++") ) else if ( util::streq(opt, "gen-C++") )
a_o.gen_CPP = true; a_o.gen_CPP = true;
else if ( util::streq(opt, "gen-standalone-C++") ) else if ( util::streq(opt, "gen-standalone-C++") )

View file

@ -4,7 +4,6 @@
#include "zeek/Desc.h" #include "zeek/Desc.h"
#include "zeek/script_opt/CPP/Func.h" #include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/InitsInfo.h" #include "zeek/script_opt/CPP/InitsInfo.h"
#include "zeek/script_opt/CPP/Tracker.h" #include "zeek/script_opt/CPP/Tracker.h"
#include "zeek/script_opt/CPP/Util.h" #include "zeek/script_opt/CPP/Util.h"
@ -15,10 +14,9 @@
// functionality into a number of groups (see below), these interact with // functionality into a number of groups (see below), these interact with
// one another, and in particular with various member variables, enough // one another, and in particular with various member variables, enough
// so that it's not clear there's benefit to further splitting the // so that it's not clear there's benefit to further splitting the
// functionality into multiple classes. (Some splitting has already been // functionality into multiple classes. (Some splitting has already been done
// done for more self-contained functionality, resulting in the CPPTracker // for more self-contained functionality, resulting in the CPPTracker class
// and CPPHashManager classes, and initialization information in // and initialization information in InitsInfo.{h,cc} and RuntimeInits.{h,cc}.)
// InitsInfo.{h,cc} and RuntimeInits.{h,cc}.)
// //
// Most aspects of translating to C++ have a straightforward nature. // Most aspects of translating to C++ have a straightforward nature.
// We can turn many Zeek script statements directly into the C++ that's // We can turn many Zeek script statements directly into the C++ that's
@ -129,8 +127,7 @@ class CPPCompile
{ {
public: public:
CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, const std::string& gen_name, CPPCompile(std::vector<FuncInfo>& _funcs, ProfileFuncs& pfs, const std::string& gen_name,
const std::string& addl_name, CPPHashManager& _hm, bool _standalone, bool add, bool _standalone, bool report_uncompilable);
bool report_uncompilable);
~CPPCompile(); ~CPPCompile();
// Constructing a CPPCompile object does all of the compilation. // Constructing a CPPCompile object does all of the compilation.
@ -317,10 +314,6 @@ private:
// The global profile of all of the functions. // The global profile of all of the functions.
ProfileFuncs& pfs; ProfileFuncs& pfs;
// Hash-indexed information about previously compiled code (and used
// to update it from this compilation run).
CPPHashManager& hm;
// Script functions that we are able to compile. We compute // Script functions that we are able to compile. We compute
// these ahead of time so that when compiling script function A // these ahead of time so that when compiling script function A
// which makes a call to script function B, we know whether // which makes a call to script function B, we know whether
@ -369,11 +362,6 @@ private:
// See Vars.cc for definitions. // See Vars.cc for definitions.
// //
// Returns true if the current compilation context has collisions
// with previously generated code (globals with conflicting types
// or initialization values, or types with differing elements).
bool CheckForCollisions();
// Generate declarations associated with the given global, and, if // Generate declarations associated with the given global, and, if
// it's used as a variable (not just as a function being called), // it's used as a variable (not just as a function being called),
// track it as such. // track it as such.
@ -385,10 +373,8 @@ private:
// Register the given global name. "suffix" distinguishes particular // Register the given global name. "suffix" distinguishes particular
// types of globals, such as the names of bifs, global (non-function) // types of globals, such as the names of bifs, global (non-function)
// variables, or compiled Zeek functions. If "track" is true then // variables, or compiled Zeek functions.
// if we're compiling incrementally, and this is a new global not bool AddGlobal(const std::string& g, const char* suffix);
// previously compiled, then we track its hash for future compilations.
bool AddGlobal(const std::string& g, const char* suffix, bool track);
// Tracks that the body we're currently compiling refers to the // Tracks that the body we're currently compiling refers to the
// given event. // given event.
@ -936,7 +922,7 @@ private:
const char* IntrusiveVal(const TypePtr& t); const char* IntrusiveVal(const TypePtr& t);
// Maps types to indices in the global "types__CPP" array. // Maps types to indices in the global "types__CPP" array.
CPPTracker<Type> types = {"types", true, &compiled_items}; CPPTracker<Type> types = {"types", true};
// Used to prevent analysis of mutually-referring types from // Used to prevent analysis of mutually-referring types from
// leading to infinite recursion. Maps types to their global // leading to infinite recursion. Maps types to their global
@ -966,7 +952,7 @@ private:
static const char* AttrName(AttrTag t); static const char* AttrName(AttrTag t);
// Similar for attributes, so we can reconstruct record types. // Similar for attributes, so we can reconstruct record types.
CPPTracker<Attributes> attributes = {"attrs", false, &compiled_items}; CPPTracker<Attributes> attributes = {"attrs", false};
// Maps Attributes and Attr's to their global initialization // Maps Attributes and Attr's to their global initialization
// information. // information.
@ -1036,7 +1022,7 @@ private:
// Expressions for which we need to generate initialization-time // Expressions for which we need to generate initialization-time
// code. Currently, these are only expressions appearing in // code. Currently, these are only expressions appearing in
// attributes. // attributes.
CPPTracker<Expr> init_exprs = {"gen_init_expr", false, &compiled_items}; CPPTracker<Expr> init_exprs = {"gen_init_expr", false};
// //
// End of methods related to run-time initialization. // End of methods related to run-time initialization.
@ -1127,9 +1113,6 @@ private:
// File to which we're generating code. // File to which we're generating code.
FILE* write_file; FILE* write_file;
// Name of file holding potential "additional" code.
std::string addl_name;
// Indentation level. // Indentation level.
int block_level = 0; int block_level = 0;

View file

@ -12,13 +12,11 @@ namespace zeek::detail
using namespace std; using namespace std;
CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name, CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const string& gen_name,
const string& _addl_name, CPPHashManager& _hm, bool _standalone, bool add, bool _standalone, bool report_uncompilable)
bool report_uncompilable) : funcs(_funcs), pfs(_pfs), standalone(_standalone)
: funcs(_funcs), pfs(_pfs), hm(_hm), standalone(_standalone)
{ {
addl_name = _addl_name;
auto target_name = gen_name.c_str(); auto target_name = gen_name.c_str();
auto mode = "w"; auto mode = add ? "a" : "w";
write_file = fopen(target_name, mode); write_file = fopen(target_name, mode);
if ( ! write_file ) if ( ! write_file )
@ -26,19 +24,33 @@ CPPCompile::CPPCompile(vector<FuncInfo>& _funcs, ProfileFuncs& _pfs, const strin
reporter->Error("can't open C++ target file %s", target_name); reporter->Error("can't open C++ target file %s", target_name);
exit(1); exit(1);
} }
else
if ( add )
{ {
// Create an empty "additional" file. // We need a unique number to associate with the name
auto addl_f = fopen(addl_name.c_str(), "w"); // space for the code we're adding. A convenient way to
if ( ! addl_f ) // generate this safely is to use the present size of the
// file we're appending to. That guarantees that every
// incremental compilation will wind up with a different
// number.
struct stat st;
if ( fstat(fileno(write_file), &st) != 0 )
{ {
reporter->Error("can't open C++ additional file %s", addl_name.c_str()); char buf[256];
util::zeek_strerror_r(errno, buf, sizeof(buf));
reporter->Error("fstat failed on %s: %s", target_name, buf);
exit(1); exit(1);
} }
fclose(addl_f); // We use a value of "0" to mean "we're not appending,
// we're generating from scratch", so make sure we're
// distinct from that.
addl_tag = st.st_size + 1;
} }
else
addl_tag = 0;
Compile(report_uncompilable); Compile(report_uncompilable);
} }
@ -84,15 +96,6 @@ void CPPCompile::Compile(bool report_uncompilable)
reason); reason);
not_fully_compilable.insert(func.Func()->Name()); not_fully_compilable.insert(func.Func()->Name());
} }
auto h = func.Profile()->HashVal();
if ( hm.HasHash(h) )
{
// Track the previously compiled instance
// of this function.
auto n = func.Func()->Name();
hashed_funcs[n] = hm.FuncBodyName(h);
}
} }
// Track all of the types we'll be using. // Track all of the types we'll be using.
@ -111,7 +114,7 @@ void CPPCompile::Compile(bool report_uncompilable)
CreateGlobal(g); CreateGlobal(g);
for ( const auto& e : pfs.Events() ) for ( const auto& e : pfs.Events() )
if ( AddGlobal(e, "gl", false) ) if ( AddGlobal(e, "gl") )
Emit("EventHandlerPtr %s_ev;", globals[string(e)]); Emit("EventHandlerPtr %s_ev;", globals[string(e)]);
for ( const auto& t : pfs.RepTypes() ) for ( const auto& t : pfs.RepTypes() )
@ -177,9 +180,9 @@ void CPPCompile::GenProlog()
if ( addl_tag == 0 ) if ( addl_tag == 0 )
{ {
Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n"); Emit("#include \"zeek/script_opt/CPP/Runtime.h\"\n");
Emit("namespace zeek::detail { //\n");
} }
Emit("namespace zeek::detail { //\n");
Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir); Emit("namespace CPP_%s { // %s\n", Fmt(addl_tag), working_dir);
// The following might-or-might-not wind up being populated/used. // The following might-or-might-not wind up being populated/used.
@ -273,10 +276,7 @@ void CPPCompile::RegisterCompiledBody(const string& f)
// Hash in the location associated with this compilation // Hash in the location associated with this compilation
// pass, to get a final hash that avoids conflicts with // pass, to get a final hash that avoids conflicts with
// identical-but-in-a-different-context function bodies // identical-but-in-a-different-context function bodies
// when compiling potentially conflicting additional code // when compiling potentially conflicting additional code.
// (which we want to support to enable quicker test suite
// runs by enabling multiple tests to be compiled into the
// same binary).
h = merge_p_hashes(h, p_hash(cf_locs[f])); h = merge_p_hashes(h, p_hash(cf_locs[f]));
auto fi = func_index.find(f); auto fi = func_index.find(f);
@ -418,11 +418,6 @@ void CPPCompile::GenEpilog()
GenInitHook(); GenInitHook();
Emit("} // %s\n\n", scope_prefix(addl_tag)); Emit("} // %s\n\n", scope_prefix(addl_tag));
if ( addl_tag > 0 )
return;
Emit("#include \"" + addl_name + "\"\n");
Emit("} // zeek::detail"); Emit("} // zeek::detail");
} }
@ -439,10 +434,6 @@ bool CPPCompile::IsCompilable(const FuncInfo& func, const char** reason)
if ( func.ShouldSkip() ) if ( func.ShouldSkip() )
return false; return false;
if ( hm.HasHash(func.Profile()->HashVal()) )
// We've already compiled it.
return false;
return true; return true;
} }

View file

@ -1,148 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
#include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/Func.h"
#include "zeek/script_opt/CPP/Util.h"
namespace zeek::detail
{
using namespace std;
VarMapper compiled_items;
// Builds the hash file's name by appending ".dat" to the given base and
// opens that file for writing.  If the file can't be opened, the problem
// is reported and the process exits.
CPPHashManager::CPPHashManager(const char* hash_name_base)
	{
	hash_name = hash_name_base;
	hash_name += ".dat";

	const char* path = hash_name.c_str();
	hf_w = fopen(path, "w");

	if ( hf_w )
		return;

	reporter->Error("can't open auxiliary C++ hash file %s for writing", path);
	exit(1);
	}
// Closes the write handle, and then, if a read handle is present,
// unlocks the hash file and closes that handle too.
CPPHashManager::~CPPHashManager()
	{
	fclose(hf_w);

	if ( ! hf_r )
		// Nothing was opened for reading, so nothing to unlock.
		return;

	unlock_file(hash_name, hf_r);
	fclose(hf_r);
	}
void CPPHashManager::LoadHashes(FILE* f)
{
string key;
// The hash file format is inefficient but simple to scan.
// It doesn't appear to pose a bottleneck, so until it does
// it makes sense for maintainability to keep it dead simple.
while ( GetLine(f, key) )
{
string line;
RequireLine(f, line);
p_hash_type hash;
if ( key == "func" )
{
auto func = line;
RequireLine(f, line);
if ( sscanf(line.c_str(), "%llu", &hash) != 1 || hash == 0 )
BadLine(line);
previously_compiled[hash] = func;
}
else if ( key == "global" )
{
auto gl = line;
RequireLine(f, line);
p_hash_type gl_t_h, gl_v_h;
if ( sscanf(line.c_str(), "%llu %llu", &gl_t_h, &gl_v_h) != 2 )
BadLine(line);
gl_type_hashes[gl] = gl_t_h;
gl_val_hashes[gl] = gl_v_h;
// Eat the location info. It's there just for
// maintainers to be able to track down peculiarities
// in the hash file.
(void)RequireLine(f, line);
}
else if ( key == "global-var" )
{
auto gl = line;
RequireLine(f, line);
int scope;
if ( sscanf(line.c_str(), "%d", &scope) != 1 )
BadLine(line);
gv_scopes[gl] = scope;
}
else if ( key == "hash" )
{
int index;
int scope;
if ( sscanf(line.c_str(), "%llu %d %d", &hash, &index, &scope) != 3 || hash == 0 )
BadLine(line);
compiled_items[hash] = CompiledItemPair{index, scope};
}
else if ( key == "record" )
record_type_globals.insert(line);
else if ( key == "enum" )
enum_type_globals.insert(line);
else
BadLine(line);
}
}
// Like GetLine(), except that the absence of a further line is treated
// as a truncated hash file: it's reported and the process exits.
void CPPHashManager::RequireLine(FILE* f, string& line)
	{
	if ( GetLine(f, line) )
		return;

	reporter->Error("missing final %s hash file entry", hash_name.c_str());
	exit(1);
	}
bool CPPHashManager::GetLine(FILE* f, string& line)
{
char buf[8192];
if ( ! fgets(buf, sizeof buf, f) )
return false;
int n = strlen(buf);
if ( n > 0 && buf[n - 1] == '\n' )
buf[n - 1] = '\0';
line = buf;
return true;
}
// Reports the given line as a malformed entry of the hash file, and
// then exits the process.
void CPPHashManager::BadLine(string& line)
	{
	const char* file_name = hash_name.c_str();
	const char* entry = line.c_str();

	reporter->Error("bad %s hash file entry: %s", file_name, entry);
	exit(1);
	}
} // zeek::detail

View file

@ -1,117 +0,0 @@
// See the file "COPYING" in the main distribution directory for copyright.
// C++ compiler support class for managing information about compiled
// objects across compilations. The objects are identified via hashes,
// hence the term "hash manager". Objects can exist in different scopes.
// The information mapping hashes to objects and scopes is tracked
// across multiple compilations using intermediary file(s).
#pragma once
#include <stdio.h>
#include "zeek/script_opt/ProfileFunc.h"
namespace zeek::detail
{
class CPPHashManager
{
public:
// Create a hash manager that uses the given name for
// referring to hash file(s). It's a "base" rather than
// a full name in case the manager winds up managing multiple
// distinct files (not currently the case).
//
// If "append" is true then new hashes will be added to the
// end of the file (and the hash file will be locked, to prevent
// overlapping updates from concurrent compilation/appends).
// Otherwise, the file will be generated afresh.
CPPHashManager(const char* hash_name_base);
~CPPHashManager();
// True if the given hash has already been generated.
bool HasHash(p_hash_type h) const { return previously_compiled.count(h) > 0; }
// The internal (C++) name of a previously compiled function,
// as identified by its hash.
const std::string& FuncBodyName(p_hash_type h) { return previously_compiled[h]; }
// Whether the given global has already been generated;
// and, if so, the hashes of its type and initialization
// value (used for consistency checking). Here the name
// is that used at the script level.
bool HasGlobal(const std::string& gl) const { return gl_type_hashes.count(gl) > 0; }
p_hash_type GlobalTypeHash(const std::string& gl) { return gl_type_hashes[gl]; }
p_hash_type GlobalValHash(const std::string& gl) { return gl_val_hashes[gl]; }
// Whether the given C++ global already exists, and, if so,
// in what scope.
bool HasGlobalVar(const std::string& gv) const { return gv_scopes.count(gv) > 0; }
int GlobalVarScope(const std::string& gv) { return gv_scopes[gv]; }
// True if the given global corresponds to a record type
// or an enum type. Used to suppress complaints about
// definitional inconsistencies for extensible types.
bool HasRecordTypeGlobal(const std::string& rt) const
{
return record_type_globals.count(rt) > 0;
}
bool HasEnumTypeGlobal(const std::string& et) const { return enum_type_globals.count(et) > 0; }
// Access to the file we're writing hashes to, so that the
// compiler can add new entries to it.
FILE* HashFile() const { return hf_w; }
protected:
// Parses an existing file with hash information.
void LoadHashes(FILE* f);
// Helper routines to load lines from hash file.
// The first complains if the line isn't present;
// the second merely indicates whether it was.
void RequireLine(FILE* f, std::string& line);
bool GetLine(FILE* f, std::string& line);
// Generates an error message for a ill-formatted hash file line.
void BadLine(std::string& line);
// Tracks previously compiled bodies based on hashes, mapping them
// to fully qualified (in terms of scoping) C++ names.
std::unordered_map<p_hash_type, std::string> previously_compiled;
// Tracks globals that are record or enum types, indexed using
// script-level names.
std::unordered_set<std::string> record_type_globals;
std::unordered_set<std::string> enum_type_globals;
// Tracks globals seen in previously compiled bodies, mapping
// script-level names to hashes of their types and their values.
std::unordered_map<std::string, p_hash_type> gl_type_hashes;
std::unordered_map<std::string, p_hash_type> gl_val_hashes;
// Information about globals in terms of their internal variable
// names, rather than their script-level names.
std::unordered_map<std::string, int> gv_scopes;
// Base for file names.
std::string hash_name;
// Handles for reading from and writing to the hash file.
// We lock on the first
FILE* hf_r = nullptr;
FILE* hf_w = nullptr;
};
// Describes where a previously compiled item lives: its index into a
// C++ global (like "types_N__CPP"), and the namespace scope holding
// that global.  This is the value type of VarMapper, which is keyed by
// the item's hash.
struct CompiledItemPair
{
// Index of the item within the C++ global.
int index;
// Namespace scope in which the item was generated.
int scope;
};
using VarMapper = std::unordered_map<p_hash_type, CompiledItemPair>;
extern VarMapper compiled_items;
} // zeek::detail

View file

@ -73,9 +73,7 @@ The following workflow assumes you are in the `build/` subdirectory:
1. `./src/zeek -O gen-C++ target.zeek` 1. `./src/zeek -O gen-C++ target.zeek`
The generated code is written to The generated code is written to
`CPP-gen.cc`. The compiler will also produce `CPP-gen.cc`.
a file `CPP-hashes.dat`, for use by an advanced feature, and an
empty `CPP-gen-addl.h` file (same).
2. `ninja` or `make` to recompile Zeek 2. `ninja` or `make` to recompile Zeek
3. `./src/zeek -O use-C++ target.zeek` 3. `./src/zeek -O use-C++ target.zeek`
Executes with each function/hook/event Executes with each function/hook/event
@ -110,6 +108,10 @@ On the other hand, it's possible (not yet established) that code created
using `gen-C++` can be made to compile significantly faster than using `gen-C++` can be made to compile significantly faster than
standalone code. standalone code.
Another option, `-O add-C++`, instead _appends_ the generated code to existing C++ in `CPP-gen.cc`.
You can use this option repeatedly for different scripts and then
compile the collection _en masse_.
There are additional workflows relating to running the test suite, which There are additional workflows relating to running the test suite, which
we document only briefly here as they're likely going to change or go away we document only briefly here as they're likely going to change or go away
, as it's not clear they're actually needed. , as it's not clear they're actually needed.

View file

@ -21,19 +21,8 @@ template <class T> void CPPTracker<T>::AddKey(IntrusivePtr<T> key, p_hash_type h
if ( map2.count(h) == 0 ) if ( map2.count(h) == 0 )
{ {
int index; auto index = keys.size();
if ( mapper && mapper->count(h) > 0 ) keys.push_back(key);
{
const auto& pair = (*mapper)[h];
index = pair.index;
scope2[h] = Fmt(pair.scope);
inherited.insert(h);
}
else
{
index = num_non_inherited++;
keys.push_back(key);
}
map2[h] = index; map2[h] = index;
reps[h] = key.get(); reps[h] = key.get();
@ -57,11 +46,6 @@ template <class T> string CPPTracker<T>::KeyName(const T* key)
return gi->second->Name(); return gi->second->Name();
auto index = map2[hash]; auto index = map2[hash];
string scope;
if ( IsInherited(hash) )
scope = scope_prefix(scope2[hash]);
string ind = Fmt(index); string ind = Fmt(index);
string full_name; string full_name;
@ -70,18 +54,7 @@ template <class T> string CPPTracker<T>::KeyName(const T* key)
else else
full_name = base_name + "_" + ind + "__CPP"; full_name = base_name + "_" + ind + "__CPP";
return scope + full_name; return full_name;
}
template <class T> void CPPTracker<T>::LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file)
{
if ( IsInherited(key) )
return;
auto hash = map[key.get()];
auto index = map2[hash];
fprintf(log_file, "hash\n%llu %d %d\n", hash, index, scope);
} }
template <class T> p_hash_type CPPTracker<T>::Hash(IntrusivePtr<T> key) const template <class T> p_hash_type CPPTracker<T>::Hash(IntrusivePtr<T> key) const

View file

@ -4,17 +4,15 @@
// where the key can have any IntrusivePtr type. The properties of a // where the key can have any IntrusivePtr type. The properties of a
// tracker are that it (1) supports a notion that two technically distinct // tracker are that it (1) supports a notion that two technically distinct
// keys in fact reflect the same underlying object, (2) provides an // keys in fact reflect the same underlying object, (2) provides an
// instance of such keys to consistently serve as their "representative", // instance of such keys to consistently serve as their "representative", and
// (3) provides names (suitable for use as C++ variables) for representative // (3) provides names (suitable for use as C++ variables) for representative
// keys, and (4) has a notion of "inheritance" (the underlying object is // keys.
// already available from a previously generated namespace).
// //
// Notions of "same" are taken from hash values ala those provided by // Notions of "same" are taken from hash values ala those provided by
// ProfileFunc. // ProfileFunc.
#pragma once #pragma once
#include "zeek/script_opt/CPP/HashMgr.h"
#include "zeek/script_opt/CPP/InitsInfo.h" #include "zeek/script_opt/CPP/InitsInfo.h"
namespace zeek::detail namespace zeek::detail
@ -28,10 +26,8 @@ public:
// The base name is used to construct key names. "single_global", // The base name is used to construct key names. "single_global",
// if true, specifies that the names should be constructed as // if true, specifies that the names should be constructed as
// indexes into a single global, rather than as distinct globals. // indexes into a single global, rather than as distinct globals.
// The mapper, if present, maps hash values to information about CPPTracker(const char* _base_name, bool _single_global)
// the previously generated scope in which the value appears. : base_name(_base_name), single_global(_single_global)
CPPTracker(const char* _base_name, bool _single_global, VarMapper* _mapper = nullptr)
: base_name(_base_name), single_global(_single_global), mapper(_mapper)
{ {
} }
@ -50,8 +46,7 @@ public:
std::string KeyName(IntrusivePtr<T> key) { return KeyName(key.get()); } std::string KeyName(IntrusivePtr<T> key) { return KeyName(key.get()); }
// Returns all of the distinct keys entered into the tracker. // Returns all of the distinct keys entered into the tracker.
// A key is "distinct" if it's both (1) a representative and // A key is "distinct" if it's a representative.
// (2) not inherited.
const std::vector<IntrusivePtr<T>>& DistinctKeys() const { return keys; } const std::vector<IntrusivePtr<T>>& DistinctKeys() const { return keys; }
// For a given key, get its representative. // For a given key, get its representative.
@ -62,23 +57,6 @@ public:
} }
const T* GetRep(IntrusivePtr<T> key) { return GetRep(key.get()); } const T* GetRep(IntrusivePtr<T> key) { return GetRep(key.get()); }
// True if the given key is represented by an inherited value.
bool IsInherited(const T* key)
{
ASSERT(HasKey(key));
return IsInherited(map[key]);
}
bool IsInherited(const IntrusivePtr<T>& key)
{
ASSERT(HasKey(key));
return IsInherited(map[key.get()]);
}
bool IsInherited(p_hash_type h) { return inherited.count(h) > 0; }
// If the given key is not inherited, logs it and its associated
// scope to the given file.
void LogIfNew(IntrusivePtr<T> key, int scope, FILE* log_file);
private: private:
// Compute a hash for the given key. // Compute a hash for the given key.
p_hash_type Hash(IntrusivePtr<T> key) const; p_hash_type Hash(IntrusivePtr<T> key) const;
@ -88,12 +66,8 @@ private:
std::unordered_map<const T*, std::shared_ptr<CPP_InitInfo>> gi_s; std::unordered_map<const T*, std::shared_ptr<CPP_InitInfo>> gi_s;
// Maps internal representations to distinct values. These // Maps internal representations to distinct values.
// may-or-may-not be indices into an "inherited" namespace scope.
std::unordered_map<p_hash_type, int> map2; std::unordered_map<p_hash_type, int> map2;
std::unordered_map<p_hash_type, std::string> scope2; // only if inherited
std::unordered_set<p_hash_type> inherited; // which are inherited
int num_non_inherited = 0; // distinct non-inherited map2 entries
// Tracks the set of distinct keys, to facilitate iterating over them. // Tracks the set of distinct keys, to facilitate iterating over them.
// Each such key also has an entry in map2. // Each such key also has an entry in map2.
@ -108,9 +82,6 @@ private:
// Whether to base the names out of a single global, or distinct // Whether to base the names out of a single global, or distinct
// globals. // globals.
bool single_global; bool single_global;
// If non-nil, the mapper to consult for previous names.
VarMapper* mapper;
}; };
} // zeek::detail } // zeek::detail

View file

@ -12,69 +12,6 @@ namespace zeek::detail
using namespace std; using namespace std;
bool CPPCompile::CheckForCollisions()
{
for ( auto& g : pfs.AllGlobals() )
{
auto gn = string(g->Name());
if ( hm.HasGlobal(gn) )
{
// Make sure the previous compilation used the
// same type and initialization value for the global.
auto ht_orig = hm.GlobalTypeHash(gn);
auto hv_orig = hm.GlobalValHash(gn);
auto ht = pfs.HashType(g->GetType());
p_hash_type hv = 0;
if ( g->GetVal() )
hv = p_hash(g->GetVal());
if ( ht != ht_orig || hv != hv_orig )
{
fprintf(stderr, "%s: hash clash for global %s (%llu/%llu vs. %llu/%llu)\n",
working_dir.c_str(), gn.c_str(), ht, hv, ht_orig, hv_orig);
fprintf(stderr, "val: %s\n",
g->GetVal() ? obj_desc(g->GetVal().get()).c_str() : "<none>");
return true;
}
}
}
for ( auto& t : pfs.RepTypes() )
{
auto tag = t->Tag();
if ( tag != TYPE_ENUM && tag != TYPE_RECORD )
// Other types, if inconsistent, will just not reuse
// the previously compiled version of the type.
continue;
// We identify enum's and record's by name. Make sure that
// the name either (1) wasn't previously used, or (2) if it
// was, it was likewise for an enum or a record.
const auto& tn = t->GetName();
if ( tn.empty() || ! hm.HasGlobal(tn) )
// No concern of collision since the type name
// wasn't previously compiled.
continue;
if ( tag == TYPE_ENUM && hm.HasEnumTypeGlobal(tn) )
// No inconsistency.
continue;
if ( tag == TYPE_RECORD && hm.HasRecordTypeGlobal(tn) )
// No inconsistency.
continue;
fprintf(stderr, "%s: type \"%s\" collides with compiled global\n", working_dir.c_str(),
tn.c_str());
return true;
}
return false;
}
void CPPCompile::CreateGlobal(const ID* g) void CPPCompile::CreateGlobal(const ID* g)
{ {
auto gn = string(g->Name()); auto gn = string(g->Name());
@ -86,7 +23,7 @@ void CPPCompile::CreateGlobal(const ID* g)
// then we'll call it directly. // then we'll call it directly.
if ( compilable_funcs.count(gn) > 0 ) if ( compilable_funcs.count(gn) > 0 )
{ {
AddGlobal(gn, "zf", true); AddGlobal(gn, "zf");
return; return;
} }
@ -97,7 +34,7 @@ void CPPCompile::CreateGlobal(const ID* g)
} }
} }
if ( AddGlobal(gn, "gl", true) ) if ( AddGlobal(gn, "gl") )
{ // We'll be creating this global. { // We'll be creating this global.
Emit("IDPtr %s;", globals[gn]); Emit("IDPtr %s;", globals[gn]);
@ -146,30 +83,20 @@ void CPPCompile::AddBiF(const ID* b, bool is_var)
if ( is_var ) if ( is_var )
n = n + "_"; // make the name distinct n = n + "_"; // make the name distinct
if ( AddGlobal(n, "bif", true) ) if ( AddGlobal(n, "bif") )
Emit("Func* %s;", globals[n]); Emit("Func* %s;", globals[n]);
ASSERT(BiFs.count(globals[n]) == 0); ASSERT(BiFs.count(globals[n]) == 0);
BiFs[globals[n]] = bn; BiFs[globals[n]] = bn;
} }
bool CPPCompile::AddGlobal(const string& g, const char* suffix, bool track) bool CPPCompile::AddGlobal(const string& g, const char* suffix)
{ {
bool new_var = false; if ( globals.count(g) > 0 )
return false;
if ( globals.count(g) == 0 ) globals.emplace(g, GlobalName(g, suffix));
{ return true;
auto gn = GlobalName(g, suffix);
if ( hm.HasGlobalVar(gn) )
gn = scope_prefix(hm.GlobalVarScope(gn)) + gn;
else
new_var = true;
globals.emplace(g, gn);
}
return new_var;
} }
void CPPCompile::RegisterEvent(string ev_name) void CPPCompile::RegisterEvent(string ev_name)

View file

@ -33,7 +33,7 @@ static std::vector<FuncInfo> funcs;
static ZAMCompiler* ZAM = nullptr; static ZAMCompiler* ZAM = nullptr;
static bool generating_CPP = false; static bool generating_CPP = false;
static std::string hash_dir; // for storing hashes of previous compilations static std::string CPP_dir; // where to generate C++ code
static ScriptFuncPtr global_stmts; static ScriptFuncPtr global_stmts;
@ -203,9 +203,9 @@ static void check_env_opt(const char* opt, bool& opt_flag)
static void init_options() static void init_options()
{ {
auto hd = getenv("ZEEK_HASH_DIR"); auto cppd = getenv("ZEEK_CPP_DIR");
if ( hd ) if ( cppd )
hash_dir = std::string(hd) + "/"; CPP_dir = std::string(cppd) + "/";
// ZAM-related options. // ZAM-related options.
check_env_opt("ZEEK_DUMP_XFORM", analysis_options.dump_xform); check_env_opt("ZEEK_DUMP_XFORM", analysis_options.dump_xform);
@ -221,13 +221,14 @@ static void init_options()
check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM); check_env_opt("ZEEK_PROFILE", analysis_options.profile_ZAM);
// Compile-to-C++-related options. // Compile-to-C++-related options.
check_env_opt("ZEEK_ADD_CPP", analysis_options.add_CPP);
check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP); check_env_opt("ZEEK_GEN_CPP", analysis_options.gen_CPP);
check_env_opt("ZEEK_GEN_STANDALONE_CPP", analysis_options.gen_standalone_CPP); check_env_opt("ZEEK_GEN_STANDALONE_CPP", analysis_options.gen_standalone_CPP);
check_env_opt("ZEEK_COMPILE_ALL", analysis_options.compile_all); check_env_opt("ZEEK_COMPILE_ALL", analysis_options.compile_all);
check_env_opt("ZEEK_REPORT_CPP", analysis_options.report_CPP); check_env_opt("ZEEK_REPORT_CPP", analysis_options.report_CPP);
check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP); check_env_opt("ZEEK_USE_CPP", analysis_options.use_CPP);
if ( analysis_options.gen_standalone_CPP ) if ( analysis_options.gen_standalone_CPP || analysis_options.add_CPP )
analysis_options.gen_CPP = true; analysis_options.gen_CPP = true;
if ( analysis_options.gen_CPP ) if ( analysis_options.gen_CPP )
@ -378,15 +379,13 @@ static void use_CPP()
static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs) static void generate_CPP(std::unique_ptr<ProfileFuncs>& pfs)
{ {
const auto hash_name = hash_dir + "CPP-hashes"; const auto gen_name = CPP_dir + "CPP-gen.cc";
auto hm = std::make_unique<CPPHashManager>(hash_name.c_str()); const bool add = analysis_options.add_CPP;
const bool standalone = analysis_options.gen_standalone_CPP;
const bool report = analysis_options.report_uncompilable;
const auto gen_name = hash_dir + "CPP-gen.cc"; CPPCompile cpp(funcs, *pfs, gen_name, add, standalone, report);
const auto addl_name = hash_dir + "CPP-gen-addl.h";
CPPCompile cpp(funcs, *pfs, gen_name, addl_name, *hm, analysis_options.gen_standalone_CPP,
analysis_options.report_uncompilable);
} }
static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs, static void find_when_funcs(std::unique_ptr<ProfileFuncs>& pfs,

View file

@ -96,6 +96,9 @@ struct AnalyOpt
// of the corresponding script, and not activated by default). // of the corresponding script, and not activated by default).
bool gen_standalone_CPP = false; bool gen_standalone_CPP = false;
// Generate C++ that's added to existing generated code.
bool add_CPP = false;
// If true, use C++ bodies if available. // If true, use C++ bodies if available.
bool use_CPP = false; bool use_CPP = false;