zeek/src/InputReaderAscii.cc
Bernhard Amann 4dd95fcf3c support for uninitialized fields & empty sets and tables.
The only snag is that, with the default output format of the log-file writer, the input reader cannot tell whether a table or set is empty or uninitialized (both cases use the same character by default). In this case it is assumed, by default, that the field/vector is uninitialized.
2011-11-16 23:51:51 -08:00

423 lines
9 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include "InputReaderAscii.h"
#include "DebugLogger.h"
#include "NetVar.h"
#include <stdlib.h>
#include <sstream>
// Constructs a mapping for a column that carries no element subtype.
//
// arg_name:     copied into `name`.
// arg_type:     copied into `type`.
// arg_position: stored in `position`.
//
// `subtype` is value-initialized here. The copy constructor and subType()
// both read it, so leaving it indeterminate would make every later copy of
// this mapping read an uninitialized value.
FieldMapping::FieldMapping(const string& arg_name, const TypeTag& arg_type, int arg_position)
	: name(arg_name), type(arg_type), subtype(), position(arg_position)
	{
	}
// Constructs a mapping for a column that also has an element subtype
// (the subtype is what subType() later promotes to the main type).
//
// arg_name:     copied into `name`.
// arg_type:     copied into `type`.
// arg_subtype:  copied into `subtype`.
// arg_position: stored in `position`.
FieldMapping::FieldMapping(const string& arg_name, const TypeTag& arg_type, const TypeTag& arg_subtype, int arg_position)
	: name(arg_name),
	  type(arg_type),
	  subtype(arg_subtype),
	  position(arg_position)
	{
	}
// Copy constructor: duplicates name, type, subtype and position of `arg`.
FieldMapping::FieldMapping(const FieldMapping& arg)
	: name(arg.name),
	  type(arg.type),
	  subtype(arg.subtype),
	  position(arg.position)
	{
	}
// Returns a mapping describing a single element of this field: same name and
// position, but with this mapping's subtype used as the type.
FieldMapping FieldMapping::subType()
	{
	FieldMapping element(name, subtype, position);
	return element;
	}
// Sets up a reader with no open file and copies the formatting constants
// (column separator, set separator, empty-field and unset-field markers)
// out of the BifConst::InputAscii script-level values.
//
// Both separators are expected to be exactly one character long; any other
// length is reported through Error() but construction still continues.
InputReaderAscii::InputReaderAscii()
	{
	file = 0;

	const char* sep_bytes = reinterpret_cast<const char*>(BifConst::InputAscii::separator->Bytes());
	separator.assign(sep_bytes, BifConst::InputAscii::separator->Len());
	if ( separator.size() != 1 )
		Error("separator length has to be 1. Separator will be truncated.");

	const char* set_sep_bytes = reinterpret_cast<const char*>(BifConst::InputAscii::set_separator->Bytes());
	set_separator.assign(set_sep_bytes, BifConst::InputAscii::set_separator->Len());
	if ( set_separator.size() != 1 )
		Error("set_separator length has to be 1. Separator will be truncated.");

	const char* empty_bytes = reinterpret_cast<const char*>(BifConst::InputAscii::empty_field->Bytes());
	empty_field.assign(empty_bytes, BifConst::InputAscii::empty_field->Len());

	const char* unset_bytes = reinterpret_cast<const char*>(BifConst::InputAscii::unset_field->Bytes());
	unset_field.assign(unset_bytes, BifConst::InputAscii::unset_field->Len());
	}
// Destructor: delegates all cleanup (closing and freeing the input stream)
// to DoFinish().
InputReaderAscii::~InputReaderAscii()
	{
	this->DoFinish();
	}
// Releases everything the reader holds: forgets the column mappings and
// closes/frees the input stream. Safe to call more than once, because the
// stream pointer is reset to 0 after it has been deleted.
void InputReaderAscii::DoFinish()
	{
	// clear() actually drops the mappings; empty() would merely report
	// whether the container is empty and change nothing.
	columnMap.clear();

	if ( file != 0 ) {
		file->close();
		delete file;
		file = 0;
	}
	}
// Opens the input file and remembers the field layout to read.
//
// path:       file to open; also stored in `fname` for later re-opening.
// num_fields: number of entries in `fields`.
// idx_fields: stored in `idx_fields`; not otherwise used in this function.
// fields:     descriptions of the fields the caller wants.
//
// Returns true if the file could be opened. On failure an error is reported
// and the field layout members are left untouched.
bool InputReaderAscii::DoInit(string path, int num_fields, int idx_fields, const LogField* const * fields)
	{
	fname = path;
	file = new ifstream(fname.c_str());

	if ( file->is_open() ) {
		this->num_fields = num_fields;
		this->idx_fields = idx_fields;
		this->fields = fields;
		return true;
	}

	Error(Fmt("cannot open %s", fname.c_str()));
	return false;
	}
bool InputReaderAscii::ReadHeader() {
// try to read the header line...
string line;
if ( !GetLine(line) ) {
Error("could not read first line");
return false;
}
// split on tabs...
istringstream splitstream(line);
unsigned int currTab = 0;
int wantFields = 0;
while ( splitstream ) {
string s;
if ( !getline(splitstream, s, separator[0]))
break;
// current found heading in s... compare if we want it
for ( unsigned int i = 0; i < num_fields; i++ ) {
const LogField* field = fields[i];
if ( field->name == s ) {
// cool, found field. note position
FieldMapping f(field->name, field->type, field->subtype, i);
columnMap.push_back(f);
wantFields++;
break; // done with searching
}
}
// look if we did push something...
if ( columnMap.size() == currTab ) {
// no, we didn't. note that...
FieldMapping empty;
columnMap.push_back(empty);
}
// done
currTab++;
}
if ( wantFields != (int) num_fields ) {
// we did not find all fields?
// :(
Error(Fmt("One of the requested fields could not be found in the input data file. Found %d fields, wanted %d", wantFields, num_fields));
return false;
}
// well, that seems to have worked...
return true;
}
// Fetches the next meaningful line from the input file into `str`.
//
// Lines starting with '#' are skipped, except a line starting with
// "#fields\t": that prefix is stripped and the remainder is returned.
// Empty lines are returned unchanged.
//
// Returns false once the stream cannot deliver any further line.
bool InputReaderAscii::GetLine(string& str) {
	while ( getline(*file, str) ) {
		// Test for emptiness first so str[0] is never evaluated on an empty
		// string (out of range for a non-const string before C++11).
		if ( str.empty() || str[0] != '#' ) {
			return true;
		}

		if ( str.compare(0, 8, "#fields\t") == 0 ) {
			str = str.substr(8);
			return true;
		}
	}

	return false;
}
// Converts the text of one column (or of one set/vector element) into a newly
// allocated LogVal of the type given by `field`.
//
// s:     raw text to convert.
// field: mapping that supplies the target type, the element subtype (used
//        for tables and vectors) and the field name (used in error messages).
//
// Returns a heap-allocated LogVal, or 0 after reporting the problem through
// Error(). Text equal to `unset_field` yields LogVal(field.type, false), the
// form this file uses for fields that are not set.
//
// A LogVal is only allocated once the input is known to be convertible, so
// no error path has to destroy a half-initialized value.
LogVal* InputReaderAscii::EntryToVal(string s, FieldMapping field) {
	if ( s.compare(unset_field) == 0 ) { // field is not set...
		return new LogVal(field.type, false);
	}

	switch ( field.type ) {
	case TYPE_ENUM:
	case TYPE_STRING: {
		LogVal* val = new LogVal(field.type, true);
		val->val.string_val = new string(s);
		return val;
	}

	case TYPE_BOOL: {
		if ( s != "T" && s != "F" ) {
			Error(Fmt("Invalid value for boolean: %s", s.c_str()));
			return 0;
		}
		LogVal* val = new LogVal(field.type, true);
		val->val.int_val = ( s == "T" ) ? 1 : 0;
		return val;
	}

	case TYPE_INT: {
		LogVal* val = new LogVal(field.type, true);
		// atoi() is limited to the range of int; strtoll() covers long long.
		// NOTE(review): assumes int_val is wider than int -- confirm against LogVal.
		val->val.int_val = strtoll(s.c_str(), 0, 10);
		return val;
	}

	case TYPE_DOUBLE:
	case TYPE_TIME:
	case TYPE_INTERVAL: {
		LogVal* val = new LogVal(field.type, true);
		val->val.double_val = atof(s.c_str());
		return val;
	}

	case TYPE_COUNT:
	case TYPE_COUNTER:
	case TYPE_PORT: {
		LogVal* val = new LogVal(field.type, true);
		// Same reasoning as TYPE_INT, for the unsigned range.
		// NOTE(review): assumes uint_val is wider than int -- confirm against LogVal.
		val->val.uint_val = strtoull(s.c_str(), 0, 10);
		return val;
	}

	case TYPE_SUBNET: {
		// Expected form: <address>/<width>
		const string::size_type slash = s.find('/');
		if ( slash == string::npos ) {
			Error(Fmt("Invalid value for subnet: %s", s.c_str()));
			return 0;
		}

		const string width = s.substr(slash + 1);
		const string addr = s.substr(0, slash);

		LogVal* val = new LogVal(field.type, true);
		val->val.subnet_val.width = atoi(width.c_str());
		// NOTE: dotted_to_addr BREAKS THREAD SAFETY! it uses reporter.
		// Solve this some other time....
		val->val.subnet_val.net = dotted_to_addr(addr.c_str());
		return val;
	}

	case TYPE_ADDR: {
		LogVal* val = new LogVal(field.type, true);
		// NOTE: dotted_to_addr BREAKS THREAD SAFETY! it uses reporter.
		// Solve this some other time....
		addr_type t = dotted_to_addr(s.c_str());
#ifdef BROv6
		copy_addr(t, val->val.addr_val);
#else
		copy_addr(&t, val->val.addr_val);
#endif
		return val;
	}

	case TYPE_TABLE:
	case TYPE_VECTOR: {
		// Elements are split on the first character of set_separator; count
		// with that same character so the expected and actual element counts
		// agree even when the separator is not ','.
		const char elem_sep = set_separator[0];

		// how many entries do we have...
		unsigned int length = 0;
		if ( s.compare(empty_field) != 0 ) {
			length = 1;
			for ( string::size_type i = 0; i < s.size(); i++ )
				if ( s[i] == elem_sep )
					length++;
		}

		LogVal** lvals = new LogVal* [length];
		unsigned int pos = 0;
		bool ok = true;

		if ( length > 0 ) {
			istringstream splitstream(s);
			string element;

			while ( getline(splitstream, element, elem_sep) ) {
				if ( pos >= length ) {
					Error(Fmt("Internal error while parsing set. pos %d >= length %d. Element: %s", pos, length, element.c_str()));
					ok = false;
					break;
				}

				LogVal* newval = EntryToVal(element, field.subType());
				if ( newval == 0 ) {
					Error("Error while reading set");
					ok = false;
					break;
				}

				lvals[pos] = newval;
				pos++;
			}

			if ( ok && pos != length ) {
				Error("Internal error while parsing set: did not find all elements");
				ok = false;
			}
		}

		if ( !ok ) {
			// Release the elements converted so far; only slots [0, pos)
			// were ever written.
			for ( unsigned int i = 0; i < pos; i++ )
				delete lvals[i];
			delete [] lvals;
			return 0;
		}

		LogVal* val = new LogVal(field.type, true);
		if ( field.type == TYPE_TABLE ) {
			val->val.set_val.vals = lvals;
			val->val.set_val.size = length;
		} else {
			val->val.vector_val.vals = lvals;
			val->val.vector_val.size = length;
		}
		return val;
	}

	default:
		Error(Fmt("unsupported field format %d for %s", field.type,
			field.name.c_str()));
		return 0;
	}
}
// read the entire file and send appropriate thingies back to InputMgr
bool InputReaderAscii::DoUpdate() {
// dirty, fix me. (well, apparently after trying seeking, etc - this is not that bad)
if ( file && file->is_open() ) {
file->close();
}
file = new ifstream(fname.c_str());
if ( !file->is_open() ) {
Error(Fmt("cannot open %s", fname.c_str()));
return false;
}
//
// file->seekg(0, ios::beg); // do not forget clear.
if ( ReadHeader() == false ) {
return false;
}
// TODO: all the stuff we need for a second reading.
// *cough*
//
//
// new keymap
//map<string, string> *newKeyMap = new map<string, string>();
string line;
while ( GetLine(line ) ) {
// split on tabs
istringstream splitstream(line);
LogVal** fields = new LogVal*[num_fields];
//string string_fields[num_fields];
unsigned int currTab = 0;
unsigned int currField = 0;
while ( splitstream ) {
string s;
if ( !getline(splitstream, s, separator[0]) )
break;
if ( currTab >= columnMap.size() ) {
Error("Tabs in heading do not match tabs in data?");
//disabled = true;
return false;
}
FieldMapping currMapping = columnMap[currTab];
currTab++;
if ( currMapping.IsEmpty() ) {
// well, that was easy
continue;
}
if ( currField >= num_fields ) {
Error("internal error - fieldnum greater as possible");
return false;
}
LogVal* val = EntryToVal(s, currMapping);
if ( val == 0 ) {
return false;
}
fields[currMapping.position] = val;
//string_fields[currMapping.position] = s;
currField++;
}
if ( currField != num_fields ) {
Error("curr_field != num_fields in DoUpdate. Columns in file do not match column definition.");
return false;
}
SendEntry(fields);
/*
string indexstring = "";
string valstring = "";
for ( unsigned int i = 0; i < idx_fields; i++ ) {
indexstring.append(string_fields[i]);
}
for ( unsigned int i = idx_fields; i < num_fields; i++ ) {
valstring.append(string_fields[i]);
}
string valhash = Hash(valstring);
string indexhash = Hash(indexstring);
if ( keyMap->find(indexhash) == keyMap->end() ) {
// new key
Put(fields);
} else if ( (*keyMap)[indexhash] != valhash ) {
// changed key
Put(fields);
keyMap->erase(indexhash);
} else {
// field not changed
keyMap->erase(indexhash);
}
(*newKeyMap)[indexhash] = valhash;
*/
for ( unsigned int i = 0; i < num_fields; i++ ) {
delete fields[i];
}
delete [] fields;
}
//file->clear(); // remove end of file evil bits
//file->seekg(0, ios::beg); // and seek to start.
EndCurrentSend();
return true;
}