zeek/tools/binpac/lib/binpac_buffer.cc

458 lines
12 KiB
C++

// See the file "COPYING" in the main distribution directory for copyright.
#include <cstdlib>
#include <cstring> // for memcpy
#define binpac_regex_h
#include "binpac.h"
#include "binpac_buffer.h"
namespace binpac {
extern double network_time();
namespace {
const unsigned char CR = '\r';
const unsigned char LF = '\n';
} // namespace
binpac::FlowBuffer::Policy binpac::FlowBuffer::policy = {
// max_capacity
10 * 1024 * 1024,
// min_capacity
512,
// contract_threshold
2 * 1024 * 1024,
};
FlowBuffer::FlowBuffer(LineBreakStyle linebreak_style) {
buffer_length_ = 0;
buffer_ = nullptr;
orig_data_begin_ = nullptr;
orig_data_end_ = nullptr;
linebreak_style_ = linebreak_style;
linebreak_style_default = linebreak_style;
linebreaker_ = 0;
ResetLineState();
mode_ = UNKNOWN_MODE;
frame_length_ = 0;
chunked_ = false;
data_seq_at_orig_data_end_ = 0;
eof_ = false;
have_pending_request_ = false;
buffer_n_ = 0;
NewMessage();
}
FlowBuffer::~FlowBuffer() {
if ( buffer_ )
free(buffer_);
}
void FlowBuffer::NewMessage() {
BINPAC_ASSERT(frame_length_ >= 0);
int bytes_to_advance = 0;
if ( buffer_n_ == 0 ) {
switch ( mode_ ) {
case LINE_MODE: bytes_to_advance = (frame_length_ + (linebreak_style_ == STRICT_CRLF ? 2 : 1)); break;
case FRAME_MODE: bytes_to_advance = frame_length_; break;
case UNKNOWN_MODE: break;
}
}
orig_data_begin_ += bytes_to_advance;
BINPAC_ASSERT(orig_data_begin_ <= orig_data_end_);
buffer_n_ = 0;
message_complete_ = false;
ContractBuffer();
}
void FlowBuffer::ResetLineState() {
switch ( linebreak_style_ ) {
case CR_OR_LF: state_ = CR_OR_LF_0; break;
case STRICT_CRLF: state_ = STRICT_CRLF_0; break;
case LINE_BREAKER: break; // Nothing to reset
default: BINPAC_ASSERT(false); break;
}
}
void FlowBuffer::ExpandBuffer(int length) {
if ( buffer_length_ >= length )
return;
if ( length < policy.min_capacity )
length = policy.min_capacity;
if ( length < buffer_length_ * 2 )
length = buffer_length_ * 2;
if ( length > policy.max_capacity ) {
std::string reason = strfmt("expand past max capacity %d/%d", length, policy.max_capacity);
throw ExceptionFlowBufferAlloc(reason.c_str());
}
// Allocate a new buffer and copy the existing contents
buffer_length_ = length;
unsigned char* new_buf = (unsigned char*)realloc(buffer_, buffer_length_);
if ( ! new_buf )
throw ExceptionFlowBufferAlloc("expand realloc OOM");
buffer_ = new_buf;
}
void FlowBuffer::ContractBuffer() {
if ( buffer_length_ < policy.contract_threshold )
return;
buffer_length_ = policy.min_capacity;
unsigned char* new_buf = (unsigned char*)realloc(buffer_, buffer_length_);
if ( ! new_buf )
throw ExceptionFlowBufferAlloc("contract realloc OOM");
buffer_ = new_buf;
}
void FlowBuffer::SetLineBreaker(unsigned char* lbreaker) {
linebreaker_ = *lbreaker;
linebreak_style_default = linebreak_style_;
linebreak_style_ = LINE_BREAKER;
}
void FlowBuffer::UnsetLineBreaker() { linebreak_style_ = linebreak_style_default; }
void FlowBuffer::NewLine() {
FlowBuffer::NewMessage();
mode_ = LINE_MODE;
frame_length_ = 0;
chunked_ = false;
have_pending_request_ = true;
if ( state_ == FRAME_0 )
ResetLineState();
MarkOrCopyLine();
}
void FlowBuffer::NewFrame(int frame_length, bool chunked) {
FlowBuffer::NewMessage();
mode_ = FRAME_MODE;
frame_length_ = frame_length;
chunked_ = chunked;
have_pending_request_ = true;
MarkOrCopyFrame();
}
void FlowBuffer::BufferData(const_byteptr data, const_byteptr end) {
mode_ = FRAME_MODE;
frame_length_ += (end - data);
MarkOrCopyFrame();
NewData(data, end);
}
void FlowBuffer::FinishBuffer() { message_complete_ = true; }
void FlowBuffer::GrowFrame(int length) {
BINPAC_ASSERT(frame_length_ >= 0);
if ( length <= frame_length_ )
return;
BINPAC_ASSERT(! chunked_ || frame_length_ == 0);
mode_ = FRAME_MODE;
frame_length_ = length;
MarkOrCopyFrame();
}
void FlowBuffer::DiscardData() {
mode_ = UNKNOWN_MODE;
message_complete_ = false;
have_pending_request_ = false;
orig_data_begin_ = orig_data_end_ = nullptr;
buffer_n_ = 0;
frame_length_ = 0;
ContractBuffer();
}
void FlowBuffer::set_eof() {
// fprintf(stderr, "EOF\n");
eof_ = true;
if ( chunked_ )
frame_length_ = orig_data_end_ - orig_data_begin_;
if ( frame_length_ < 0 )
frame_length_ = 0;
}
void FlowBuffer::NewData(const_byteptr begin, const_byteptr end) {
BINPAC_ASSERT(begin <= end);
ClearPreviousData();
BINPAC_ASSERT((buffer_n_ == 0 && message_complete_) || orig_data_begin_ == orig_data_end_);
orig_data_begin_ = begin;
orig_data_end_ = end;
data_seq_at_orig_data_end_ += (end - begin);
MarkOrCopy();
}
void FlowBuffer::MarkOrCopy() {
if ( ! message_complete_ ) {
switch ( mode_ ) {
case LINE_MODE: MarkOrCopyLine(); break;
case FRAME_MODE: MarkOrCopyFrame(); break;
default: break;
}
}
}
void FlowBuffer::ClearPreviousData() {
// All previous data must have been processed or buffered already
if ( orig_data_begin_ < orig_data_end_ ) {
BINPAC_ASSERT(buffer_n_ == 0);
if ( chunked_ ) {
if ( frame_length_ > 0 ) {
frame_length_ -= (orig_data_end_ - orig_data_begin_);
}
orig_data_begin_ = orig_data_end_;
}
}
}
void FlowBuffer::NewGap(int length) {
ClearPreviousData();
if ( chunked_ && frame_length_ >= 0 ) {
frame_length_ -= length;
if ( frame_length_ < 0 )
frame_length_ = 0;
}
orig_data_begin_ = orig_data_end_ = nullptr;
MarkOrCopy();
}
void FlowBuffer::MarkOrCopyLine() {
switch ( linebreak_style_ ) {
case CR_OR_LF: MarkOrCopyLine_CR_OR_LF(); break;
case STRICT_CRLF: MarkOrCopyLine_STRICT_CRLF(); break;
case LINE_BREAKER: MarkOrCopyLine_LINEBREAK(); break;
default: BINPAC_ASSERT(false); break;
}
}
/*
Finite state automaton for CR_OR_LF:
(!--line is complete, *--add to buffer)
CR_OR_LF_0:
CR: CR_OR_LF_1 !
LF: CR_OR_LF_0 !
.: CR_OR_LF_0 *
CR_OR_LF_1:
CR: CR_OR_LF_1 !
LF: CR_OR_LF_0
.: CR_OR_LF_0 *
*/
void FlowBuffer::MarkOrCopyLine_CR_OR_LF() {
if ( ! (orig_data_begin_ && orig_data_end_) )
return;
if ( state_ == CR_OR_LF_1 && orig_data_begin_ < orig_data_end_ && *orig_data_begin_ == LF ) {
state_ = CR_OR_LF_0;
++orig_data_begin_;
}
const_byteptr data;
for ( data = orig_data_begin_; data < orig_data_end_; ++data ) {
switch ( *data ) {
case CR: state_ = CR_OR_LF_1; goto found_end_of_line;
case LF:
// state_ = CR_OR_LF_0;
goto found_end_of_line;
default:
// state_ = CR_OR_LF_0;
break;
}
}
AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
return;
found_end_of_line:
if ( buffer_n_ == 0 ) {
frame_length_ = data - orig_data_begin_;
}
else {
AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_);
// But eliminate the last CR or LF
--buffer_n_;
}
message_complete_ = true;
#if DEBUG_FLOW_BUFFER
fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(),
string((const char*)begin(), (const char*)end()).c_str());
#endif
}
/*
Finite state automaton and STRICT_CRLF:
(!--line is complete, *--add to buffer)
STRICT_CRLF_0:
CR: STRICT_CRLF_1 *
LF: STRICT_CRLF_0 *
.: STRICT_CRLF_0 *
STRICT_CRLF_1:
CR: STRICT_CRLF_1 *
LF: STRICT_CRLF_0 ! (--buffer_n_)
.: STRICT_CRLF_0 *
*/
void FlowBuffer::MarkOrCopyLine_STRICT_CRLF() {
const_byteptr data;
for ( data = orig_data_begin_; data < orig_data_end_; ++data ) {
switch ( *data ) {
case CR: state_ = STRICT_CRLF_1; break;
case LF:
if ( state_ == STRICT_CRLF_1 ) {
state_ = STRICT_CRLF_0;
goto found_end_of_line;
}
break;
default: state_ = STRICT_CRLF_0; break;
}
}
AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
return;
found_end_of_line:
if ( buffer_n_ == 0 ) {
frame_length_ = data - 1 - orig_data_begin_;
}
else {
AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_);
// Pop the preceding CR and LF from the buffer
buffer_n_ -= 2;
}
message_complete_ = true;
#if DEBUG_FLOW_BUFFER
fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(),
string((const char*)begin(), (const char*)end()).c_str());
#endif
}
void FlowBuffer::MarkOrCopyLine_LINEBREAK() {
if ( ! (orig_data_begin_ && orig_data_end_) )
return;
const_byteptr data;
for ( data = orig_data_begin_; data < orig_data_end_; ++data ) {
if ( *data == linebreaker_ )
goto found_end_of_line;
}
AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
return;
found_end_of_line:
if ( buffer_n_ == 0 ) {
frame_length_ = data - orig_data_begin_;
}
else {
AppendToBuffer(orig_data_begin_, data + 1 - orig_data_begin_);
// But eliminate the last 'linebreaker' character
--buffer_n_;
}
message_complete_ = true;
#if DEBUG_FLOW_BUFFER
fprintf(stderr, "%.6f Line complete: [%s]\n", network_time(),
string((const char*)begin(), (const char*)end()).c_str());
#endif
}
// Invariants:
//
// When buffer_n_ == 0:
// Frame = [orig_data_begin_..(orig_data_begin_ + frame_length_)]
//
// When buffer_n_ > 0:
// Frame = [0..buffer_n_][orig_data_begin_..]
void FlowBuffer::MarkOrCopyFrame() {
if ( mode_ == FRAME_MODE && state_ == CR_OR_LF_1 && orig_data_begin_ < orig_data_end_ ) {
// Skip the lingering LF
if ( *orig_data_begin_ == LF ) {
++orig_data_begin_;
}
state_ = FRAME_0;
}
if ( buffer_n_ == 0 ) {
// If there is enough data
if ( frame_length_ >= 0 && orig_data_end_ - orig_data_begin_ >= frame_length_ ) {
// Do nothing except setting the message complete flag
message_complete_ = true;
}
else {
if ( ! chunked_ ) {
AppendToBuffer(orig_data_begin_, orig_data_end_ - orig_data_begin_);
}
message_complete_ = false;
}
}
else {
BINPAC_ASSERT(! chunked_);
int bytes_to_copy = orig_data_end_ - orig_data_begin_;
message_complete_ = false;
if ( frame_length_ >= 0 && buffer_n_ + bytes_to_copy >= frame_length_ ) {
bytes_to_copy = frame_length_ - buffer_n_;
message_complete_ = true;
}
AppendToBuffer(orig_data_begin_, bytes_to_copy);
}
#if DEBUG_FLOW_BUFFER
if ( message_complete_ ) {
fprintf(stderr, "%.6f frame complete: [%s]\n", network_time(),
string((const char*)begin(), (const char*)end()).c_str());
}
#endif
}
void FlowBuffer::AppendToBuffer(const_byteptr data, int len) {
if ( len <= 0 )
return;
BINPAC_ASSERT(! chunked_);
ExpandBuffer(buffer_n_ + len);
memcpy(buffer_ + buffer_n_, data, len);
buffer_n_ += len;
orig_data_begin_ += len;
BINPAC_ASSERT(orig_data_begin_ <= orig_data_end_);
}
} // namespace binpac