Huge updates to the RDP analyzer from Josh Liburdi.

- More data pulled into scriptland.
- Logs expanded with client screen resolution and desired color depth.
- Values in UTF-16 on the wire are converted to UTF-8 before being sent to scriptland.
- If the RDP session turns into SSL records, we now pass data that appears to be SSL to the PIA analyzer.
- If RDP uses native encryption with X.509 certs, we pass those certs to the files framework and the base scripts pass them forward to the X.509 analyzer.
- Lots of cleanup and adjustment to fit the documented protocol a bit better.
- Cleaned up the DPD signatures.
- Moved to flowunit instead of datagram.
- Added tests.
2025-10-02 14:48:21 +00:00 · 2015-03-04 13:12:03 -05:00 · 2015-03-04 13:12:03 -05:00 · bbedb73a45
commit bbedb73a45
parent a63d7307c8
26 changed files with 1535 additions and 346 deletions
--- a/scripts/base/init-bare.bro
+++ b/scripts/base/init-bare.bro
@ -2847,7 +2847,44 @@ export {
 		attributes    : RADIUS::Attributes &optional;
 	};
 }
-module GLOBAL;
+
 module RDP;
 export {
 	## Set of boolean capability flags carried inside RDP::ClientCoreData
 	## (see its ec_flags field). The RDP base script reads
 	## want_32bpp_session to decide whether the client asked for a
 	## 32-bit session.
 	## NOTE(review): presumably mirrors the earlyCapabilityFlags bit
 	## field of the MS-RDPBCGR Client Core Data block -- confirm
 	## against the analyzer.
 	type RDP::EarlyCapabilityFlags: record {
 		support_err_info_pdu:       bool;
 		want_32bpp_session:         bool;
 		support_statusinfo_pdu:     bool;
 		strong_asymmetric_keys:     bool;
 		support_monitor_layout_pdu: bool;
 		support_netchar_autodetect: bool;
 		support_dynvc_gfx_protocol: bool;
 		support_dynamic_time_zone:  bool;
 		support_heartbeat_pdu:      bool;
 	};
 	## Client settings delivered to script land by the
 	## rdp_client_core_data event. Fields up to and including
 	## ime_file_name are always present; every later field is &optional
 	## and must be tested with ?$ before it is read.
 	## keyboard_layout, client_build and high_color_depth are numeric
 	## codes; the RDP base script translates them through the
 	## RDP::languages, RDP::builds and RDP::high_color_depths tables.
 	type RDP::ClientCoreData: record {
 		version_major:          count;
 		version_minor:          count;
 		desktop_width:          count;
 		desktop_height:         count;
 		color_depth:            count;
 		sas_sequence:           count;
 		keyboard_layout:        count;
 		client_build:           count;
 		client_name:            string;
 		keyboard_type:          count;
 		keyboard_sub:           count;
 		keyboard_function_key:  count;
 		ime_file_name:          string;
 		post_beta2_color_depth: count  &optional;
 		client_product_id:      string &optional;
 		serial_number:          count  &optional;
 		high_color_depth:       count  &optional;
 		supported_color_depths: count  &optional;
 		ec_flags:               RDP::EarlyCapabilityFlags &optional;
 		dig_product_id:         string &optional;
 	};
 }
@load base/bif/plugins/Bro_SNMP.types.bif
--- a/scripts/base/protocols/rdp/consts.bro
+++ b/scripts/base/protocols/rdp/consts.bro
@ -35,6 +35,21 @@ export {
 		[4] = "FIPS"
 	} &default = function(n: count): string { return fmt("encryption_level-%d", n); };
 	const high_color_depths = {
 		[0x0004] = "4bit",
 		[0x0008] = "8bit",
 		[0x000F] = "15bit",
 		[0x0010] = "16bit",
 		[0x0018] = "24bit"
 	} &default = function(n: count): string { return fmt("high_color_depth-%d", n); };
 	const color_depths = {
 		[0x0001] = "24bit",
 		[0x0002] = "16bit",
 		[0x0004] = "15bit",
 		[0x0008] = "32bit"
 	} &default = function(n: count): string { return fmt("color_depth-%d", n); };
 	const results = {
 		[0] = "Success",
 		[1] = "User rejected",
--- a/scripts/base/protocols/rdp/dpd.sig
+++ b/scripts/base/protocols/rdp/dpd.sig
@ -1,17 +1,12 @@
-signature dpd_rdp_client_request {
+signature dpd_rdp_client {
-  ip-proto == tcp
+	ip-proto == tcp
-  payload /.*Cookie: mstshash\=.*/	
+	# Client request
-  enable "rdp"
+	payload /.*(Cookie: mstshash\=|Duca.*(rdpdr|rdpsnd|drdynvc|cliprdr))/
 	requires-reverse-signature dpd_rdp_server
 	enable "rdp"
 }
-signature dpd_rdp_client_header {
+signature dpd_rdp_server {
-  ip-proto == tcp
+	ip-proto == tcp
-  payload /.*Duca.*(rdpdr|rdpsnd|drdynvc|cliprdr).*/
+	payload /(.{5}\xd0|.*McDn)/
  enable "rdp"
 }
 signature dpd_rdp_server_response {
  ip-proto == tcp
  payload /.*McDn.*/
  enable "rdp"
 }
--- a/scripts/base/protocols/rdp/main.bro
+++ b/scripts/base/protocols/rdp/main.bro
@ -3,155 +3,180 @@
 module RDP;
 export {
-        redef enum Log::ID += { LOG };
+	redef enum Log::ID += { LOG };
-        type Info: record {
+	type Info: record {
-                ## Timestamp for when the event happened.
+		## Timestamp for when the event happened.
-                ts:     		time    &log;
+		ts:                    time    &log;
-                ## Unique ID for the connection.
+		## Unique ID for the connection.
-                uid:    		string  &log;
+		uid:                   string  &log;
-                ## The connection's 4-tuple of endpoint addresses/ports.
+		## The connection's 4-tuple of endpoint addresses/ports.
-                id:     		conn_id &log;
+		id:                    conn_id &log;
-                ## Cookie value used by the client machine.
+		## Cookie value used by the client machine.
-                ## This is typically a username.
+		## This is typically a username.
-                cookie: 		string 	&log &optional;
+		cookie:                string  &log &optional;
-                ## Keyboard layout (language) of the client machine.
+		## Keyboard layout     (language) of the client machine.
-                keyboard_layout:        string 	&log &optional;
+		keyboard_layout:       string  &log &optional;
 		## RDP client version used by the client machine.
-		client_build:		string 	&log &optional;
+		client_build:          string  &log &optional;
-                ## Hostname of the client machine.
+		## Name of the client machine.
-                client_hostname:	string 	&log &optional;
+		client_name:           string  &log &optional;
-                ## Product ID of the client machine.
+		## Product ID of the client machine.
-                client_product_id:	string 	&log &optional;
+		client_dig_product_id: string  &log &optional;
-                ## GCC result for the connection. 
+		## Desktop width of the client machine.
-                result: 		string  &log &optional;
+		desktop_width:         count   &log &optional;
-                ## Encryption level of the connection.
+		## Desktop height of the client machine.
-                encryption_level:       string  &log &optional;
+		desktop_height:        count   &log &optional;
-                ## Encryption method of the connection. 
+		## The color depth requested by the client in 
-                encryption_method:      string  &log &optional;
+		## the high_color_depth field.
-
+		requested_color_depth: string  &log &optional;
-		## The analyzer ID used for the analyzer instance attached
+		## GCC result for the connection. 
-		## to each connection.  It is not used for logging since it's a
+		result:                string  &log &optional;
-		## meaningless arbitrary number.
+		## Encryption level of the connection.
-		analyzer_id:      count            &optional;
+		encryption_level:      string  &log &optional;
-		## Track status of logging RDP connections.
+		## Encryption method of the connection. 
-		done:			bool 	&default=F;
+		encryption_method:     string  &log &optional;
-        };
+		};
 	## If true, detach the RDP analyzer from the connection to prevent
-	## continuing to process encrypted traffic. Helps with performance
+	## continuing to process encrypted traffic.
-	## (especially with large file transfers).
+	const disable_analyzer_after_detection = F &redef;
 	const disable_analyzer_after_detection = T &redef;
 	## The amount of time to monitor an RDP session from when it is first 
 	## identified. When this interval is reached, the session is logged.
-	const rdp_interval = 10secs &redef;
+	const rdp_check_interval = 10secs &redef;
-        ## Event that can be handled to access the rdp record as it is sent on
+	## Event that can be handled to access the rdp record as it is sent on
-        ## to the logging framework.
+	## to the logging framework.
-        global log_rdp: event(rec: Info);
+	global log_rdp: event(rec: Info);
 }
 # Internal fields that aren't useful externally
 redef record Info += {
 	## The analyzer ID used for the analyzer instance attached
 	## to each connection.  It is not used for logging since it's a
 	## meaningless arbitrary number.
 	analyzer_id: count &optional;
 	## Track status of logging RDP connections.
 	done:        bool  &default=F;
 };
 redef record connection += {
-        rdp: Info &optional;
+	rdp: Info &optional;
-        };
+};
 const ports = { 3389/tcp };
 redef likely_server_ports += { ports };
 event bro_init() &priority=5
        {
        Log::create_stream(RDP::LOG, [$columns=Info, $ev=log_rdp]);
        Analyzer::register_for_ports(Analyzer::ANALYZER_RDP, ports);
        }
 # Verify that the RDP session contains
 # RDP data before writing it to the log. 
 function verify_rdp(c: connection)
 	{
-	local info = c$rdp;
+	Log::create_stream(RDP::LOG, [$columns=RDP::Info, $ev=log_rdp]);
-	if ( info?$cookie || info?$keyboard_layout || info?$result )
+	Analyzer::register_for_ports(Analyzer::ANALYZER_RDP, ports);
 	  Log::write(RDP::LOG,info);
 	else
 	  Reporter::error("RDP analyzer was initialized but no data was found");
 	}
-event log_record(c: connection, remove_analyzer: bool)
+function write_log(c: connection)
-        {
+	{
 	local info = c$rdp;
 	if ( info$done )
 		return;
 	# Mark this record as fully logged and finished.
 	info$done = T;
 	# Verify that the RDP session contains
 	# RDP data before writing it to the log. 
 	if ( info?$cookie || info?$keyboard_layout || info?$result )
 		Log::write(RDP::LOG, info);
 	}
 event check_record(c: connection)
 	{
 	# If the record was logged, then stop processing.
-        if ( c$rdp$done )
+	if ( c$rdp$done )
-          return;
+		return;
-	# If the analyzer is no longer attached, then 
+	# If the value rdp_check_interval has passed since the 
 	# log the record and stop processing.
 	if ( ! remove_analyzer )
 	  {
 	  c$rdp$done = T;
 	  verify_rdp(c);
 	  return;
 	  }
 	# If the value rdp_interval has passed since the 
 	# RDP session was started, then log the record. 
-        local diff = network_time() - c$rdp$ts;
+	local diff = network_time() - c$rdp$ts;
-        if ( diff > rdp_interval )
+	if ( diff > rdp_check_interval )
-          {
+		{
-          c$rdp$done = T;
+		write_log(c);
 	  verify_rdp(c);
-	  # Remove the analyzer if it is still attached.
+		# Remove the analyzer if it is still attached.
-          if ( remove_analyzer && disable_analyzer_after_detection && connection_exists(c$id) && c$rdp?$analyzer_id )
+		if ( disable_analyzer_after_detection && 
-            {
+		     connection_exists(c$id) && 
-            disable_analyzer(c$id, c$rdp$analyzer_id);
+		     c$rdp?$analyzer_id )
-            delete c$rdp$analyzer_id;
+			{
-            }
+			disable_analyzer(c$id, c$rdp$analyzer_id);
 			}
-	  return;
+		return;
-          }
+		}
-	# If the analyzer is attached and the duration
+	else
-	# to monitor the RDP session was not met, then
+		{
-	# reschedule the logging event.
+		# If the analyzer is attached and the duration
-        else
+		# to monitor the RDP session was not met, then
-          schedule +rdp_interval { log_record(c,remove_analyzer) };
+		# reschedule the logging event.
-        }
+		schedule rdp_check_interval { check_record(c) };
 		}
 	}
 function set_session(c: connection)
-        {
+	{
-        if ( ! c?$rdp )
+	if ( ! c?$rdp )
-	  {
+		{
-          c$rdp = [$ts=network_time(),$id=c$id,$uid=c$uid];
+		c$rdp = [$ts=network_time(),$id=c$id,$uid=c$uid];
-	  # The RDP session is scheduled to be logged from
+		# The RDP session is scheduled to be logged from
-	  # the time it is first initiated.
+		# the time it is first initiated.
-	  schedule +rdp_interval { log_record(c,T) };	
+		schedule rdp_check_interval { check_record(c) };
-	  }
+		}
-        }
+	}
 # Handles the client's connection request: makes sure the connection
 # carries an RDP::Info record (set_session creates it and schedules the
 # periodic check the first time it is called) and stores the cookie
 # sent by the client in that record.
 event rdp_client_request(c: connection, cookie: string) &priority=5
 	{
 	# Must run first: c$rdp does not exist until set_session creates it.
 	set_session(c);
 	c$rdp$cookie = cookie;
 	}
-event rdp_client_data(c: connection, keyboard_layout: count, build: count, hostname: string, product_id: string) &priority=5
+event rdp_client_core_data(c: connection, data: RDP::ClientCoreData) &priority=5
 	{
 	set_session(c);
-	c$rdp$keyboard_layout = languages[keyboard_layout];
+
-	c$rdp$client_build = builds[build];
+	c$rdp$keyboard_layout       = RDP::languages[data$keyboard_layout];
-	c$rdp$client_hostname = gsub(cat(hostname),/\\0/,""); 
+	c$rdp$client_build          = RDP::builds[data$client_build];
-	c$rdp$client_product_id = gsub(cat(product_id),/\\0/,"");
+	c$rdp$client_name           = data$client_name;
 	c$rdp$client_dig_product_id = data$dig_product_id;
 	c$rdp$desktop_width         = data$desktop_width;
 	c$rdp$desktop_height        = data$desktop_height;
 	if ( data?$ec_flags && data$ec_flags$want_32bpp_session )
 		c$rdp$requested_color_depth = "32-bit";
 	else
 		c$rdp$requested_color_depth = RDP::high_color_depths[data$high_color_depth];
 	}
 event rdp_result(c: connection, result: count) &priority=5
 	{
-        set_session(c);
+	set_session(c);
-        c$rdp$result = results[result];
+
 	c$rdp$result = RDP::results[result];
 	}
 event rdp_server_security(c: connection, encryption_method: count, encryption_level: count) &priority=5
 	{
 	set_session(c);
-	c$rdp$encryption_method = encryption_methods[encryption_method];
+
-	c$rdp$encryption_level = encryption_levels[encryption_level];
+	c$rdp$encryption_method = RDP::encryption_methods[encryption_method];
 	c$rdp$encryption_level = RDP::encryption_levels[encryption_level];
 	}
 # Attaches the X.509 and hash analyzers to files that the RDP analyzer
 # hands to the files framework (the certificates exchanged when RDP uses
 # its native encryption).
 #
 # f:       the file that was just associated with a connection.
 # c:       the connection the file is being transferred over.
 # is_orig: direction of the transfer (unused here).
 event file_over_new_connection(f: fa_file, c: connection, is_orig: bool)
 	{
 	# This event is raised for every file on every connection. Without
 	# this guard each HTTP/SMTP/FTP/... file would also be pushed through
 	# the X.509 parser and hashed twice.
 	if ( f$source != "RDP" )
 		return;
 	Files::add_analyzer(f, Files::ANALYZER_X509);
 	# always calculate hashes. They are not necessary for base scripts
 	# but very useful for identification, and required for policy scripts
 	Files::add_analyzer(f, Files::ANALYZER_MD5);
 	Files::add_analyzer(f, Files::ANALYZER_SHA1);
 	}
 event protocol_confirmation(c: connection, atype: Analyzer::Tag, aid: count) &priority=5
@ -167,12 +192,14 @@ event protocol_violation(c: connection, atype: Analyzer::Tag, aid: count, reason
 	{
 	# If a protocol violation occurs, then log the record immediately.
 	if ( c?$rdp )
-	  schedule +0secs { log_record(c,F) };
+		write_log(c);
 	}
 event connection_state_remove(c: connection) &priority=-5
-        {
+	{
 	# If the connection is removed, then log the record immediately.
-        if ( c?$rdp )
+	if ( c?$rdp )
-          schedule +0secs { log_record(c,F) };
+		{
-        }
+		write_log(c);
 		}
 	}
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@ -261,6 +261,7 @@ set(bro_SRCS
    ChunkedIO.cc
    CompHash.cc
    Conn.cc
    ConvertUTF.c
    DFA.cc
    DbgBreakpoint.cc
    DbgHelp.cc
--- a/src/ConvertUTF.c
+++ b/src/ConvertUTF.c
@ -0,0 +1,708 @@
 /*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *===------------------------------------------------------------------------=*/
 /*
 * Copyright 2001-2004 Unicode, Inc.
 * 
 * Disclaimer
 * 
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 * 
 * Limitations on Rights to Redistribute This Code
 * 
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */
 /* ---------------------------------------------------------------------
    Conversions between UTF32, UTF-16, and UTF-8. Source code file.
    Author: Mark E. Davis, 1994.
    Rev History: Rick McGowan, fixes & updates May 2001.
    Sept 2001: fixed const & error conditions per
        mods suggested by S. Parent & A. Lillich.
    June 2002: Tim Dodd added detection and handling of incomplete
        source sequences, enhanced error detection, added casts
        to eliminate compiler warnings.
    July 2003: slight mods to back out aggressive FFFE detection.
    Jan 2004: updated switches in from-UTF8 conversions.
    Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
    See the header file "ConvertUTF.h" for complete documentation.
 ------------------------------------------------------------------------ */
 #include "ConvertUTF.h"
 #ifdef CVTUTF_DEBUG
 #include <stdio.h>
 #endif
 #include <assert.h>
 /* Constants for UTF-16 surrogate arithmetic: a supplementary code point has
  * halfBase subtracted, then its upper bits (shifted right by halfShift) and
  * its lower bits (selected by halfMask) are added to the surrogate bases. */
 static const int halfShift  = 10; /* used for shifting by 10 bits */
 static const UTF32 halfBase = 0x0010000UL;
 static const UTF32 halfMask = 0x3FFUL;
 /* Inclusive bounds of the high and low surrogate code unit ranges. */
 #define UNI_SUR_HIGH_START  (UTF32)0xD800
 #define UNI_SUR_HIGH_END    (UTF32)0xDBFF
 #define UNI_SUR_LOW_START   (UTF32)0xDC00
 #define UNI_SUR_LOW_END     (UTF32)0xDFFF
 /* Boolean literals used by the Boolean-returning helpers in this file. */
 #define false      0
 #define true        1
 /* --------------------------------------------------------------------- */
 /*
 * Index into the table below with the first byte of a UTF-8 sequence to
 * get the number of trailing bytes that are supposed to follow it.
 * Note that *legal* UTF-8 values can't have 4 or 5 trailing bytes. The table is
 * left as-is for anyone who may want to do such conversion, which was
 * allowed in earlier algorithms.
 */
 static const char trailingBytesForUTF8[256] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
 };
 /*
 * Magic values subtracted from a buffer value during UTF8 conversion.
 * This table contains as many values as there might be trailing bytes
 * in a UTF-8 sequence.
 */
 static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL, 
                     0x03C82080UL, 0xFA082080UL, 0x82082080UL };
 /*
 * Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
 * into the first byte, depending on how many bytes follow.  There are
 * as many entries in this table as there are UTF-8 sequence types.
 * (I.e., one byte sequence, two byte... etc.). Remember that sequences
 * for *legal* UTF-8 will be 4 or fewer bytes total.
 */
 static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
 /* --------------------------------------------------------------------- */
 /* The interface converts a whole buffer to avoid function-call overhead.
 * Constants have been gathered. Loops & conditionals have been removed as
 * much as possible for efficiency, in favor of drop-through switches.
 * (See "Note A" at the bottom of the file for equivalent code.)
 * If your compiler supports it, the "isLegalUTF8" call can be turned
 * into an inline function.
 */
 /* --------------------------------------------------------------------- */
 /*
  * Converts the UTF-32 code points in [*sourceStart, sourceEnd) to UTF-16,
  * writing the code units into [*targetStart, targetEnd).
  *
  * On return *sourceStart and *targetStart are advanced to the positions
  * at which conversion stopped.
  *
  * Returns conversionOK, targetExhausted (the code point that did not fit
  * is left unconsumed at *sourceStart) or sourceIllegal.
  *
  * With flags == strictConversion a surrogate value in the input stops the
  * conversion with *sourceStart pointing at it; otherwise it is replaced by
  * UNI_REPLACEMENT_CHAR.
  */
 ConversionResult ConvertUTF32toUTF16 (
         const UTF32** sourceStart, const UTF32* sourceEnd, 
         UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
     ConversionResult result = conversionOK;
     const UTF32* source = *sourceStart;
     UTF16* target = *targetStart;
     while (source < sourceEnd) {
         UTF32 ch;
         /* Checked before consuming input, so nothing needs backing up. */
         if (target >= targetEnd) {
             result = targetExhausted; break;
         }
         ch = *source++;
         if (ch <= UNI_MAX_BMP) { /* Target is a character <= 0xFFFF */
             /* UTF-16 surrogate values are illegal in UTF-32; 0xffff or 0xfffe are both reserved values */
             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                 if (flags == strictConversion) {
                     --source; /* return to the illegal value itself */
                     result = sourceIllegal;
                     break;
                 } else {
                     *target++ = UNI_REPLACEMENT_CHAR;
                 }
             } else {
                 *target++ = (UTF16)ch; /* normal case */
             }
         } else if (ch > UNI_MAX_LEGAL_UTF32) {
             /* NOTE(review): unlike the surrogate case above, strict mode
              * here neither backs up source nor leaves the loop, so
              * conversion continues past the illegal value and a later
              * targetExhausted can overwrite this result. */
             if (flags == strictConversion) {
                 result = sourceIllegal;
             } else {
                 *target++ = UNI_REPLACEMENT_CHAR;
             }
         } else {
             /* target is a character in range 0x10000 - 0x10FFFF. */
             /* A surrogate pair needs two free code units. */
             if (target + 1 >= targetEnd) {
                 --source; /* Back up source pointer! */
                 result = targetExhausted; break;
             }
             ch -= halfBase;
             *target++ = (UTF16)((ch >> halfShift) + UNI_SUR_HIGH_START);
             *target++ = (UTF16)((ch & halfMask) + UNI_SUR_LOW_START);
         }
     }
     /* Report how far we got on both sides. */
     *sourceStart = source;
     *targetStart = target;
     return result;
 }
 /* --------------------------------------------------------------------- */
 /*
  * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32,
  * writing the code points into [*targetStart, targetEnd).
  *
  * On return *sourceStart and *targetStart are advanced to the positions
  * at which conversion stopped.
  *
  * Returns:
  *   conversionOK    - all input was converted.
  *   sourceExhausted - input ends with a high surrogate whose partner is
  *                     missing; *sourceStart points at that surrogate.
  *   sourceIllegal   - strictConversion only: an unpaired surrogate was
  *                     found; *sourceStart points at it.
  *   targetExhausted - output is full; *sourceStart points at the first
  *                     unit of the character that did not fit.
  *
  * Without strictConversion, unpaired surrogates are copied through
  * unchanged as individual code points.
  */
 ConversionResult ConvertUTF16toUTF32 (
         const UTF16** sourceStart, const UTF16* sourceEnd, 
         UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
     ConversionResult result = conversionOK;
     const UTF16* source = *sourceStart;
     UTF32* target = *targetStart;
     /* Zero-initialised so the CVTUTF_DEBUG diagnostic at the end never
      * reads an indeterminate value: ch2 is only assigned after a high
      * surrogate, but sourceIllegal is also raised for a lone low one. */
     UTF32 ch = 0, ch2 = 0;
     while (source < sourceEnd) {
         const UTF16* oldSource = source; /*  In case we have to back up because of target overflow. */
         ch = *source++;
         /* If we have a surrogate pair, convert to UTF32 first. */
         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
             /* If the 16 bits following the high surrogate are in the source buffer... */
             if (source < sourceEnd) {
                 ch2 = *source;
                 /* If it's a low surrogate, convert to UTF32. */
                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                         + (ch2 - UNI_SUR_LOW_START) + halfBase;
                     ++source;
                 } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                     --source; /* return to the illegal value itself */
                     result = sourceIllegal;
                     break;
                 }
             } else { /* We don't have the 16 bits following the high surrogate. */
                 --source; /* return to the high surrogate */
                 result = sourceExhausted;
                 break;
             }
         } else if (flags == strictConversion) {
             /* UTF-16 surrogate values are illegal in UTF-32 */
             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                 --source; /* return to the illegal value itself */
                 result = sourceIllegal;
                 break;
             }
         }
         /* Space is checked only after decoding, hence the saved position. */
         if (target >= targetEnd) {
             source = oldSource; /* Back up source pointer! */
             result = targetExhausted; break;
         }
         *target++ = ch;
     }
     *sourceStart = source;
     *targetStart = target;
 #ifdef CVTUTF_DEBUG
 if (result == sourceIllegal) {
     /* Cast so the arguments match %x whatever the width of UTF32 is. */
     fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n", (unsigned)ch, (unsigned)ch2);
     fflush(stderr);
 }
 #endif
     return result;
 }
 /*
  * Converts the UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8,
  * writing the bytes into [*targetStart, targetEnd).
  *
  * On return *sourceStart and *targetStart are advanced to the positions
  * at which conversion stopped.
  *
  * Returns conversionOK, sourceExhausted (input ends with a high surrogate
  * whose partner is missing), sourceIllegal (strictConversion only: an
  * unpaired surrogate) or targetExhausted (the character that did not fit
  * is left unconsumed).
  *
  * Without strictConversion an unpaired surrogate is not rejected; it is
  * encoded like any other value below 0x10000, i.e. as three bytes.
  */
 ConversionResult ConvertUTF16toUTF8 (
         const UTF16** sourceStart, const UTF16* sourceEnd, 
         UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
     ConversionResult result = conversionOK;
     const UTF16* source = *sourceStart;
     UTF8* target = *targetStart;
     while (source < sourceEnd) {
         UTF32 ch;
         unsigned short bytesToWrite = 0;
         /* (ch | byteMark) & byteMask keeps the low six bits of ch and
          * forces the top two bits to 10, the continuation-byte pattern. */
         const UTF32 byteMask = 0xBF;
         const UTF32 byteMark = 0x80; 
         const UTF16* oldSource = source; /* In case we have to back up because of target overflow. */
         ch = *source++;
         /* If we have a surrogate pair, convert to UTF32 first. */
         if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
             /* If the 16 bits following the high surrogate are in the source buffer... */
             if (source < sourceEnd) {
                 UTF32 ch2 = *source;
                 /* If it's a low surrogate, convert to UTF32. */
                 if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                     ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                         + (ch2 - UNI_SUR_LOW_START) + halfBase;
                     ++source;
                 } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                     --source; /* return to the illegal value itself */
                     result = sourceIllegal;
                     break;
                 }
             } else { /* We don't have the 16 bits following the high surrogate. */
                 --source; /* return to the high surrogate */
                 result = sourceExhausted;
                 break;
             }
         } else if (flags == strictConversion) {
             /* UTF-16 surrogate values are illegal in UTF-32 */
             if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                 --source; /* return to the illegal value itself */
                 result = sourceIllegal;
                 break;
             }
         }
         /* Figure out how many bytes the result will require */
         if (ch < (UTF32)0x80) {      bytesToWrite = 1;
         } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
         } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
         } else if (ch < (UTF32)0x110000) {  bytesToWrite = 4;
         } else {                            bytesToWrite = 3;
                                             ch = UNI_REPLACEMENT_CHAR;
         }
         /* Jump to the end of this character's slot; the bytes are filled
          * in back to front below. Undo the jump if the slot does not fit. */
         target += bytesToWrite;
         if (target > targetEnd) {
             source = oldSource; /* Back up source pointer! */
             target -= bytesToWrite; result = targetExhausted; break;
         }
         /* Emit continuation bytes last-to-first, six bits each, then the
          * lead byte carrying the length marker for bytesToWrite. */
         switch (bytesToWrite) { /* note: everything falls through. */
             case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
             case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
             case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
             case 1: *--target =  (UTF8)(ch | firstByteMark[bytesToWrite]);
         }
         /* target is back at the start of the slot; step past it. */
         target += bytesToWrite;
     }
     *sourceStart = source;
     *targetStart = target;
     return result;
 }
 /* --------------------------------------------------------------------- */
 /*
  * Converts the UTF-32 code points in [*sourceStart, sourceEnd) to UTF-8,
  * writing the bytes into [*targetStart, targetEnd).
  *
  * On return *sourceStart and *targetStart are advanced to the positions
  * at which conversion stopped.
  *
  * Returns conversionOK, targetExhausted (the code point that did not fit
  * is left unconsumed) or sourceIllegal.
  *
  * sourceIllegal has two distinct meanings here:
  *   - strictConversion and a surrogate value: conversion stops with
  *     *sourceStart pointing at it;
  *   - a value above UNI_MAX_LEGAL_UTF32 (any flags): the value is replaced
  *     by UNI_REPLACEMENT_CHAR and conversion continues, so the whole input
  *     may have been consumed even though sourceIllegal is returned.
  */
 ConversionResult ConvertUTF32toUTF8 (
         const UTF32** sourceStart, const UTF32* sourceEnd, 
         UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
     ConversionResult result = conversionOK;
     const UTF32* source = *sourceStart;
     UTF8* target = *targetStart;
     while (source < sourceEnd) {
         UTF32 ch;
         unsigned short bytesToWrite = 0;
         /* (ch | byteMark) & byteMask keeps the low six bits of ch and
          * forces the top two bits to 10, the continuation-byte pattern. */
         const UTF32 byteMask = 0xBF;
         const UTF32 byteMark = 0x80; 
         ch = *source++;
         if (flags == strictConversion ) {
             /* UTF-16 surrogate values are illegal in UTF-32 */
             if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                 --source; /* return to the illegal value itself */
                 result = sourceIllegal;
                 break;
             }
         }
         /*
          * Figure out how many bytes the result will require. Turn any
          * illegally large UTF32 things (> Plane 17) into replacement chars.
          */
         if (ch < (UTF32)0x80) {      bytesToWrite = 1;
         } else if (ch < (UTF32)0x800) {     bytesToWrite = 2;
         } else if (ch < (UTF32)0x10000) {   bytesToWrite = 3;
         } else if (ch <= UNI_MAX_LEGAL_UTF32) {  bytesToWrite = 4;
         } else {                            bytesToWrite = 3;
                                             ch = UNI_REPLACEMENT_CHAR;
                                             result = sourceIllegal;
         }
         /* Jump to the end of this character's slot; the bytes are filled
          * in back to front below. Undo the jump if the slot does not fit. */
         target += bytesToWrite;
         if (target > targetEnd) {
             --source; /* Back up source pointer! */
             target -= bytesToWrite; result = targetExhausted; break;
         }
         /* Emit continuation bytes last-to-first, six bits each, then the
          * lead byte carrying the length marker for bytesToWrite. */
         switch (bytesToWrite) { /* note: everything falls through. */
             case 4: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
             case 3: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
             case 2: *--target = (UTF8)((ch | byteMark) & byteMask); ch >>= 6;
             case 1: *--target = (UTF8) (ch | firstByteMark[bytesToWrite]);
         }
         /* target is back at the start of the slot; step past it. */
         target += bytesToWrite;
     }
     *sourceStart = source;
     *targetStart = target;
     return result;
 }
 /* --------------------------------------------------------------------- */
 /*
 * Utility routine to tell whether a sequence of bytes is legal UTF-8.
 * This must be called with the length pre-determined by the first byte.
 * If not calling this from ConvertUTF8to*, then the length can be set by:
 *  length = trailingBytesForUTF8[*source]+1;
 * and the sequence is illegal right away if there aren't that many bytes
 * available.
 * If presented with a length > 4, this returns false.  The Unicode
 * definition of UTF-8 goes up to 4-byte sequences.
 */
 /*
  * source - first (lead) byte of the sequence to check.
  * length - total number of bytes in the sequence, lead byte included.
  *          The caller must guarantee that many bytes are readable.
  * Returns true if the bytes form a legal UTF-8 sequence, false otherwise.
  */
 static Boolean isLegalUTF8(const UTF8 *source, int length) {
     UTF8 a;
     /* Start one past the last byte; the cases below walk backwards. */
     const UTF8 *srcptr = source+length;
     switch (length) {
     /* Any length outside 1..4 is rejected outright. */
     default: return false;
         /* Everything else falls through when "true"... */
     /* Fourth and third bytes must be continuation bytes (0x80..0xBF). */
     case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
     /* Here a becomes the second byte, whatever the length was. */
     case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
         /* Some lead bytes allow only part of the continuation range for
          * the second byte; see the well-formed UTF-8 byte sequence table
          * in the Unicode Standard. */
         switch (*source) {
             /* no fall-through in this inner switch */
             case 0xE0: if (a < 0xA0) return false; break;
             case 0xED: if (a > 0x9F) return false; break;
             case 0xF0: if (a < 0x90) return false; break;
             case 0xF4: if (a > 0x8F) return false; break;
             default:   if (a < 0x80) return false;
         }
     /* Lead bytes 0x80..0xC1 never start a legal sequence. */
     case 1: if (*source >= 0x80 && *source < 0xC2) return false;
     }
     /* Neither do lead bytes above 0xF4. */
     if (*source > 0xF4) return false;
     return true;
 }
 /* --------------------------------------------------------------------- */
 /*
  * Exported function to return whether a UTF-8 sequence is legal or not.
  * Within this file it is only referenced from an assertion; it is mainly
  * provided for external callers.
  *
  * source    - first byte of the candidate sequence.
  * sourceEnd - one past the last readable byte.
  *
  * Returns true only if the range is non-empty, contains the complete
  * sequence announced by the lead byte, and that sequence passes
  * isLegalUTF8(). Returns false otherwise.
  */
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
     int length;
     /* An empty range holds no sequence. Check this before reading *source
      * so the byte one past the end of the buffer is never dereferenced. */
     if (source >= sourceEnd) {
         return false;
     }
     length = trailingBytesForUTF8[*source]+1;
     if (length > sourceEnd - source) {
         return false;
     }
     return isLegalUTF8(source, length);
 }
 /* --------------------------------------------------------------------- */
 /*
  * Returns the length in bytes (0..3) of the maximal subpart of the
  * ill-formed UTF-8 sequence starting at source, as defined below.
  *
  * source    - start of a sequence that is NOT legal UTF-8.
  * sourceEnd - one past the last readable byte.
  *
  * Returns 0 only for an empty range; otherwise at least 1.
  */
 static unsigned
 findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
                                           const UTF8 *sourceEnd) {
   UTF8 b1, b2, b3;
   /*
    * Handle the empty range first: the precondition check below inspects
    * the lead byte, which must not be read when there is none.
    */
   if (source == sourceEnd)
     return 0;
   assert(!isLegalUTF8Sequence(source, sourceEnd));
   /*
    * Unicode 6.3.0, D93b:
    *
    *   Maximal subpart of an ill-formed subsequence: The longest code unit
    *   subsequence starting at an unconvertible offset that is either:
    *   a. the initial subsequence of a well-formed code unit sequence, or
    *   b. a subsequence of length one.
    */
   /*
    * Perform case analysis.  See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
    * Byte Sequences.
    */
   b1 = *source;
   ++source;
   if (b1 >= 0xC2 && b1 <= 0xDF) {
     /*
      * First byte is valid, but we know that this code unit sequence is
      * invalid, so the maximal subpart has to end after the first byte.
      */
     return 1;
   }
   if (source == sourceEnd)
     return 1;
   b2 = *source;
   ++source;
   /* Three-byte leads: the subpart is 2 only if the second byte is in the
    * range that this particular lead byte allows. */
   if (b1 == 0xE0) {
     return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
   }
   if (b1 >= 0xE1 && b1 <= 0xEC) {
     return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
   }
   if (b1 == 0xED) {
     return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
   }
   if (b1 >= 0xEE && b1 <= 0xEF) {
     return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
   }
   /* Four-byte leads: same idea, extended by a check of the third byte. */
   if (b1 == 0xF0) {
     if (b2 >= 0x90 && b2 <= 0xBF) {
       if (source == sourceEnd)
         return 2;
       b3 = *source;
       return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
     }
     return 1;
   }
   if (b1 >= 0xF1 && b1 <= 0xF3) {
     if (b2 >= 0x80 && b2 <= 0xBF) {
       if (source == sourceEnd)
         return 2;
       b3 = *source;
       return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
     }
     return 1;
   }
   if (b1 == 0xF4) {
     if (b2 >= 0x80 && b2 <= 0x8F) {
       if (source == sourceEnd)
         return 2;
       b3 = *source;
       return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
     }
     return 1;
   }
   assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
   /*
    * There are no valid sequences that start with these bytes.  Maximal subpart
    * is defined to have length 1 in these cases.
    */
   return 1;
 }
 /* --------------------------------------------------------------------- */
 /*
 * Exported function to return the total number of bytes in a codepoint
 * represented in UTF-8, given the value of the first byte.
 */
 unsigned getNumBytesForUTF8(UTF8 first) {
   /* The table stores the count of trailing bytes; add one for the lead byte. */
   return 1 + trailingBytesForUTF8[first];
 }
 /* --------------------------------------------------------------------- */
 /*
 * Exported function to return whether a UTF-8 string is legal or not.
 * This is not used here; it's just exported.
 */
 Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
    /*
     * Walks the buffer one sequence at a time, advancing *source past every
     * sequence that checks out.  On failure *source is left at the start of
     * the offending (illegal or truncated) sequence.
     */
    for (;;) {
        const UTF8 *cursor = *source;
        int seqLen;
        if (cursor == sourceEnd)
            return true;
        seqLen = trailingBytesForUTF8[*cursor] + 1;
        if (seqLen > sourceEnd - cursor)
            return false;
        if (!isLegalUTF8(cursor, seqLen))
            return false;
        *source = cursor + seqLen;
    }
 }
 /* --------------------------------------------------------------------- */
 ConversionResult ConvertUTF8toUTF16 (
        const UTF8** sourceStart, const UTF8* sourceEnd, 
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    /*
     * Converts UTF-8 in [*sourceStart, sourceEnd) to UTF-16 written at
     * *targetStart, never writing at or beyond targetEnd.  On return both
     * start pointers are advanced past what was consumed / produced; when
     * the loop stops on an error after a sequence has been decoded, the
     * source pointer is moved back to the first byte of that sequence.
     */
    ConversionResult status = conversionOK;
    const UTF8* in = *sourceStart;
    UTF16* out = *targetStart;
    while (in < sourceEnd) {
        const UTF8* seqStart = in;
        unsigned short trailing = trailingBytesForUTF8[*in];
        unsigned short remaining;
        UTF32 cp = 0;
        /* The sequence announced by the lead byte runs off the buffer. */
        if (trailing >= sourceEnd - in) {
            status = sourceExhausted;
            break;
        }
        /* Legality is checked regardless of strict or lenient mode. */
        if (!isLegalUTF8(in, trailing+1)) {
            status = sourceIllegal;
            break;
        }
        /*
         * Accumulate the raw bytes, shifting by six bits between them, then
         * remove the combined lead/continuation marker bits in one
         * subtraction.
         */
        for (remaining = trailing; remaining > 0; --remaining) {
            cp += *in++;
            cp <<= 6;
        }
        cp += *in++;
        cp -= offsetsFromUTF8[trailing];
        /* Every outcome below stores at least one UTF-16 unit. */
        if (out >= targetEnd) {
            in = seqStart; /* Back up source pointer! */
            status = targetExhausted;
            break;
        }
        if (cp > UNI_MAX_BMP && cp <= UNI_MAX_UTF16) {
            /* Supplementary code point: emitted as a surrogate pair. */
            if (out + 1 >= targetEnd) {
                in = seqStart; /* Back up source pointer! */
                status = targetExhausted;
                break;
            }
            cp -= halfBase;
            *out++ = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
            *out++ = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
            continue;
        }
        if (cp <= UNI_MAX_BMP &&
            !(cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END)) {
            /* Ordinary BMP character: one UTF-16 unit. */
            *out++ = (UTF16)cp;
            continue;
        }
        /*
         * What is left is either a surrogate code point or a value above
         * UNI_MAX_UTF16: an error when strict, a replacement character
         * otherwise.
         */
        if (flags == strictConversion) {
            in = seqStart; /* return to the illegal value itself */
            status = sourceIllegal;
            break;
        }
        *out++ = UNI_REPLACEMENT_CHAR;
    }
    *sourceStart = in;
    *targetStart = out;
    return status;
 }
 /* --------------------------------------------------------------------- */
 static ConversionResult ConvertUTF8toUTF32Impl(
        const UTF8** sourceStart, const UTF8* sourceEnd, 
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
        Boolean InputIsPartial) {
    /*
     * Shared implementation of the UTF-8 to UTF-32 converters.
     *
     *   sourceStart/sourceEnd - input range; *sourceStart is advanced.
     *   targetStart/targetEnd - output range; *targetStart is advanced and
     *                           nothing is written at or beyond targetEnd.
     *   flags                 - strictConversion stops at the first illegal
     *                           sequence; otherwise the maximal ill-formed
     *                           subpart is replaced with
     *                           UNI_REPLACEMENT_CHAR and conversion goes on.
     *   InputIsPartial        - when set, a sequence cut off by sourceEnd
     *                           yields sourceExhausted instead of being
     *                           treated as ill-formed.
     *
     * Returns conversionOK, or the code of the last problem recorded.
     */
    ConversionResult result = conversionOK;
    const UTF8* source = *sourceStart;
    UTF32* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch = 0;
        unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
        if (extraBytesToRead >= sourceEnd - source) {
            if (flags == strictConversion || InputIsPartial) {
                result = sourceExhausted;
                break;
            } else {
                /*
                 * The replacement character needs a free target slot.  The
                 * general target check further down is not reached on this
                 * path, so test here before writing and leave source
                 * untouched when there is no room.
                 */
                if (target >= targetEnd) {
                    result = targetExhausted;
                    break;
                }
                result = sourceIllegal;
                /*
                 * Replace the maximal subpart of ill-formed sequence with
                 * replacement character.
                 */
                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
                                                                    sourceEnd);
                *target++ = UNI_REPLACEMENT_CHAR;
                continue;
            }
        }
        if (target >= targetEnd) {
            result = targetExhausted; break;
        }
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(source, extraBytesToRead+1)) {
            result = sourceIllegal;
            if (flags == strictConversion) {
                /* Abort conversion. */
                break;
            } else {
                /*
                 * Replace the maximal subpart of ill-formed sequence with
                 * replacement character.
                 */
                source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
                                                                    sourceEnd);
                *target++ = UNI_REPLACEMENT_CHAR;
                continue;
            }
        }
        /*
         * The cases all fall through. See "Note A" below.
         */
        switch (extraBytesToRead) {
            case 5: ch += *source++; ch <<= 6;
            case 4: ch += *source++; ch <<= 6;
            case 3: ch += *source++; ch <<= 6;
            case 2: ch += *source++; ch <<= 6;
            case 1: ch += *source++; ch <<= 6;
            case 0: ch += *source++;
        }
        /* Strip the accumulated lead/continuation marker bits. */
        ch -= offsetsFromUTF8[extraBytesToRead];
        if (ch <= UNI_MAX_LEGAL_UTF32) {
            /*
             * UTF-16 surrogate values are illegal in UTF-32, and anything
             * over Plane 17 (> 0x10FFFF) is illegal.
             */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                if (flags == strictConversion) {
                    source -= (extraBytesToRead+1); /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                } else {
                    *target++ = UNI_REPLACEMENT_CHAR;
                }
            } else {
                *target++ = ch;
            }
        } else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
            result = sourceIllegal;
            *target++ = UNI_REPLACEMENT_CHAR;
        }
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
 }
 ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
                                            const UTF8 *sourceEnd,
                                            UTF32 **targetStart,
                                            UTF32 *targetEnd,
                                            ConversionFlags flags) {
   /* More input may follow, so the shared converter is told it is partial. */
   const Boolean inputIsPartial = true;
   return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
                                 flags, inputIsPartial);
 }
 ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
                                     const UTF8 *sourceEnd, UTF32 **targetStart,
                                     UTF32 *targetEnd, ConversionFlags flags) {
   /* The input is treated as complete: the partial flag is off. */
   const Boolean inputIsPartial = false;
   return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
                                 flags, inputIsPartial);
 }
 /* ---------------------------------------------------------------------
    Note A.
    The fall-through switches in UTF-8 reading code save a
    temp variable, some decrements & conditionals.  The switches
    are equivalent to the following loop:
        {
            int tmpBytesToRead = extraBytesToRead+1;
            do {
                ch += *source++;
                --tmpBytesToRead;
                if (tmpBytesToRead) ch <<= 6;
            } while (tmpBytesToRead > 0);
        }
    In UTF-8 writing code, the switches on "bytesToWrite" are
    similarly unrolled loops.
   --------------------------------------------------------------------- */
--- a/src/ConvertUTF.h
+++ b/src/ConvertUTF.h
@ -0,0 +1,183 @@
 /*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
 *
 *                     The LLVM Compiler Infrastructure
 *
 * This file is distributed under the University of Illinois Open Source
 * License. See LICENSE.TXT for details.
 *
 *==------------------------------------------------------------------------==*/
 /*
 * Copyright 2001-2004 Unicode, Inc.
 *
 * Disclaimer
 *
 * This source code is provided as is by Unicode, Inc. No claims are
 * made as to fitness for any particular purpose. No warranties of any
 * kind are expressed or implied. The recipient agrees to determine
 * applicability of information provided. If this file has been
 * purchased on magnetic or optical media from Unicode, Inc., the
 * sole remedy for any claim will be exchange of defective media
 * within 90 days of receipt.
 *
 * Limitations on Rights to Redistribute This Code
 *
 * Unicode, Inc. hereby grants the right to freely use the information
 * supplied in this file in the creation of products supporting the
 * Unicode Standard, and to make copies of this file in any form
 * for internal or external distribution as long as this notice
 * remains attached.
 */
 /* ---------------------------------------------------------------------
    Conversions between UTF32, UTF-16, and UTF-8.  Header file.
    Several functions are included here, forming a complete set of
    conversions between the three formats.  UTF-7 is not included
    here, but is handled in a separate source file.
    Each of these routines takes pointers to input buffers and output
    buffers.  The input buffers are const.
    Each routine converts the text between *sourceStart and sourceEnd,
    putting the result into the buffer between *targetStart and
    targetEnd. Note: the end pointers are *after* the last item: e.g.
    *(sourceEnd - 1) is the last item.
    The return result indicates whether the conversion was successful,
    and if not, whether the problem was in the source or target buffers.
    (Only the first encountered problem is indicated.)
    After the conversion, *sourceStart and *targetStart are both
    updated to point to the end of last text successfully converted in
    the respective buffers.
    Input parameters:
        sourceStart - pointer to a pointer to the source buffer.
                The contents of this are modified on return so that
                it points at the next thing to be converted.
        targetStart - similarly, pointer to pointer to the target buffer.
        sourceEnd, targetEnd - respectively pointers to the ends of the
                two buffers, for overflow checking only.
    These conversion functions take a ConversionFlags argument. When this
    flag is set to strict, both irregular sequences and isolated surrogates
    will cause an error.  When the flag is set to lenient, both irregular
    sequences and isolated surrogates are converted.
    Whether the flag is strict or lenient, all illegal sequences will cause
    an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
    or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
    must check for illegal sequences.
    When the flag is set to lenient, characters over 0x10FFFF are converted
    to the replacement character; otherwise (when the flag is set to strict)
    they constitute an error.
    Output parameters:
        The value "sourceIllegal" is returned from some routines if the input
        sequence is malformed.  When "sourceIllegal" is returned, the source
        value will point to the illegal value that caused the problem. E.g.,
        in UTF-8 when a sequence is malformed, it points to the start of the
        malformed sequence.
    Author: Mark E. Davis, 1994.
    Rev History: Rick McGowan, fixes & updates May 2001.
         Fixes & updates, Sept 2001.
 ------------------------------------------------------------------------ */
 #ifndef LLVM_SUPPORT_CONVERTUTF_H
 #define LLVM_SUPPORT_CONVERTUTF_H
 /* ---------------------------------------------------------------------
    The following 4 definitions are compiler-specific.
    The C standard does not guarantee that wchar_t has at least
    16 bits, so wchar_t is no less portable than unsigned short!
    All should be unsigned values to avoid sign extension during
    bit mask & shift operations.
 ------------------------------------------------------------------------ */
 typedef unsigned int    UTF32;  /* at least 32 bits */
 typedef unsigned short  UTF16;  /* at least 16 bits */
 typedef unsigned char   UTF8;   /* typically 8 bits */
 typedef unsigned char   Boolean; /* 0 or 1 */
 /* Some fundamental constants */
 #define UNI_REPLACEMENT_CHAR (UTF32)0x0000FFFD
 #define UNI_MAX_BMP (UTF32)0x0000FFFF
 #define UNI_MAX_UTF16 (UTF32)0x0010FFFF
 #define UNI_MAX_UTF32 (UTF32)0x7FFFFFFF
 #define UNI_MAX_LEGAL_UTF32 (UTF32)0x0010FFFF
 #define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4
 #define UNI_UTF16_BYTE_ORDER_MARK_NATIVE  0xFEFF
 #define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
 typedef enum {
  conversionOK,           /* conversion successful */
  sourceExhausted,        /* partial character in source, but hit end */
  targetExhausted,        /* insuff. room in target for conversion */
  sourceIllegal           /* source sequence is illegal/malformed */
 } ConversionResult;
 typedef enum {
  strictConversion = 0,
  lenientConversion
 } ConversionFlags;
 /* This is for C++ and does no harm in C */
 #ifdef __cplusplus
 extern "C" {
 #endif
 ConversionResult ConvertUTF8toUTF16 (
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
 /**
 * Convert a partial UTF8 sequence to UTF32.  If the sequence ends in an
 * incomplete code unit sequence, returns \c sourceExhausted.
 */
 ConversionResult ConvertUTF8toUTF32Partial(
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
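 /**
 * Convert a complete UTF8 sequence to UTF32.  If the sequence ends in an
 * incomplete code unit sequence, returns \c sourceExhausted under
 * strictConversion; under lenientConversion the incomplete tail is replaced
 * with the replacement character and \c sourceIllegal is returned.
 */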
 ConversionResult ConvertUTF8toUTF32(
  const UTF8** sourceStart, const UTF8* sourceEnd,
  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
 ConversionResult ConvertUTF16toUTF8 (
  const UTF16** sourceStart, const UTF16* sourceEnd,
  UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
 ConversionResult ConvertUTF32toUTF8 (
  const UTF32** sourceStart, const UTF32* sourceEnd,
  UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
 ConversionResult ConvertUTF16toUTF32 (
  const UTF16** sourceStart, const UTF16* sourceEnd,
  UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
 ConversionResult ConvertUTF32toUTF16 (
  const UTF32** sourceStart, const UTF32* sourceEnd,
  UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
 Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
 unsigned getNumBytesForUTF8(UTF8 firstByte);
 #ifdef __cplusplus
 }
 #endif
 /* --------------------------------------------------------------------- */
 #endif
--- a/src/analyzer/protocol/rdp/CMakeLists.txt
+++ b/src/analyzer/protocol/rdp/CMakeLists.txt
@ -5,5 +5,6 @@ include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DI
 bro_plugin_begin(Bro RDP)
 	bro_plugin_cc(RDP.cc Plugin.cc)
 	bro_plugin_bif(events.bif)
 	bro_plugin_bif(types.bif)
 	bro_plugin_pac(rdp.pac rdp-analyzer.pac rdp-protocol.pac)
 bro_plugin_end()
--- a/src/analyzer/protocol/rdp/RDP.cc
+++ b/src/analyzer/protocol/rdp/RDP.cc
@ -2,17 +2,17 @@
 #include "analyzer/protocol/tcp/TCP_Reassembler.h"
 #include "Reporter.h"
 #include "events.bif.h"
 #include "types.bif.h"
 using namespace analyzer::rdp;
 RDP_Analyzer::RDP_Analyzer(Connection* c)
-
+	: tcp::TCP_ApplicationAnalyzer("RDP", c)
 : tcp::TCP_ApplicationAnalyzer("RDP", c)
 	{
 	interp = new binpac::RDP::RDP_Conn(this);
 	had_gap = false;
-	
+	pia = 0;
 	}
 RDP_Analyzer::~RDP_Analyzer()
@ -22,12 +22,10 @@ RDP_Analyzer::~RDP_Analyzer()
 void RDP_Analyzer::Done()
 	{
 	tcp::TCP_ApplicationAnalyzer::Done();
 	interp->FlowEOF(true);
 	interp->FlowEOF(false);
 	}
 void RDP_Analyzer::EndpointEOF(bool is_orig)
@ -49,13 +47,47 @@ void RDP_Analyzer::DeliverStream(int len, const u_char* data, bool orig)
 		// deliver data to the other side if the script layer can handle this.
 		return;
-	try
+	// If the data appears (very loosely) to be SSL/TLS
 	// we'll just move this over to the PIA analyzer.
 	// Like the comment below says, this is probably the wrong
 	// way to handle this.
 	if ( len > 0 && data[0] >= 0x14 && data[0] <= 0x17 )
 		{
-		interp->NewData(orig, data, data + len);
+		if ( ! pia )
 			{
 			pia = new pia::PIA_TCP(Conn());
 			if ( AddChildAnalyzer(pia) )
 				{
 				pia->FirstPacket(true, 0);
 				pia->FirstPacket(false, 0);
 				}
 			}
 		if ( pia )
 			{
 			ForwardStream(len, data, orig);
 			}
 		}
-	catch ( const binpac::Exception& e )
+	else if ( pia )
 		{
-		ProtocolViolation(fmt("Binpac exception: %s", e.c_msg()));
+		// This is data that doesn't seem to match 
 		// an SSL record, but we've moved into SSL mode.
 		// This is probably the wrong way to handle this
 		// situation but I don't know what these records
 		// are that don't appear to be SSL/TLS.
 		return;
 		}
 	else
 		{
 		try
 			{
 			interp->NewData(orig, data, data + len);
 			}
 		catch ( const binpac::Exception& e )
 			{
 			ProtocolViolation(fmt("Binpac exception: %s", e.c_msg()));
 			}
 		}
 	}
--- a/src/analyzer/protocol/rdp/RDP.h
+++ b/src/analyzer/protocol/rdp/RDP.h
@ -5,6 +5,7 @@
 #include "analyzer/protocol/tcp/TCP.h"
 #include "analyzer/protocol/pia/PIA.h"
 #include "rdp_pac.h"
@ -21,11 +22,8 @@ public:
 	virtual void DeliverStream(int len, const u_char* data, bool orig);
 	virtual void Undelivered(uint64 seq, int len, bool orig);
 	// Overriden from tcp::TCP_ApplicationAnalyzer.
 	virtual void EndpointEOF(bool is_orig);
 	static analyzer::Analyzer* InstantiateAnalyzer(Connection* conn)
 		{ return new RDP_Analyzer(conn); }
@ -40,7 +38,7 @@ protected:
 	binpac::RDP::RDP_Conn* interp;
 	bool had_gap;
-	
+	pia::PIA_TCP *pia;
 };
 } } // namespace analyzer::* 
--- a/src/analyzer/protocol/rdp/events.bif
+++ b/src/analyzer/protocol/rdp/events.bif
@ -9,14 +9,8 @@ event rdp_client_request%(c: connection, cookie: string%);
 ##
 ## c: The connection record for the underlying transport-layer session/flow.
 ##
-## keyboard_layout: The 16-bit integer representing the keyboard layout/language of the client machine.
+## data: The data contained in the client core data structure.
-##
+event rdp_client_core_data%(c: connection, data: RDP::ClientCoreData%);
 ## build: The 16-bit integer representing the version of the RDP client.
 ##
 ## hostname: The hostname of the client machine (optional).
 ##
 ## product_id: The product ID of the client machine (optional).
 event rdp_client_data%(c: connection, keyboard_layout: count, build: count, hostname: string, product_id: string%);
 ## Generated for MCS server responses when native RDP encryption is used.
 ##
--- a/src/analyzer/protocol/rdp/rdp-analyzer.pac
+++ b/src/analyzer/protocol/rdp/rdp-analyzer.pac
@ -1,63 +1,147 @@
 %extern{
 #include "ConvertUTF.h"
 #include "file_analysis/Manager.h"
 #include "types.bif.h"
 %}
 refine flow RDP_Flow += {
        function proc_rdp_client_request(client_request: Client_Request): bool
                %{
                connection()->bro_analyzer()->ProtocolConfirmation();
-                BifEvent::generate_rdp_client_request(connection()->bro_analyzer(),
+	function utf16_to_utf8_val(utf16: bytestring): StringVal
-                                                      connection()->bro_analyzer()->Conn(),
+		%{
-                                                      bytestring_to_val(${client_request.cookie_value}));
+		size_t utf8size = 3 * utf16.length() + 1;
 		char* utf8stringnative = new char[utf8size];
 		const UTF16* sourcestart = reinterpret_cast<const UTF16*>(utf16.begin());
 		const UTF16* sourceend = sourcestart + utf16.length();
 		UTF8* targetstart = reinterpret_cast<UTF8*>(utf8stringnative);
 		UTF8* targetend = targetstart + utf8size;
-                return true;
+		ConversionResult res = ConvertUTF16toUTF8(&sourcestart, 
-                %}
+		                                          sourceend,
 		                                          &targetstart, 
 		                                          targetend, 
 		                                          strictConversion);
 		*targetstart = 0;
-        function proc_rdp_result(gcc_response: GCC_Server_Create_Response): bool
+		if ( res != conversionOK )
-                %{
+			{
-                connection()->bro_analyzer()->ProtocolConfirmation();
+			connection()->bro_analyzer()->Weird("Failed UTF-16 to UTF-8 conversion");
-                BifEvent::generate_rdp_result(connection()->bro_analyzer(),
+			return new StringVal(utf16.length(), (const char *) utf16.begin());
-                                              connection()->bro_analyzer()->Conn(),
+			}
 					      ${gcc_response.result});
 		// We're relying on no nulls being in the string.
 		return new StringVal(utf8stringnative);
 		%}
 	function proc_rdp_client_request(client_request: Client_Request): bool
 		%{
 		connection()->bro_analyzer()->ProtocolConfirmation();
 		BifEvent::generate_rdp_client_request(connection()->bro_analyzer(),
 		                                      connection()->bro_analyzer()->Conn(),
 		                                      bytestring_to_val(${client_request.cookie_value}));
 		return true;
 		%}
 	function proc_rdp_result(gcc_response: GCC_Server_Create_Response): bool
 		%{
 		connection()->bro_analyzer()->ProtocolConfirmation();
 		BifEvent::generate_rdp_result(connection()->bro_analyzer(),
 		                              connection()->bro_analyzer()->Conn(),
 		                              ${gcc_response.result});
 		return true;
 		%}
-        function proc_rdp_client_data(ccore: Client_Core_Data): bool
+	function proc_rdp_client_core_data(ccore: Client_Core_Data): bool
-                %{
+		%{
-                connection()->bro_analyzer()->ProtocolConfirmation();
+		connection()->bro_analyzer()->ProtocolConfirmation();
                BifEvent::generate_rdp_client_data(connection()->bro_analyzer(),
                                                   connection()->bro_analyzer()->Conn(),
                                                   ${ccore.keyboard_layout},
 						   ${ccore.client_build},
 						   bytestring_to_val(${ccore.client_name}),
                                                   bytestring_to_val(${ccore.dig_product_id}));
-                return true;
+		RecordVal* ec_flags = new RecordVal(BifType::Record::RDP::EarlyCapabilityFlags);
-                %}
+		ec_flags->Assign(0, new Val(${ccore.SUPPORT_ERRINFO_PDU}, TYPE_BOOL));
 		ec_flags->Assign(1, new Val(${ccore.WANT_32BPP_SESSION}, TYPE_BOOL));
 		ec_flags->Assign(2, new Val(${ccore.SUPPORT_STATUSINFO_PDU}, TYPE_BOOL));
 		ec_flags->Assign(3, new Val(${ccore.STRONG_ASYMMETRIC_KEYS}, TYPE_BOOL));
 		ec_flags->Assign(4, new Val(${ccore.SUPPORT_MONITOR_LAYOUT_PDU}, TYPE_BOOL));
 		ec_flags->Assign(5, new Val(${ccore.SUPPORT_NETCHAR_AUTODETECT}, TYPE_BOOL));
 		ec_flags->Assign(6, new Val(${ccore.SUPPORT_DYNVC_GFX_PROTOCOL}, TYPE_BOOL));
 		ec_flags->Assign(7, new Val(${ccore.SUPPORT_DYNAMIC_TIME_ZONE}, TYPE_BOOL));
 		ec_flags->Assign(8, new Val(${ccore.SUPPORT_HEARTBEAT_PDU}, TYPE_BOOL));
-        function proc_rdp_server_security(ssd: Server_Security_Data): bool
+		RecordVal* ccd = new RecordVal(BifType::Record::RDP::ClientCoreData);
-                %{
+		ccd->Assign(0, new Val(${ccore.version_major}, TYPE_COUNT));
-                connection()->bro_analyzer()->ProtocolConfirmation();
+		ccd->Assign(1, new Val(${ccore.version_minor}, TYPE_COUNT));
-                BifEvent::generate_rdp_server_security(connection()->bro_analyzer(),
+		ccd->Assign(2, new Val(${ccore.desktop_width}, TYPE_COUNT));
-                                                       connection()->bro_analyzer()->Conn(),
+		ccd->Assign(3, new Val(${ccore.desktop_height}, TYPE_COUNT));
-                                                       ${ssd.encryption_method},
+		ccd->Assign(4, new Val(${ccore.color_depth}, TYPE_COUNT));
-                                                       ${ssd.encryption_level});
+		ccd->Assign(5, new Val(${ccore.sas_sequence}, TYPE_COUNT));
 		ccd->Assign(6, new Val(${ccore.keyboard_layout}, TYPE_COUNT));
 		ccd->Assign(7, new Val(${ccore.client_build}, TYPE_COUNT));
 		ccd->Assign(8, utf16_to_utf8_val(${ccore.client_name}));
 		ccd->Assign(9, new Val(${ccore.keyboard_type}, TYPE_COUNT));
 		ccd->Assign(10, new Val(${ccore.keyboard_sub}, TYPE_COUNT));
 		ccd->Assign(11, new Val(${ccore.keyboard_function_key}, TYPE_COUNT));
 		ccd->Assign(12, utf16_to_utf8_val(${ccore.ime_file_name}));
 		ccd->Assign(13, new Val(${ccore.post_beta2_color_depth}, TYPE_COUNT));
 		ccd->Assign(14, new Val(${ccore.client_product_id}, TYPE_COUNT));
 		ccd->Assign(15, new Val(${ccore.serial_number}, TYPE_COUNT));
 		ccd->Assign(16, new Val(${ccore.high_color_depth}, TYPE_COUNT));
 		ccd->Assign(17, new Val(${ccore.supported_color_depths}, TYPE_COUNT));
 		ccd->Assign(18, ec_flags);
 		ccd->Assign(19, utf16_to_utf8_val(${ccore.dig_product_id}));
-                return true;
+		BifEvent::generate_rdp_client_core_data(connection()->bro_analyzer(),
-                %}
+		                                        connection()->bro_analyzer()->Conn(),
 		                                        ccd);
 		return true;
 		%}
 	function proc_rdp_server_security(ssd: Server_Security_Data): bool
 		%{
 		connection()->bro_analyzer()->ProtocolConfirmation();
 		BifEvent::generate_rdp_server_security(connection()->bro_analyzer(),
 		                                       connection()->bro_analyzer()->Conn(),
 		                                       ${ssd.encryption_method},
 		                                       ${ssd.encryption_level});
 		return true;
 		%}
 	function proc_x509_cert(x509: X509): bool
 		%{
 		const bytestring& cert = ${x509.cert};
 		ODesc file_handle;
 		file_handle.AddRaw("Analyzer::ANALYZER_RDP");
 		file_handle.Add(connection()->bro_analyzer()->Conn()->StartTime());
 		connection()->bro_analyzer()->Conn()->IDString(&file_handle);
 		string file_id = file_mgr->HashHandle(file_handle.Description());
 		file_mgr->DataIn(reinterpret_cast<const u_char*>(cert.data()),
 		                 cert.length(), 
 		                 connection()->bro_analyzer()->GetAnalyzerTag(),
 		                 connection()->bro_analyzer()->Conn(), 
 		                 false, // It seems there are only server certs?
 		                 file_id);
 		file_mgr->EndOfFile(file_id);
 		return true;
 		%}
 };
 refine typeattr Client_Request += &let {
-        proc: bool = $context.flow.proc_rdp_client_request(this);
+	proc: bool = $context.flow.proc_rdp_client_request(this);
 };
 refine typeattr Client_Core_Data += &let {
-  proc: bool = $context.flow.proc_rdp_client_data(this);
+	proc: bool = $context.flow.proc_rdp_client_core_data(this);
 };
 refine typeattr GCC_Server_Create_Response += &let {
-        proc: bool = $context.flow.proc_rdp_result(this);
+	proc: bool = $context.flow.proc_rdp_result(this);
 };
 refine typeattr Server_Security_Data += &let {
-        proc: bool = $context.flow.proc_rdp_server_security(this);
+	proc: bool = $context.flow.proc_rdp_server_security(this);
 };
 refine typeattr X509 += &let {
 	proc: bool = $context.flow.proc_x509_cert(this);
 };
--- a/src/analyzer/protocol/rdp/rdp-protocol.pac
+++ b/src/analyzer/protocol/rdp/rdp-protocol.pac
@ -1,45 +1,74 @@
 type RDP_PDU(is_orig: bool) = record {
 	type:	uint8;
 	switch: case type of {
 	  0x16 		-> 	ssl_encryption: 	bytestring &restofdata &transient; 	# send to SSL analyzer in the future
 	  default 	->	native_encryption:	Native_Encryption;			# TPKT version
 	};
 } &byteorder=bigendian;
-######################################################################
+type TPKT(is_orig: bool) = record {
-# Native Encryption
+	version:  uint8;
-######################################################################
+	reserved: uint8;
 	tpkt_len: uint16;
-type Native_Encryption = record {
+# These data structures are merged together into TPKT
-	tpkt_reserved:	uint8;
+# because there are packets that report incorrect 
-	tpkt_length:	uint16;
+# lengths in the tpkt length field.  No clue why.
-        cotp:   	COTP;
+
-};
+	cotp:     COTP;
 } &byteorder=bigendian &length=tpkt_len;
 type COTP = record {
-        length: uint8;
+	cotp_len:  uint8;
-        pdu:    uint8;
+	pdu:       uint8;
-        switch: case pdu of {
+	# Probably should do something with this eventually.
-          0xe0    -> cRequest:    Client_Request;
+	#cotp_crap: padding[cotp_len-2];
-          0xf0    -> hdr:         COTP_Header;
+	switch:    case pdu of {
-          default -> data:        bytestring &restofdata &transient;
+		#0xd0    -> cConfirm: Connect_Confirm;
-        };
+		0xe0    -> c_request: Client_Request;
 		0xf0    -> data:      DT_Data;
 		# In case we don't support the PDU we just
 		# consume the rest of it and throw it away.
 		default -> not_done:  bytestring &restofdata &transient;
 	};
 } &byteorder=littleendian;
-type COTP_Header = record {
+type DT_Data = record {
-        tpdu_number:    		uint8;
+	tpdu_number:              uint8;
-        application_defined_type:       uint8;  	# this begins a BER encoded multiple octet variant, but can be safely skipped
+	# multiple octet variant of the ASN.1 type field, should handle this better.
-        application_type:           	uint8;      	# this is value for the BER encoded octet variant above
+	application_defined_type: uint8;
-        switch: case application_type of {		# this seems to cause a binpac exception error
+	application_type:         uint8;
-          0x65  	-> cHeader:     Client_Header;     # 0x65 is a client
+
-          0x66  	-> sHeader:     Server_Header;     # 0x66 is a server
+	data: case application_type of {
-          default 	-> data:      	bytestring &restofdata;
+		0x65    -> client: Client_Header; # 0x65 is a client
-        };
+		0x66    -> server: Server_Header; # 0x66 is a server
 		default -> none:   empty;
 	};
 } &byteorder=littleendian;
 ######################################################################
 # Data Blocks
 ######################################################################
 type Data_Header = record {
-        type:   uint16;
+	type:   uint16;
-        length: uint16;
+	length: uint16;
 } &byteorder=littleendian;
 type Data_Block = record {
 	header: Data_Header;
 	block: case header.type of {
 		0xc001  -> client_core:       Client_Core_Data;
 		#0xc002  -> client_security:   Client_Security_Data;
 		#0xc003  -> client_network:    Client_Network_Data;
 		#0xc004  -> client_cluster:    Client_Cluster_Data;
 		#0xc005  -> client_monitor:    Client_Monitor_Data;
 		#0xc006  -> client_msgchannel: Client_MsgChannel_Data;
 		#0xc008  -> client_monitor_ex: Client_MonitorExtended_Data;
 		#0xc00A  -> client_multitrans: Client_MultiTransport_Data;
 		0x0c01  -> server_core:       Server_Core_Data;
 		0x0c02  -> server_security:   Server_Security_Data;
 		0x0c03  -> server_network:    Server_Network_Data;
 		#0x0c04  -> server_msgchannel: Server_MsgChannel_Data;
 		#0x0c08  -> server_multitrans: Server_MultiTransport_Data;
 		default -> unhandled:  bytestring &restofdata &transient;
 	} &length=header.length-4;
 } &byteorder=littleendian;
 ######################################################################
@ -47,11 +76,11 @@ type Data_Header = record {
 ######################################################################
 type Client_Request = record {
-        destination_reference:  uint16;
+	destination_reference: uint16;
-        source_reference:       uint16;
+	source_reference:      uint16;
-        flow_control:   	uint8;
+	flow_control:          uint8;
-	cookie_mstshash:	RE/Cookie: mstshash\=/;	# &check would be better here, but it is not implemented
+	cookie_mstshash:       RE/Cookie: mstshash\=/;
-	cookie_value:		RE/[^\x0d]*/;		# the value is anything up to \x0d
+	cookie_value:          RE/[^\x0d]*/;
 };
 ######################################################################
@ -59,158 +88,173 @@ type Client_Request = record {
 ######################################################################
 type Client_Header = record {
-	type_length:			uint8[3]; # BER encoded long variant, can be safely skipped for now
+	type_length:               ASN1Integer;
-	calling_domain_selector:	ASN1OctetString;
+	calling_domain_selector:   ASN1OctetString;
-	called_domain_selector:		ASN1OctetString;
+	called_domain_selector:    ASN1OctetString;
-	upward_flag:			ASN1Boolean;
+	upward_flag:               ASN1Boolean;
-	target_parameters:		ASN1SequenceMeta;
+	target_parameters:         ASN1SequenceMeta;
-	targ_parameters_pad:		padding[target_parameters.encoding.length];
+	targ_parameters_pad:       padding[target_parameters.encoding.length];
-	minimum_parameters:		ASN1SequenceMeta;
+	minimum_parameters:        ASN1SequenceMeta;
-	min_parameters_pad:		padding[minimum_parameters.encoding.length];
+	min_parameters_pad:        padding[minimum_parameters.encoding.length];
-	maximum_parameters:		ASN1SequenceMeta;
+	maximum_parameters:        ASN1SequenceMeta;
-	max_parameters_pad:		padding[maximum_parameters.encoding.length];
+	max_parameters_pad:        padding[maximum_parameters.encoding.length];
-	user_data_length:		uint32; # BER encoded OctetString and long variant, can be safely skipped for now
+	# BER encoded OctetString and long variant, can be safely skipped for now
-	gcc_connection_data:    	GCC_Client_Connection_Data;
+	user_data_length:          uint32;
-	gcc_client_create_request:	GCC_Client_Create_Request;
+	gcc_connection_data:       GCC_Client_Connection_Data;
-        core_header:                    Data_Header;
+	gcc_client_create_request: GCC_Client_Create_Request;
-        core_data:      		Client_Core_Data;
+	data_blocks:               Data_Block[] &until($input.length() == 0);
 	remainder:			bytestring &restofdata &transient; # everything after core_data can be discarded
 };
 type GCC_Client_Connection_Data = record {
-        key_object_length:              uint16;
+	key_object_length:        uint16;
-        key_object:                     uint8[key_object_length];
+	key_object:               uint8[key_object_length];
-        connect_data_connect_pdu:       uint16;
+	connect_data_connect_pdu: uint16;
 } &byteorder=bigendian;
 type GCC_Client_Create_Request = record {
-        extension_bit:                  uint8;
+	extension_bit:           uint8;
-        privileges:                     uint8;
+	privileges:              uint8;
-        numeric_length:                 uint8;
+	numeric_length:          uint8;
-        numeric:                        uint8;
+	numeric:                 uint8;
-        termination_method:             uint8;
+	termination_method:      uint8;
-        number_user_data_sets:          uint8;
+	number_user_data_sets:   uint8;
-        user_data_value_present:        uint8;
+	user_data_value_present: uint8;
-        h221_nonstandard_length:        uint8;
+	h221_nonstandard_length: uint8;
-        h221_nonstandard_key:           RE/Duca/; # &check would be better here, but it is not implemented
+	h221_nonstandard_key:    RE/Duca/;
-        user_data_value_length:         uint16;
+	user_data_value_length:  uint16;
-};
+} &byteorder=bigendian;
 type Client_Core_Data = record {
-        version_major:          uint16;
+	version_major:            uint16;
-        version_minor:          uint16;
+	version_minor:            uint16;
-        desktop_width:          uint16;
+	desktop_width:            uint16;
-        desktop_height:         uint16;
+	desktop_height:           uint16;
-        color_depth:            uint16;
+	color_depth:              uint16;
-        sas_sequence:           uint16;
+	sas_sequence:             uint16;
-        keyboard_layout:        uint32;
+	keyboard_layout:          uint32;
-        client_build:           uint32;
+	client_build:             uint32;
-        client_name:            bytestring &length=32;
+	client_name:              bytestring &length=32;
-        keyboard_type:          uint32;
+	keyboard_type:            uint32;
-        keyboard_sub:           uint32;
+	keyboard_sub:             uint32;
-        keyboard_function_key:  uint32;
+	keyboard_function_key:    uint32;
-        ime_file_name:          bytestring &length=64;
+	ime_file_name:            bytestring &length=64;
-        post_beta_color_depth:  uint16;
+	# Everything below here is optional and should be handled better.
-        product_id:             uint16;
+	# If some of these fields aren't included it could lead to parse failure.
-        serial_number:          uint32;
+	post_beta2_color_depth:   uint16;
-        high_color_depth:       uint16;
+	client_product_id:        uint16;
-        supported_color_depth:  uint16;
+	serial_number:            uint32;
-        early_capability_flags: uint16;
+	high_color_depth:         uint16;
-        dig_product_id:         bytestring &length=64;
+	supported_color_depths:   uint16;
-};
+	early_capability_flags:   uint16;
 	dig_product_id:           bytestring &length=64;
 	# There are more optional fields after this point, but they are
 	# awkward to parse conditionally in binpac.
 	# Documented here: https://msdn.microsoft.com/en-us/library/cc240510.aspx
 } &let {
 	SUPPORT_ERRINFO_PDU:        bool = early_capability_flags & 0x01;
 	WANT_32BPP_SESSION:         bool = early_capability_flags & 0x02;
 	SUPPORT_STATUSINFO_PDU:     bool = early_capability_flags & 0x04;
 	STRONG_ASYMMETRIC_KEYS:     bool = early_capability_flags & 0x08;
 	SUPPORT_MONITOR_LAYOUT_PDU: bool = early_capability_flags & 0x40;
 	SUPPORT_NETCHAR_AUTODETECT: bool = early_capability_flags & 0x80;
 	SUPPORT_DYNVC_GFX_PROTOCOL: bool = early_capability_flags & 0x0100;
 	SUPPORT_DYNAMIC_TIME_ZONE:  bool = early_capability_flags & 0x0200;
 	SUPPORT_HEARTBEAT_PDU:      bool = early_capability_flags & 0x0400;
 } &byteorder=littleendian;
 ######################################################################
 # Server MCS
 ######################################################################
 type Server_Header = record {
-	type_length:                    	uint8[3]; 							# BER encoded long variant, can be safely skipped for now
+	# We don't need this value, but it's an ASN.1 integer in definite-length form,
-	connect_response_result:		ASN1Enumerated;
+	# so I think we can skip over it.
-	connect_response_called_id:		ASN1Integer;
+	type_length:                        uint8[3];
-	connect_response_domain_parameters:	ASN1SequenceMeta;
+	connect_response_result:            ASN1Enumerated;
-        domain_parameters_pad:            	padding[connect_response_domain_parameters.encoding.length]; 	# skip this data
+	connect_response_called_id:         ASN1Integer;
-	user_data_length:               	uint32; 							# BER encoded OctetString and long variant, can be safely skipped for now
+	connect_response_domain_parameters: ASN1SequenceMeta;
-	gcc_connection_data:			GCC_Server_Connection_Data;
+	# Skipping over domain parameters for now.
-	gcc_create_response:			GCC_Server_Create_Response;
+	domain_parameters:                  padding[connect_response_domain_parameters.encoding.length];
-	core_header:				Data_Header;	
+	# I think this is another definite length encoded value.
-        core_data:        			padding[core_header.length - 4]; 				# skip this data
+	user_data_length:                   uint32;
-	network_header:				Data_Header;	
+	gcc_connection_data:                GCC_Server_Connection_Data;
-        net_data:         			padding[network_header.length - 4]; 				# skip this data 
+	gcc_create_response:                GCC_Server_Create_Response;
-	security_header:			Data_Header;	
+	data_blocks:                        Data_Block[] &until($input.length() == 0);
-        security_data:    			Server_Security_Data;						
+} &byteorder=littleendian;
 };
 type GCC_Server_Connection_Data = record {
-	key_object_length:      	uint16;
+	key_object_length:        uint16;
-        key_object:             	uint8[key_object_length];
+	key_object:               uint8[key_object_length];
-        connect_data_connect_pdu:       uint8;
+	connect_data_connect_pdu: uint8;
 } &byteorder=bigendian;
 type GCC_Server_Create_Response = record {
-	extension_bit:			uint8;
+	extension_bit:           uint8;
-	node_id:			uint8[2];
+	node_id:                 uint16;
-	tag_length:			uint8;
+	tag_length:              uint8;
-	tag:				uint8;
+	tag:                     uint8;
-	result:				uint8;
+	result:                  uint8;
-	number_user_data_sets:		uint8;
+	number_user_data_sets:   uint8;
-	user_data_value_present:	uint8;
+	user_data_value_present: uint8;
-	h221_nonstandard_length:	uint8;
+	h221_nonstandard_length: uint8;
-	h221_nonstandard_key:		RE/McDn/; # &check would be better here, but it is not implemented
+	h221_nonstandard_key:    RE/McDn/;
-	user_data_value_length:		uint16;
+	user_data_value_length:  uint16;
-};
+} &byteorder=bigendian;
 type Server_Core_Data = record {
-	version_major:			uint16;
+	version_major:              uint16;
-	version_minor:			uint16;
+	version_minor:              uint16;
-	client_requested_protocols:	uint32;
+	client_requested_protocols: uint32;
-};
+} &byteorder=littleendian;
 type Server_Network_Data = record {
-	mcs_channel_id:	uint16;
+	mcs_channel_id: uint16;
-	channel_count:	uint16;
+	channel_count:  uint16;
-};
+} &byteorder=littleendian;
 type Server_Security_Data = record {
-        encryption_method:      uint32;
+	encryption_method:      uint32;
-        encryption_level:       uint32;
+	encryption_level:       uint32;
-        server_random_length:   uint32 &byteorder=littleendian;
+	server_random_length:   uint32;
-        server_cert_length:     uint32 &byteorder=littleendian;
+	server_cert_length:     uint32;
-        server_random:          bytestring &length=server_random_length;
+	server_random:          bytestring &length=server_random_length;
-	server_certificate:	Server_Certificate;
+	server_certificate:     Server_Certificate &length=server_cert_length;
-};
+} &byteorder=littleendian;
 type Server_Certificate = record {
-        cert_type:           uint8;
+	version: uint32;
-        switch: case cert_type of {
+	switch:  case cert_type of {
-          0x01  ->      proprietary:    Server_Proprietary;
+		0x01 -> proprietary: Server_Proprietary;
-          0x02  ->      ssl:            SSL; 
+		0x02 -> x509:        X509;
-        };
+	};
 } &let {
 	cert_type: uint32 = version & 0x7FFFFFFF;
 	permanent_issue: bool = (version & 0x80000000) == 0;
 } &byteorder=littleendian;
 type Server_Proprietary = record {
-        cert_type:              uint8[3];               # remainder of cert_type value
+	signature_algorithm:    uint32;
-        signature_algorithm:    uint32;
+	key_algorithm:          uint32;
-        key_algorithm:          uint32;
+	public_key_blob_type:   uint16;
-        public_key_blob_type:   uint16;
+	public_key_blob_length: uint16;
-        public_key_blob_length: uint16;
+	public_key_blob:        Public_Key_Blob &length=public_key_blob_length;
-        public_key_blob:        Public_Key_Blob &length=public_key_blob_length;
+	signature_blob_type:    uint16;
-        signature_blob_type:    uint16;
+	signature_blob_length:  uint16;
-        signature_blob_length:  uint16;
+	signature_blob:         bytestring &length=signature_blob_length;
-        signature_blob:         bytestring &length=signature_blob_length;
+} &byteorder=littleendian;
 };
 type Public_Key_Blob = record {
-        magic:                  bytestring &length=4;
+	magic:           bytestring &length=4;
-        key_length:             uint32;
+	key_length:      uint32;
-        bit_length:             uint32;
+	bit_length:      uint32;
-        public_exponent:        uint32;
+	public_exponent: uint32;
-        modulus:                bytestring &length=key_length;
+	modulus:         bytestring &length=key_length;
-};
+} &byteorder=littleendian;
-type SSL = record {
+type X509 = record {
-	pad1:		padding[11];
+	pad1: padding[8];
-	x509_cert:	bytestring &restofdata &transient;	# send to x509 analyzer
+	cert: bytestring &restofdata;
-};
+} &byteorder=littleendian;
 ######################################################################
 # ASN.1 Encodings
@ -226,7 +270,7 @@ type ASN1EncodingMeta = record {
 	len:      uint8;
 	more_len: bytestring &length = long_len ? len & 0x7f : 0;
 } &let {
-	long_len: bool = len & 0x80;
+	long_len: bool = (len & 0x80) > 0;
 	length:   uint64 = long_len ? binary_to_int64(more_len) : len & 0x7f;
 };
@ -251,7 +295,7 @@ type ASN1Boolean = record {
 };
 type ASN1Enumerated = record {
-        encoding: ASN1Encoding;
+	encoding: ASN1Encoding;
 };
 ######################################################################
@ -261,7 +305,6 @@ type ASN1Enumerated = record {
 function binary_to_int64(bs: bytestring): int64
 	%{
 	int64 rval = 0;
 	for ( int i = 0; i < bs.length(); ++i )
 		{
 		uint64 byte = bs[i];
--- a/src/analyzer/protocol/rdp/rdp.pac
+++ b/src/analyzer/protocol/rdp/rdp.pac
@ -19,8 +19,7 @@ connection RDP_Conn(bro_analyzer: BroAnalyzer) {
 %include rdp-protocol.pac
 flow RDP_Flow(is_orig: bool) {
-	#flowunit = RDP_PDU(is_orig) withcontext(connection, this);
+	flowunit = TPKT(is_orig) withcontext(connection, this);
 	datagram = RDP_PDU(is_orig) withcontext(connection, this);
 };
 %include rdp-analyzer.pac
--- a/src/analyzer/protocol/rdp/types.bif
+++ b/src/analyzer/protocol/rdp/types.bif
@ -0,0 +1,5 @@
 module RDP;
 type EarlyCapabilityFlags: record;
 type ClientCoreData: record;
--- a/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-proprietary-encryption/rdp.log
+++ b/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-proprietary-encryption/rdp.log
@ -0,0 +1,11 @@
 #separator \x09
 #set_separator	,
 #empty_field	(empty)
 #unset_field	-
 #path	rdp
 #open	2015-03-04-17-59-16
 #fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	cookie	keyboard_layout	client_build	client_name	client_dig_product_id	desktop_width	desktop_height	requested_color_depth	result	encryption_level	encryption_method
 #types	time	string	addr	port	addr	port	string	string	string	string	string	count	count	string	string	string	string
 1193369797.582740	CjhGID4nQcgTWjvg4c	172.21.128.16	1312	10.226.24.52	3389	FTBCO\A70	English - United States	RDP 6.0	FROG-POND	(empty)	1152	864	32-bit	Success	High	128bit
 1193369795.014346	CXWv6p3arKYeMETxOg	172.21.128.16	1311	10.226.24.52	3389	FTBCO\A70	-	-	-	-	-	-	-	-	-	-
 #close	2015-03-04-17-59-16
--- a/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-to-ssl/rdp.log
+++ b/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-to-ssl/rdp.log
@ -0,0 +1,11 @@
 #separator \x09
 #set_separator	,
 #empty_field	(empty)
 #unset_field	-
 #path	rdp
 #open	2015-03-04-17-53-51
 #fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	cookie	keyboard_layout	client_build	client_name	client_dig_product_id	desktop_width	desktop_height	requested_color_depth	result	encryption_level	encryption_method
 #types	time	string	addr	port	addr	port	string	string	string	string	string	count	count	string	string	string	string
 1297551041.284715	CXWv6p3arKYeMETxOg	192.168.1.200	49206	192.168.1.150	3389	AWAKECODI	-	-	-	-	-	-	-	-	-	-
 1297551078.958821	CjhGID4nQcgTWjvg4c	192.168.1.200	49207	192.168.1.150	3389	AWAKECODI	-	-	-	-	-	-	-	-	-	-
 #close	2015-03-04-17-53-51
--- a/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-to-ssl/ssl.log
+++ b/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-to-ssl/ssl.log
@ -0,0 +1,11 @@
 #separator \x09
 #set_separator	,
 #empty_field	(empty)
 #unset_field	-
 #path	ssl
 #open	2015-03-04-17-53-51
 #fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	version	cipher	curve	server_name	resumed	last_alert	next_protocol	established	cert_chain_fuids	client_cert_chain_fuids	subject	issuer	client_subject	client_issuer
 #types	time	string	addr	port	addr	port	string	string	string	string	bool	string	string	bool	vector[string]	vector[string]	string	string	string	string
 1297551044.626170	CXWv6p3arKYeMETxOg	192.168.1.200	49206	192.168.1.150	3389	TLSv10	TLS_RSA_WITH_AES_128_CBC_SHA	-	192.168.1.150	F	-	-	T	FQWlpb1SuS5r4ERXej	(empty)	CN=WIN2K8R2.awakecoding.ath.cx	CN=WIN2K8R2.awakecoding.ath.cx	-	-
 1297551078.965110	CjhGID4nQcgTWjvg4c	192.168.1.200	49207	192.168.1.150	3389	TLSv10	TLS_RSA_WITH_AES_128_CBC_SHA	-	192.168.1.150	F	-	-	T	F4ERrj2uG50Lwz8259	(empty)	CN=WIN2K8R2.awakecoding.ath.cx	CN=WIN2K8R2.awakecoding.ath.cx	-	-
 #close	2015-03-04-17-53-51
--- a/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-x509/rdp.log
+++ b/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-x509/rdp.log
@ -0,0 +1,10 @@
 #separator \x09
 #set_separator	,
 #empty_field	(empty)
 #unset_field	-
 #path	rdp
 #open	2015-03-04-17-56-41
 #fields	ts	uid	id.orig_h	id.orig_p	id.resp_h	id.resp_p	cookie	keyboard_layout	client_build	client_name	client_dig_product_id	desktop_width	desktop_height	requested_color_depth	result	encryption_level	encryption_method
 #types	time	string	addr	port	addr	port	string	string	string	string	string	count	count	string	string	string	string
 1423755598.202845	CXWv6p3arKYeMETxOg	192.168.1.1	54990	192.168.1.2	3389	JOHN-PC  	English - United States	RDP 8.1	JOHN-PC-LAPTOP	3c571ed0-3415-474b-ae94-74e151b	1920	1080	16bit	Success	Client compatible	128bit
 #close	2015-03-04-17-56-41
--- a/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-x509/x509.log
+++ b/testing/btest/Baseline/scripts.base.protocols.rdp.rdp-x509/x509.log
@ -0,0 +1,10 @@
 #separator \x09
 #set_separator	,
 #empty_field	(empty)
 #unset_field	-
 #path	x509
 #open	2015-03-04-17-56-41
 #fields	ts	id	certificate.version	certificate.serial	certificate.subject	certificate.issuer	certificate.not_valid_before	certificate.not_valid_after	certificate.key_alg	certificate.sig_alg	certificate.key_type	certificate.key_length	certificate.exponent	certificate.curve	san.dns	san.uri	san.email	san.ip	basic_constraints.ca	basic_constraints.path_len
 #types	time	string	count	string	string	string	time	time	string	string	string	count	string	string	vector[string]	vector[string]	vector[string]	vector[addr]	bool	count
 1423755602.103140	F71ADVSn3rOqVhNh1	3	59EB28CB02B1A0D4	L=TURNBKL+CN=SERVR	L=TURNBKL+CN=SERVR	1423664106.000000	1431388800.000000	rsaEncryption	sha1WithRSA	rsa	512	65537	-	-	-	-	-	T	0
 #close	2015-03-04-17-56-41
--- a/testing/btest/Traces/rdp/rdp-proprietary-encryption.pcap
+++ b/testing/btest/Traces/rdp/rdp-proprietary-encryption.pcap
--- a/testing/btest/Traces/rdp/nla_win7_win2k8r2.pcap
+++ b/testing/btest/Traces/rdp/nla_win7_win2k8r2.pcap
--- a/testing/btest/Traces/rdp/rdp-x509.pcap
+++ b/testing/btest/Traces/rdp/rdp-x509.pcap
--- a/testing/btest/scripts/base/protocols/rdp/rdp-proprietary-encryption.bro
+++ b/testing/btest/scripts/base/protocols/rdp/rdp-proprietary-encryption.bro
@ -0,0 +1,4 @@
 # @TEST-EXEC: bro -r $TRACES/rdp/rdp-proprietary-encryption.pcap %INPUT
 # @TEST-EXEC: btest-diff rdp.log
@load base/protocols/rdp
--- a/testing/btest/scripts/base/protocols/rdp/rdp-to-ssl.bro
+++ b/testing/btest/scripts/base/protocols/rdp/rdp-to-ssl.bro
@ -0,0 +1,5 @@
 # @TEST-EXEC: bro -r $TRACES/rdp/rdp-to-ssl.pcap %INPUT
 # @TEST-EXEC: btest-diff rdp.log
 # @TEST-EXEC: btest-diff ssl.log
@load base/protocols/rdp
--- a/testing/btest/scripts/base/protocols/rdp/rdp-x509.bro
+++ b/testing/btest/scripts/base/protocols/rdp/rdp-x509.bro
@ -0,0 +1,5 @@
 # @TEST-EXEC: bro -r $TRACES/rdp/rdp-x509.pcap %INPUT
 # @TEST-EXEC: btest-diff rdp.log
 # @TEST-EXEC: btest-diff x509.log
@load base/protocols/rdp