mirror of
https://github.com/zeek/zeek.git
synced 2025-10-02 14:48:21 +00:00
Merge remote-tracking branch 'origin/topic/timw/cleaner-utf8'
* origin/topic/timw/cleaner-utf8: GHI-486: Switch over to using LLVM utf8-checking code to better validate characters. I addressed a buffer over-read during the merge and added test cases for it.
This commit is contained in:
commit
486bf1e713
3 changed files with 57 additions and 53 deletions
|
@ -2,11 +2,23 @@
|
|||
{"s":"\b\f\n\r\t\\x00\\x15"}
|
||||
{"s":"ñ"}
|
||||
{"s":"\\xc3("}
|
||||
{"s":"\\xc0\\x81"}
|
||||
{"s":"\\xc1\\x81"}
|
||||
{"s":"\\xc2\\xcf"}
|
||||
{"s":"\\xa0\\xa1"}
|
||||
{"s":"₡"}
|
||||
{"s":"࣡"}
|
||||
{"s":"\\xe0\\x80\\xa1"}
|
||||
{"s":"\\xe2(\\xa1"}
|
||||
{"s":"\\xed\\xa0\\xa1"}
|
||||
{"s":"\\xe2\\x82("}
|
||||
{"s":"𐌼"}
|
||||
{"s":"\\xf0(\\x8c\\xbc"}
|
||||
{"s":""}
|
||||
{"s":""}
|
||||
{"s":"\\xf0\\x80\\x8c\\xbc"}
|
||||
{"s":"\\xf2(\\x8c\\xbc"}
|
||||
{"s":"\\xf4\\x90\\x8c\\xbc"}
|
||||
{"s":"\\xf0\\x90(\\xbc"}
|
||||
{"s":"\\xf0(\\x8c("}
|
||||
{"s":"\\xf4\\x80\\x8c"}
|
||||
{"s":"\\xf0"}
|
||||
|
|
|
@ -27,33 +27,50 @@ event zeek_init()
|
|||
Log::write(SSH::LOG, [$s="a"]);
|
||||
Log::write(SSH::LOG, [$s="\b\f\n\r\t\x00\x15"]);
|
||||
|
||||
# Table 3-7 in https://www.unicode.org/versions/Unicode12.0.0/ch03.pdf describes what is
|
||||
# valid and invalid for the tests below
|
||||
|
||||
# Valid 2 Octet Sequence
|
||||
Log::write(SSH::LOG, [$s="\xc3\xb1"]);
|
||||
|
||||
|
||||
# Invalid 2 Octet Sequence
|
||||
Log::write(SSH::LOG, [$s="\xc3\x28"]);
|
||||
|
||||
Log::write(SSH::LOG, [$s="\xc0\x81"]);
|
||||
Log::write(SSH::LOG, [$s="\xc1\x81"]);
|
||||
Log::write(SSH::LOG, [$s="\xc2\xcf"]);
|
||||
|
||||
# Invalid Sequence Identifier
|
||||
Log::write(SSH::LOG, [$s="\xa0\xa1"]);
|
||||
|
||||
|
||||
# Valid 3 Octet Sequence
|
||||
Log::write(SSH::LOG, [$s="\xe2\x82\xa1"]);
|
||||
|
||||
Log::write(SSH::LOG, [$s="\xe0\xa3\xa1"]);
|
||||
|
||||
# Invalid 3 Octet Sequence (in 2nd Octet)
|
||||
Log::write(SSH::LOG, [$s="\xe0\x80\xa1"]);
|
||||
Log::write(SSH::LOG, [$s="\xe2\x28\xa1"]);
|
||||
|
||||
Log::write(SSH::LOG, [$s="\xed\xa0\xa1"]);
|
||||
|
||||
# Invalid 3 Octet Sequence (in 3rd Octet)
|
||||
Log::write(SSH::LOG, [$s="\xe2\x82\x28"]);
|
||||
|
||||
|
||||
# Valid 4 Octet Sequence
|
||||
Log::write(SSH::LOG, [$s="\xf0\x90\x8c\xbc"]);
|
||||
|
||||
Log::write(SSH::LOG, [$s="\xf1\x80\x8c\xbc"]);
|
||||
Log::write(SSH::LOG, [$s="\xf4\x80\x8c\xbc"]);
|
||||
|
||||
# Invalid 4 Octet Sequence (in 2nd Octet)
|
||||
Log::write(SSH::LOG, [$s="\xf0\x28\x8c\xbc"]);
|
||||
|
||||
Log::write(SSH::LOG, [$s="\xf0\x80\x8c\xbc"]);
|
||||
Log::write(SSH::LOG, [$s="\xf2\x28\x8c\xbc"]);
|
||||
Log::write(SSH::LOG, [$s="\xf4\x90\x8c\xbc"]);
|
||||
|
||||
# Invalid 4 Octet Sequence (in 3rd Octet)
|
||||
Log::write(SSH::LOG, [$s="\xf0\x90\x28\xbc"]);
|
||||
|
||||
|
||||
# Invalid 4 Octet Sequence (in 4th Octet)
|
||||
Log::write(SSH::LOG, [$s="\xf0\x28\x8c\x28"]);
|
||||
|
||||
# Invalid 4 Octet Sequence (too short)
|
||||
Log::write(SSH::LOG, [$s="\xf4\x80\x8c"]);
|
||||
Log::write(SSH::LOG, [$s="\xf0"]);
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue