mirror of
https://github.com/zeek/zeek.git
synced 2025-10-01 22:28:20 +00:00
src/3rdparty: Add 3rdparty files from Zeek's src/
This commit is contained in:
parent
f0cfaaaa78
commit
982d3b56a1
12 changed files with 3909 additions and 0 deletions
755
src/3rdparty/ConvertUTF.c
vendored
Normal file
755
src/3rdparty/ConvertUTF.c
vendored
Normal file
|
@ -0,0 +1,755 @@
|
||||||
|
/*===--- ConvertUTF.c - Universal Character Names conversions ---------------===
|
||||||
|
*
|
||||||
|
* The LLVM Compiler Infrastructure
|
||||||
|
*
|
||||||
|
* This file is distributed under the University of Illinois Open Source
|
||||||
|
* License:
|
||||||
|
*
|
||||||
|
* University of Illinois/NCSA
|
||||||
|
* Open Source License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed by:
|
||||||
|
*
|
||||||
|
* LLVM Team
|
||||||
|
*
|
||||||
|
* University of Illinois at Urbana-Champaign
|
||||||
|
*
|
||||||
|
* http://llvm.org
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal with the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
* sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above
|
||||||
|
* copyright notice, this list of conditions and the
|
||||||
|
* following disclaimers.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the
|
||||||
|
* above copyright notice, this list of conditions and
|
||||||
|
* the following disclaimers in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the names of the LLVM Team, University of
|
||||||
|
* Illinois at Urbana-Champaign, nor the names of its
|
||||||
|
* contributors may be used to endorse or promote
|
||||||
|
* products derived from this Software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR
|
||||||
|
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS WITH THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*===------------------------------------------------------------------------=*/
|
||||||
|
/*
|
||||||
|
* Copyright 2001-2004 Unicode, Inc.
|
||||||
|
*
|
||||||
|
* Disclaimer
|
||||||
|
*
|
||||||
|
* This source code is provided as is by Unicode, Inc. No claims are
|
||||||
|
* made as to fitness for any particular purpose. No warranties of any
|
||||||
|
* kind are expressed or implied. The recipient agrees to determine
|
||||||
|
* applicability of information provided. If this file has been
|
||||||
|
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||||
|
* sole remedy for any claim will be exchange of defective media
|
||||||
|
* within 90 days of receipt.
|
||||||
|
*
|
||||||
|
* Limitations on Rights to Redistribute This Code
|
||||||
|
*
|
||||||
|
* Unicode, Inc. hereby grants the right to freely use the information
|
||||||
|
* supplied in this file in the creation of products supporting the
|
||||||
|
* Unicode Standard, and to make copies of this file in any form
|
||||||
|
* for internal or external distribution as long as this notice
|
||||||
|
* remains attached.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
Conversions between UTF32, UTF-16, and UTF-8. Source code file.
|
||||||
|
Author: Mark E. Davis, 1994.
|
||||||
|
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||||
|
Sept 2001: fixed const & error conditions per
|
||||||
|
mods suggested by S. Parent & A. Lillich.
|
||||||
|
June 2002: Tim Dodd added detection and handling of incomplete
|
||||||
|
source sequences, enhanced error detection, added casts
|
||||||
|
to eliminate compiler warnings.
|
||||||
|
July 2003: slight mods to back out aggressive FFFE detection.
|
||||||
|
Jan 2004: updated switches in from-UTF8 conversions.
|
||||||
|
Oct 2004: updated to use UNI_MAX_LEGAL_UTF32 in UTF-32 conversions.
|
||||||
|
|
||||||
|
See the header file "ConvertUTF.h" for complete documentation.
|
||||||
|
|
||||||
|
------------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
|
||||||
|
#include "ConvertUTF.h"
|
||||||
|
#ifdef CVTUTF_DEBUG
|
||||||
|
#include <stdio.h>
|
||||||
|
#endif
|
||||||
|
#include <assert.h>
|
||||||
|
|
||||||
|
static const int halfShift = 10; /* used for shifting by 10 bits */
|
||||||
|
|
||||||
|
static const UTF32 halfBase = 0x0010000UL;
|
||||||
|
static const UTF32 halfMask = 0x3FFUL;
|
||||||
|
|
||||||
|
/*
 * Boundaries of the UTF-16 surrogate ranges.  Each expansion is fully
 * parenthesized so the macro behaves as a single operand in any expression
 * (for example as the operand of sizeof or of a postfix operator).
 */
#define UNI_SUR_HIGH_START  ((UTF32)0xD800)
#define UNI_SUR_HIGH_END    ((UTF32)0xDBFF)
#define UNI_SUR_LOW_START   ((UTF32)0xDC00)
#define UNI_SUR_LOW_END     ((UTF32)0xDFFF)

/* Values returned by the Boolean-typed helpers in this file. */
#define false      0
#define true       1
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
 * Number of trailing bytes that are supposed to follow a given first byte
 * of a UTF-8 sequence; index the table with that first byte.
 * Legal UTF-8 never has 4 or 5 trailing bytes.  Those entries are kept for
 * anyone who wants to handle the longer forms that earlier algorithms
 * allowed.
 */
static const char trailingBytesForUTF8[256] = {
    /* 0x00 - 0x0F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x10 - 0x1F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x20 - 0x2F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x30 - 0x3F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x40 - 0x4F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x50 - 0x5F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x60 - 0x6F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x70 - 0x7F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x80 - 0x8F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0x90 - 0x9F */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xA0 - 0xAF */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xB0 - 0xBF */ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
    /* 0xC0 - 0xCF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xD0 - 0xDF */ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    /* 0xE0 - 0xEF */ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    /* 0xF0 - 0xFF */ 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Magic values subtracted from a buffer value during UTF8 conversion.
|
||||||
|
* This table contains as many values as there might be trailing bytes
|
||||||
|
* in a UTF-8 sequence.
|
||||||
|
*/
|
||||||
|
static const UTF32 offsetsFromUTF8[6] = { 0x00000000UL, 0x00003080UL, 0x000E2080UL,
|
||||||
|
0x03C82080UL, 0xFA082080UL, 0x82082080UL };
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Once the bits are split out into bytes of UTF-8, this is a mask OR-ed
|
||||||
|
* into the first byte, depending on how many bytes follow. There are
|
||||||
|
* as many entries in this table as there are UTF-8 sequence types.
|
||||||
|
* (I.e., one byte sequence, two byte... etc.). Remember that sequencs
|
||||||
|
* for *legal* UTF-8 will be 4 or fewer bytes total.
|
||||||
|
*/
|
||||||
|
static const UTF8 firstByteMark[7] = { 0x00, 0x00, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC };
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/* The interface converts a whole buffer to avoid function-call overhead.
|
||||||
|
* Constants have been gathered. Loops & conditionals have been removed as
|
||||||
|
* much as possible for efficiency, in favor of drop-through switches.
|
||||||
|
* (See "Note A" at the bottom of the file for equivalent code.)
|
||||||
|
* If your compiler supports it, the "isLegalUTF8" call can be turned
|
||||||
|
* into an inline function.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
 * Convert UTF-32 code points in [*sourceStart, sourceEnd) to UTF-16 code
 * units written to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart point just past the last input
 * consumed and the last output written.
 *
 * Results:
 *   conversionOK     - the whole input was converted.
 *   targetExhausted  - the output buffer has no room for the next code
 *                      point; the source cursor is left on it.
 *   sourceIllegal    - a surrogate value was found in strict mode (the
 *                      cursor is left on it and conversion stops), or a
 *                      value above UNI_MAX_LEGAL_UTF32 was found in strict
 *                      mode (that value is skipped and conversion goes on).
 *
 * In lenient mode both kinds of illegal input are replaced with
 * UNI_REPLACEMENT_CHAR.
 */
ConversionResult ConvertUTF32toUTF16 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    const int strict = (flags == strictConversion);
    const UTF32* in = *sourceStart;
    UTF16* out = *targetStart;
    ConversionResult status = conversionOK;

    while (in < sourceEnd) {
        UTF32 cp;

        if (out >= targetEnd) {
            status = targetExhausted;
            break;
        }

        /* Peek at the code point; the cursor only moves once it is handled. */
        cp = *in;

        if (cp > UNI_MAX_LEGAL_UTF32) {
            /* Beyond the Unicode range: flag it or substitute, then move on. */
            if (strict) {
                status = sourceIllegal;
            } else {
                *out++ = UNI_REPLACEMENT_CHAR;
            }
            ++in;
            continue;
        }

        if (cp > UNI_MAX_BMP) {
            /* Supplementary plane: needs room for a full surrogate pair. */
            if (out + 1 >= targetEnd) {
                status = targetExhausted;
                break;
            }
            cp -= halfBase;
            *out++ = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
            *out++ = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
            ++in;
            continue;
        }

        /* Basic Multilingual Plane; surrogate values are illegal in UTF-32. */
        if (cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END) {
            if (strict) {
                status = sourceIllegal;   /* cursor stays on the illegal value */
                break;
            }
            *out++ = UNI_REPLACEMENT_CHAR;
        } else {
            *out++ = (UTF16)cp;           /* normal case */
        }
        ++in;
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
 * Convert UTF-16 code units in [*sourceStart, sourceEnd) to UTF-32 code
 * points written to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart point just past the last input
 * consumed and the last output written.
 *
 * Results:
 *   conversionOK     - the whole input was converted.
 *   sourceExhausted  - the input ends with a high surrogate whose partner
 *                      is not in the buffer; the cursor is left on it.
 *   sourceIllegal    - strict mode only: an unpaired high or low surrogate
 *                      was found; the cursor is left on it.
 *   targetExhausted  - no room for the next code point; the cursor is left
 *                      at the start of the unit(s) that produced it.
 *
 * In lenient mode unpaired surrogates are copied to the output unchanged.
 */
ConversionResult ConvertUTF16toUTF32 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF16* source = *sourceStart;
    UTF32* target = *targetStart;
    /*
     * Declared outside the loop so the debug trace below can report them.
     * Both start at zero, and ch2 is reset for every unit, so the trace
     * never reads an indeterminate or stale value.
     */
    UTF32 ch = 0, ch2 = 0;
    while (source < sourceEnd) {
        const UTF16* oldSource = source; /* In case we have to back up. */
        ch = *source++;
        ch2 = 0;
        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source buffer... */
            if (source < sourceEnd) {
                ch2 = *source;
                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++source;
                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                    source = oldSource; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                }
            } else { /* We don't have the 16 bits following the high surrogate. */
                source = oldSource; /* return to the high surrogate */
                result = sourceExhausted;
                break;
            }
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                source = oldSource; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        if (target >= targetEnd) {
            source = oldSource; /* Back up source pointer! */
            result = targetExhausted;
            break;
        }
        *target++ = ch;
    }
    *sourceStart = source;
    *targetStart = target;
#ifdef CVTUTF_DEBUG
    if (result == sourceIllegal) {
        /* Cast so the arguments match %x regardless of how UTF32 is defined. */
        fprintf(stderr, "ConvertUTF16toUTF32 illegal seq 0x%04x,%04x\n",
                (unsigned)ch, (unsigned)ch2);
        fflush(stderr);
    }
#endif
    return result;
}
|
||||||
|
/*
 * Convert UTF-16 code units in [*sourceStart, sourceEnd) to UTF-8 bytes
 * written to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart point just past the last input
 * consumed and the last output written.
 *
 * Results:
 *   conversionOK     - the whole input was converted.
 *   sourceExhausted  - the input ends with a high surrogate whose partner
 *                      is not in the buffer; the cursor is left on it.
 *   sourceIllegal    - strict mode only: an unpaired high or low surrogate
 *                      was found; the cursor is left on it.
 *   targetExhausted  - the encoded sequence does not fit in the remaining
 *                      output space; nothing of it is written and the
 *                      cursor is left at the start of the unit(s).
 *
 * In lenient mode unpaired surrogates are encoded as they are.
 */
ConversionResult ConvertUTF16toUTF8 (
        const UTF16** sourceStart, const UTF16* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF16* source = *sourceStart;
    UTF8* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;
        const UTF16* oldSource = source; /* In case we have to back up. */
        UTF8* writePos;
        ch = *source++;
        /* If we have a surrogate pair, convert to UTF32 first. */
        if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) {
            /* If the 16 bits following the high surrogate are in the source buffer... */
            if (source < sourceEnd) {
                UTF32 ch2 = *source;
                /* If it's a low surrogate, convert to UTF32. */
                if (ch2 >= UNI_SUR_LOW_START && ch2 <= UNI_SUR_LOW_END) {
                    ch = ((ch - UNI_SUR_HIGH_START) << halfShift)
                        + (ch2 - UNI_SUR_LOW_START) + halfBase;
                    ++source;
                } else if (flags == strictConversion) { /* it's an unpaired high surrogate */
                    source = oldSource; /* return to the illegal value itself */
                    result = sourceIllegal;
                    break;
                }
            } else { /* We don't have the 16 bits following the high surrogate. */
                source = oldSource; /* return to the high surrogate */
                result = sourceExhausted;
                break;
            }
        } else if (flags == strictConversion) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) {
                source = oldSource; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        /* Figure out how many bytes the result will require */
        if (ch < (UTF32)0x80) {
            bytesToWrite = 1;
        } else if (ch < (UTF32)0x800) {
            bytesToWrite = 2;
        } else if (ch < (UTF32)0x10000) {
            bytesToWrite = 3;
        } else if (ch < (UTF32)0x110000) {
            bytesToWrite = 4;
        } else {
            bytesToWrite = 3;
            ch = UNI_REPLACEMENT_CHAR;
        }

        /*
         * Compare against the space that is left instead of advancing
         * target first: forming a pointer more than one element past the
         * end of the output buffer is undefined behaviour.
         */
        if (targetEnd - target < bytesToWrite) {
            source = oldSource; /* Back up source pointer! */
            result = targetExhausted;
            break;
        }

        /* Fill the sequence back to front, six payload bits per trailing byte. */
        writePos = target + bytesToWrite;
        switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--writePos = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
            case 3: *--writePos = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
            case 2: *--writePos = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
            case 1: *--writePos = (UTF8)(ch | firstByteMark[bytesToWrite]);
                    break;
            default: break;
        }
        target += bytesToWrite;
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
 * Convert UTF-32 code points in [*sourceStart, sourceEnd) to UTF-8 bytes
 * written to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart point just past the last input
 * consumed and the last output written.
 *
 * Results:
 *   conversionOK     - the whole input was converted.
 *   sourceIllegal    - strict mode: a surrogate value was found (the cursor
 *                      is left on it and conversion stops).  In either
 *                      mode: a value above UNI_MAX_LEGAL_UTF32 was found;
 *                      it is replaced with UNI_REPLACEMENT_CHAR and
 *                      conversion continues.
 *   targetExhausted  - the encoded sequence does not fit in the remaining
 *                      output space; nothing of it is written and the
 *                      cursor is left on the code point.
 *
 * In lenient mode surrogate values are encoded as they are.
 */
ConversionResult ConvertUTF32toUTF8 (
        const UTF32** sourceStart, const UTF32* sourceEnd,
        UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags) {
    ConversionResult result = conversionOK;
    const UTF32* source = *sourceStart;
    UTF8* target = *targetStart;
    while (source < sourceEnd) {
        UTF32 ch;
        unsigned short bytesToWrite = 0;
        const UTF32 byteMask = 0xBF;
        const UTF32 byteMark = 0x80;
        UTF8* writePos;
        ch = *source++;
        if (flags == strictConversion ) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
                --source; /* return to the illegal value itself */
                result = sourceIllegal;
                break;
            }
        }
        /*
         * Figure out how many bytes the result will require. Turn any
         * illegally large UTF32 things (> Plane 17) into replacement chars.
         */
        if (ch < (UTF32)0x80) {
            bytesToWrite = 1;
        } else if (ch < (UTF32)0x800) {
            bytesToWrite = 2;
        } else if (ch < (UTF32)0x10000) {
            bytesToWrite = 3;
        } else if (ch <= UNI_MAX_LEGAL_UTF32) {
            bytesToWrite = 4;
        } else {
            bytesToWrite = 3;
            ch = UNI_REPLACEMENT_CHAR;
            result = sourceIllegal;
        }

        /*
         * Compare against the space that is left instead of advancing
         * target first: forming a pointer more than one element past the
         * end of the output buffer is undefined behaviour.
         */
        if (targetEnd - target < bytesToWrite) {
            --source; /* Back up source pointer! */
            result = targetExhausted;
            break;
        }

        /* Fill the sequence back to front, six payload bits per trailing byte. */
        writePos = target + bytesToWrite;
        switch (bytesToWrite) { /* note: everything falls through. */
            case 4: *--writePos = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
            case 3: *--writePos = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
            case 2: *--writePos = (UTF8)((ch | byteMark) & byteMask); ch >>= 6; /* fallthrough */
            case 1: *--writePos = (UTF8) (ch | firstByteMark[bytesToWrite]);
                    break;
            default: break;
        }
        target += bytesToWrite;
    }
    *sourceStart = source;
    *targetStart = target;
    return result;
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Utility routine to tell whether a sequence of bytes is legal UTF-8.
|
||||||
|
* This must be called with the length pre-determined by the first byte.
|
||||||
|
* If not calling this from ConvertUTF8to*, then the length can be set by:
|
||||||
|
* length = trailingBytesForUTF8[*source]+1;
|
||||||
|
* and the sequence is illegal right away if there aren't that many bytes
|
||||||
|
* available.
|
||||||
|
* If presented with a length > 4, this returns false. The Unicode
|
||||||
|
* definition of UTF-8 goes up to 4-byte sequences.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static Boolean isLegalUTF8(const UTF8 *source, int length) {
|
||||||
|
UTF8 a;
|
||||||
|
const UTF8 *srcptr = source+length;
|
||||||
|
switch (length) {
|
||||||
|
default: return false;
|
||||||
|
/* Everything else falls through when "true"... */
|
||||||
|
case 4: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||||
|
case 3: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||||
|
case 2: if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return false;
|
||||||
|
|
||||||
|
switch (*source) {
|
||||||
|
/* no fall-through in this inner switch */
|
||||||
|
case 0xE0: if (a < 0xA0) return false; break;
|
||||||
|
case 0xED: if (a > 0x9F) return false; break;
|
||||||
|
case 0xF0: if (a < 0x90) return false; break;
|
||||||
|
case 0xF4: if (a > 0x8F) return false; break;
|
||||||
|
default: if (a < 0x80) return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
case 1: if (*source >= 0x80 && *source < 0xC2) return false;
|
||||||
|
}
|
||||||
|
if (*source > 0xF4) return false;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exported function to return whether a UTF-8 sequence is legal or not.
|
||||||
|
* This is not used here; it's just exported.
|
||||||
|
*/
|
||||||
|
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
|
||||||
|
int length = trailingBytesForUTF8[*source]+1;
|
||||||
|
if (length > sourceEnd - source) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return isLegalUTF8(source, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static unsigned
|
||||||
|
findMaximalSubpartOfIllFormedUTF8Sequence(const UTF8 *source,
|
||||||
|
const UTF8 *sourceEnd) {
|
||||||
|
UTF8 b1, b2, b3;
|
||||||
|
|
||||||
|
assert(!isLegalUTF8Sequence(source, sourceEnd));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Unicode 6.3.0, D93b:
|
||||||
|
*
|
||||||
|
* Maximal subpart of an ill-formed subsequence: The longest code unit
|
||||||
|
* subsequence starting at an unconvertible offset that is either:
|
||||||
|
* a. the initial subsequence of a well-formed code unit sequence, or
|
||||||
|
* b. a subsequence of length one.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (source == sourceEnd)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform case analysis. See Unicode 6.3.0, Table 3-7. Well-Formed UTF-8
|
||||||
|
* Byte Sequences.
|
||||||
|
*/
|
||||||
|
|
||||||
|
b1 = *source;
|
||||||
|
++source;
|
||||||
|
if (b1 >= 0xC2 && b1 <= 0xDF) {
|
||||||
|
/*
|
||||||
|
* First byte is valid, but we know that this code unit sequence is
|
||||||
|
* invalid, so the maximal subpart has to end after the first byte.
|
||||||
|
*/
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (source == sourceEnd)
|
||||||
|
return 1;
|
||||||
|
|
||||||
|
b2 = *source;
|
||||||
|
++source;
|
||||||
|
|
||||||
|
if (b1 == 0xE0) {
|
||||||
|
return (b2 >= 0xA0 && b2 <= 0xBF) ? 2 : 1;
|
||||||
|
}
|
||||||
|
if (b1 >= 0xE1 && b1 <= 0xEC) {
|
||||||
|
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
|
||||||
|
}
|
||||||
|
if (b1 == 0xED) {
|
||||||
|
return (b2 >= 0x80 && b2 <= 0x9F) ? 2 : 1;
|
||||||
|
}
|
||||||
|
if (b1 >= 0xEE && b1 <= 0xEF) {
|
||||||
|
return (b2 >= 0x80 && b2 <= 0xBF) ? 2 : 1;
|
||||||
|
}
|
||||||
|
if (b1 == 0xF0) {
|
||||||
|
if (b2 >= 0x90 && b2 <= 0xBF) {
|
||||||
|
if (source == sourceEnd)
|
||||||
|
return 2;
|
||||||
|
|
||||||
|
b3 = *source;
|
||||||
|
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (b1 >= 0xF1 && b1 <= 0xF3) {
|
||||||
|
if (b2 >= 0x80 && b2 <= 0xBF) {
|
||||||
|
if (source == sourceEnd)
|
||||||
|
return 2;
|
||||||
|
|
||||||
|
b3 = *source;
|
||||||
|
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
if (b1 == 0xF4) {
|
||||||
|
if (b2 >= 0x80 && b2 <= 0x8F) {
|
||||||
|
if (source == sourceEnd)
|
||||||
|
return 2;
|
||||||
|
|
||||||
|
b3 = *source;
|
||||||
|
return (b3 >= 0x80 && b3 <= 0xBF) ? 3 : 2;
|
||||||
|
}
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
assert((b1 >= 0x80 && b1 <= 0xC1) || b1 >= 0xF5);
|
||||||
|
/*
|
||||||
|
* There are no valid sequences that start with these bytes. Maximal subpart
|
||||||
|
* is defined to have length 1 in these cases.
|
||||||
|
*/
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exported function to return the total number of bytes in a codepoint
|
||||||
|
* represented in UTF-8, given the value of the first byte.
|
||||||
|
*/
|
||||||
|
unsigned getNumBytesForUTF8(UTF8 first) {
|
||||||
|
return trailingBytesForUTF8[first] + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Exported function to return whether a UTF-8 string is legal or not.
|
||||||
|
* This is not used here; it's just exported.
|
||||||
|
*/
|
||||||
|
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
|
||||||
|
while (*source != sourceEnd) {
|
||||||
|
int length = trailingBytesForUTF8[**source] + 1;
|
||||||
|
if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
|
||||||
|
return false;
|
||||||
|
*source += length;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
/*
 * Convert UTF-8 bytes in [*sourceStart, sourceEnd) to UTF-16 code units
 * written to [*targetStart, targetEnd).
 *
 * On return *sourceStart and *targetStart point just past the last input
 * consumed and the last output written.  Whenever conversion stops early,
 * the source cursor is left at the start of the sequence that could not be
 * converted.
 *
 * Results:
 *   conversionOK     - the whole input was converted.
 *   sourceExhausted  - the last sequence is cut short by sourceEnd.
 *   sourceIllegal    - a sequence failed isLegalUTF8() (in either mode),
 *                      or, in strict mode, decoded to a surrogate value or
 *                      to a value above UNI_MAX_UTF16.
 *   targetExhausted  - no room for the next code unit or surrogate pair.
 *
 * In lenient mode a decoded surrogate or out-of-range value is replaced
 * with UNI_REPLACEMENT_CHAR.
 */
ConversionResult ConvertUTF8toUTF16 (
        const UTF8** sourceStart, const UTF8* sourceEnd,
        UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
    const int strict = (flags == strictConversion);
    const UTF8* in = *sourceStart;
    UTF16* out = *targetStart;
    ConversionResult status = conversionOK;

    while (in < sourceEnd) {
        const UTF8* next = in;      /* read cursor; committed to `in` on success */
        unsigned short trailing = trailingBytesForUTF8[*in];
        unsigned short remaining;
        UTF32 cp = 0;

        if (trailing >= sourceEnd - in) {
            status = sourceExhausted;
            break;
        }
        /* Do this check whether lenient or strict */
        if (!isLegalUTF8(in, trailing+1)) {
            status = sourceIllegal;
            break;
        }

        /*
         * Accumulate the raw bytes, shifting by 6 bits between them, then
         * strip the marker bits in one subtraction (see "Note A" below).
         */
        for (remaining = trailing; remaining > 0; --remaining) {
            cp += *next++;
            cp <<= 6;
        }
        cp += *next++;
        cp -= offsetsFromUTF8[trailing];

        if (out >= targetEnd) {
            status = targetExhausted;
            break;
        }

        if (cp > UNI_MAX_UTF16) {
            if (strict) {
                status = sourceIllegal;
                break;              /* Bail out; shouldn't continue */
            }
            *out++ = UNI_REPLACEMENT_CHAR;
        } else if (cp > UNI_MAX_BMP) {
            /* Supplementary plane: needs room for a full surrogate pair. */
            if (out + 1 >= targetEnd) {
                status = targetExhausted;
                break;
            }
            cp -= halfBase;
            *out++ = (UTF16)((cp >> halfShift) + UNI_SUR_HIGH_START);
            *out++ = (UTF16)((cp & halfMask) + UNI_SUR_LOW_START);
        } else if (cp >= UNI_SUR_HIGH_START && cp <= UNI_SUR_LOW_END) {
            /* UTF-16 surrogate values are illegal in UTF-32 */
            if (strict) {
                status = sourceIllegal;
                break;
            }
            *out++ = UNI_REPLACEMENT_CHAR;
        } else {
            *out++ = (UTF16)cp;     /* normal case */
        }

        in = next;                  /* the sequence has been consumed */
    }

    *sourceStart = in;
    *targetStart = out;
    return status;
}
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
static ConversionResult ConvertUTF8toUTF32Impl(
|
||||||
|
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||||
|
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags,
|
||||||
|
Boolean InputIsPartial) {
|
||||||
|
ConversionResult result = conversionOK;
|
||||||
|
const UTF8* source = *sourceStart;
|
||||||
|
UTF32* target = *targetStart;
|
||||||
|
while (source < sourceEnd) {
|
||||||
|
UTF32 ch = 0;
|
||||||
|
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
|
||||||
|
if (extraBytesToRead >= sourceEnd - source) {
|
||||||
|
if (flags == strictConversion || InputIsPartial) {
|
||||||
|
result = sourceExhausted;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
result = sourceIllegal;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Replace the maximal subpart of ill-formed sequence with
|
||||||
|
* replacement character.
|
||||||
|
*/
|
||||||
|
source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
|
||||||
|
sourceEnd);
|
||||||
|
*target++ = UNI_REPLACEMENT_CHAR;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (target >= targetEnd) {
|
||||||
|
result = targetExhausted; break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Do this check whether lenient or strict */
|
||||||
|
if (!isLegalUTF8(source, extraBytesToRead+1)) {
|
||||||
|
result = sourceIllegal;
|
||||||
|
if (flags == strictConversion) {
|
||||||
|
/* Abort conversion. */
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
/*
|
||||||
|
* Replace the maximal subpart of ill-formed sequence with
|
||||||
|
* replacement character.
|
||||||
|
*/
|
||||||
|
source += findMaximalSubpartOfIllFormedUTF8Sequence(source,
|
||||||
|
sourceEnd);
|
||||||
|
*target++ = UNI_REPLACEMENT_CHAR;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* The cases all fall through. See "Note A" below.
|
||||||
|
*/
|
||||||
|
switch (extraBytesToRead) {
|
||||||
|
case 5: ch += *source++; ch <<= 6;
|
||||||
|
case 4: ch += *source++; ch <<= 6;
|
||||||
|
case 3: ch += *source++; ch <<= 6;
|
||||||
|
case 2: ch += *source++; ch <<= 6;
|
||||||
|
case 1: ch += *source++; ch <<= 6;
|
||||||
|
case 0: ch += *source++;
|
||||||
|
}
|
||||||
|
ch -= offsetsFromUTF8[extraBytesToRead];
|
||||||
|
|
||||||
|
if (ch <= UNI_MAX_LEGAL_UTF32) {
|
||||||
|
/*
|
||||||
|
* UTF-16 surrogate values are illegal in UTF-32, and anything
|
||||||
|
* over Plane 17 (> 0x10FFFF) is illegal.
|
||||||
|
*/
|
||||||
|
if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_LOW_END) {
|
||||||
|
if (flags == strictConversion) {
|
||||||
|
source -= (extraBytesToRead+1); /* return to the illegal value itself */
|
||||||
|
result = sourceIllegal;
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
*target++ = UNI_REPLACEMENT_CHAR;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
*target++ = ch;
|
||||||
|
}
|
||||||
|
} else { /* i.e., ch > UNI_MAX_LEGAL_UTF32 */
|
||||||
|
result = sourceIllegal;
|
||||||
|
*target++ = UNI_REPLACEMENT_CHAR;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*sourceStart = source;
|
||||||
|
*targetStart = target;
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
ConversionResult ConvertUTF8toUTF32Partial(const UTF8 **sourceStart,
|
||||||
|
const UTF8 *sourceEnd,
|
||||||
|
UTF32 **targetStart,
|
||||||
|
UTF32 *targetEnd,
|
||||||
|
ConversionFlags flags) {
|
||||||
|
return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd, targetStart, targetEnd,
|
||||||
|
flags, /*InputIsPartial=*/true);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Convert UTF-8 to UTF-32, treating the input as complete:
 * forwards to ConvertUTF8toUTF32Impl() with InputIsPartial set to false.
 */
ConversionResult ConvertUTF8toUTF32(const UTF8 **sourceStart,
                                    const UTF8 *sourceEnd,
                                    UTF32 **targetStart,
                                    UTF32 *targetEnd,
                                    ConversionFlags flags) {
    const Boolean inputIsPartial = false;

    return ConvertUTF8toUTF32Impl(sourceStart, sourceEnd,
                                  targetStart, targetEnd,
                                  flags, inputIsPartial);
}
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
Note A.
|
||||||
|
The fall-through switches in UTF-8 reading code save a
|
||||||
|
temp variable, some decrements & conditionals. The switches
|
||||||
|
are equivalent to the following loop:
|
||||||
|
{
|
||||||
|
int tmpBytesToRead = extraBytesToRead+1;
|
||||||
|
do {
|
||||||
|
ch += *source++;
|
||||||
|
--tmpBytesToRead;
|
||||||
|
if (tmpBytesToRead) ch <<= 6;
|
||||||
|
} while (tmpBytesToRead > 0);
|
||||||
|
}
|
||||||
|
In UTF-8 writing code, the switches on "bytesToWrite" are
|
||||||
|
similarly unrolled loops.
|
||||||
|
|
||||||
|
--------------------------------------------------------------------- */
|
233
src/3rdparty/ConvertUTF.h
vendored
Normal file
233
src/3rdparty/ConvertUTF.h
vendored
Normal file
|
@ -0,0 +1,233 @@
|
||||||
|
/*===--- ConvertUTF.h - Universal Character Names conversions ---------------===
|
||||||
|
*
|
||||||
|
* The LLVM Compiler Infrastructure
|
||||||
|
*
|
||||||
|
* This file is distributed under the University of Illinois Open Source
|
||||||
|
* License:
|
||||||
|
*
|
||||||
|
* University of Illinois/NCSA
|
||||||
|
* Open Source License
|
||||||
|
*
|
||||||
|
* Copyright (c) 2003-2014 University of Illinois at Urbana-Champaign.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* Developed by:
|
||||||
|
*
|
||||||
|
* LLVM Team
|
||||||
|
*
|
||||||
|
* University of Illinois at Urbana-Champaign
|
||||||
|
*
|
||||||
|
* http://llvm.org
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal with the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or
|
||||||
|
* sell copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* * Redistributions of source code must retain the above
|
||||||
|
* copyright notice, this list of conditions and the
|
||||||
|
* following disclaimers.
|
||||||
|
*
|
||||||
|
* * Redistributions in binary form must reproduce the
|
||||||
|
* above copyright notice, this list of conditions and
|
||||||
|
* the following disclaimers in the documentation and/or
|
||||||
|
* other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* * Neither the names of the LLVM Team, University of
|
||||||
|
* Illinois at Urbana-Champaign, nor the names of its
|
||||||
|
* contributors may be used to endorse or promote
|
||||||
|
* products derived from this Software without specific
|
||||||
|
* prior written permission.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR
|
||||||
|
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
|
||||||
|
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
|
||||||
|
* USE OR OTHER DEALINGS WITH THE SOFTWARE.
|
||||||
|
*
|
||||||
|
*==------------------------------------------------------------------------==*/
|
||||||
|
/*
|
||||||
|
* Copyright 2001-2004 Unicode, Inc.
|
||||||
|
*
|
||||||
|
* Disclaimer
|
||||||
|
*
|
||||||
|
* This source code is provided as is by Unicode, Inc. No claims are
|
||||||
|
* made as to fitness for any particular purpose. No warranties of any
|
||||||
|
* kind are expressed or implied. The recipient agrees to determine
|
||||||
|
* applicability of information provided. If this file has been
|
||||||
|
* purchased on magnetic or optical media from Unicode, Inc., the
|
||||||
|
* sole remedy for any claim will be exchange of defective media
|
||||||
|
* within 90 days of receipt.
|
||||||
|
*
|
||||||
|
* Limitations on Rights to Redistribute This Code
|
||||||
|
*
|
||||||
|
* Unicode, Inc. hereby grants the right to freely use the information
|
||||||
|
* supplied in this file in the creation of products supporting the
|
||||||
|
* Unicode Standard, and to make copies of this file in any form
|
||||||
|
* for internal or external distribution as long as this notice
|
||||||
|
* remains attached.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
|
||||||
|
Conversions between UTF32, UTF-16, and UTF-8. Header file.
|
||||||
|
|
||||||
|
Several functions are included here, forming a complete set of
|
||||||
|
conversions between the three formats. UTF-7 is not included
|
||||||
|
here, but is handled in a separate source file.
|
||||||
|
|
||||||
|
Each of these routines takes pointers to input buffers and output
|
||||||
|
buffers. The input buffers are const.
|
||||||
|
|
||||||
|
Each routine converts the text between *sourceStart and sourceEnd,
|
||||||
|
putting the result into the buffer between *targetStart and
|
||||||
|
targetEnd. Note: the end pointers are *after* the last item: e.g.
|
||||||
|
*(sourceEnd - 1) is the last item.
|
||||||
|
|
||||||
|
!!! NOTE: The source and end pointers must be aligned properly !!!
|
||||||
|
|
||||||
|
The return result indicates whether the conversion was successful,
|
||||||
|
and if not, whether the problem was in the source or target buffers.
|
||||||
|
(Only the first encountered problem is indicated.)
|
||||||
|
|
||||||
|
After the conversion, *sourceStart and *targetStart are both
|
||||||
|
updated to point to the end of last text successfully converted in
|
||||||
|
the respective buffers.
|
||||||
|
|
||||||
|
Input parameters:
|
||||||
|
sourceStart - pointer to a pointer to the source buffer.
|
||||||
|
The contents of this are modified on return so that
|
||||||
|
it points at the next thing to be converted.
|
||||||
|
targetStart - similarly, pointer to pointer to the target buffer.
|
||||||
|
sourceEnd, targetEnd - respectively pointers to the ends of the
|
||||||
|
two buffers, for overflow checking only.
|
||||||
|
|
||||||
|
These conversion functions take a ConversionFlags argument. When this
|
||||||
|
flag is set to strict, both irregular sequences and isolated surrogates
|
||||||
|
will cause an error. When the flag is set to lenient, both irregular
|
||||||
|
sequences and isolated surrogates are converted.
|
||||||
|
|
||||||
|
Whether the flag is strict or lenient, all illegal sequences will cause
|
||||||
|
an error return. This includes sequences such as: <F4 90 80 80>, <C0 80>,
|
||||||
|
or <A0> in UTF-8, and values above 0x10FFFF in UTF-32. Conformant code
|
||||||
|
must check for illegal sequences.
|
||||||
|
|
||||||
|
When the flag is set to lenient, characters over 0x10FFFF are converted
|
||||||
|
to the replacement character; otherwise (when the flag is set to strict)
|
||||||
|
they constitute an error.
|
||||||
|
|
||||||
|
Output parameters:
|
||||||
|
The value "sourceIllegal" is returned from some routines if the input
|
||||||
|
sequence is malformed. When "sourceIllegal" is returned, the source
|
||||||
|
value will point to the illegal value that caused the problem. E.g.,
|
||||||
|
in UTF-8 when a sequence is malformed, it points to the start of the
|
||||||
|
malformed sequence.
|
||||||
|
|
||||||
|
Author: Mark E. Davis, 1994.
|
||||||
|
Rev History: Rick McGowan, fixes & updates May 2001.
|
||||||
|
Fixes & updates, Sept 2001.
|
||||||
|
|
||||||
|
------------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/* ---------------------------------------------------------------------
|
||||||
|
The following 4 definitions are compiler-specific.
|
||||||
|
The C standard does not guarantee that wchar_t has at least
|
||||||
|
16 bits, so wchar_t is no less portable than unsigned short!
|
||||||
|
All should be unsigned values to avoid sign extension during
|
||||||
|
bit mask & shift operations.
|
||||||
|
------------------------------------------------------------------------ */
|
||||||
|
|
||||||
|
/* Code unit types. All are unsigned so that masking and shifting never
 * sign-extend. */
typedef unsigned int UTF32;   /* at least 32 bits (assumed of the platform's unsigned int) */
typedef unsigned short UTF16; /* at least 16 bits */
typedef unsigned char UTF8;   /* typically 8 bits */
typedef unsigned char Boolean; /* 0 or 1 */

/* Some fundamental constants.
 *
 * Each cast expression is wrapped in parentheses so the macro expands to a
 * single primary expression; without them, uses such as
 * `sizeof UNI_MAX_BMP` or a trailing postfix operator would not parse as
 * intended. */
#define UNI_REPLACEMENT_CHAR ((UTF32)0x0000FFFD)
#define UNI_MAX_BMP ((UTF32)0x0000FFFF)
#define UNI_MAX_UTF16 ((UTF32)0x0010FFFF)
#define UNI_MAX_UTF32 ((UTF32)0x7FFFFFFF)
#define UNI_MAX_LEGAL_UTF32 ((UTF32)0x0010FFFF)

/* Longest UTF-8 encoding, in bytes, of a single code point. */
#define UNI_MAX_UTF8_BYTES_PER_CODE_POINT 4

/* UTF-16 byte order mark as read in native and in byte-swapped order. */
#define UNI_UTF16_BYTE_ORDER_MARK_NATIVE 0xFEFF
#define UNI_UTF16_BYTE_ORDER_MARK_SWAPPED 0xFFFE
|
||||||
|
|
||||||
|
/* Outcome reported by every conversion routine. */
typedef enum {
  conversionOK = 0,    /* conversion successful */
  sourceExhausted = 1, /* partial character in source, but hit end */
  targetExhausted = 2, /* insuff. room in target for conversion */
  sourceIllegal = 3    /* source sequence is illegal/malformed */
} ConversionResult;
|
||||||
|
|
||||||
|
/* Strictness selector passed to the conversion routines. */
typedef enum {
  strictConversion = 0,
  lenientConversion = 1
} ConversionFlags;
|
||||||
|
|
||||||
|
/* This is for C++ and does no harm in C */
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
ConversionResult ConvertUTF8toUTF16 (
|
||||||
|
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||||
|
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a partial UTF8 sequence to UTF32. If the sequence ends in an
|
||||||
|
* incomplete code unit sequence, returns \c sourceExhausted.
|
||||||
|
*/
|
||||||
|
ConversionResult ConvertUTF8toUTF32Partial(
|
||||||
|
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||||
|
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Convert a complete UTF8 sequence to UTF32. If the sequence ends in an
|
||||||
|
* incomplete code unit sequence, returns \c sourceIllegal.
|
||||||
|
*/
|
||||||
|
ConversionResult ConvertUTF8toUTF32(
|
||||||
|
const UTF8** sourceStart, const UTF8* sourceEnd,
|
||||||
|
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
/* NOTE: The source and end pointers must be aligned properly. */
|
||||||
|
ConversionResult ConvertUTF16toUTF8 (
|
||||||
|
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||||
|
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
/* NOTE: The source and end pointers must be aligned properly. */
|
||||||
|
ConversionResult ConvertUTF32toUTF8 (
|
||||||
|
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||||
|
UTF8** targetStart, UTF8* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
/* NOTE: The source and end pointers must be aligned properly. */
|
||||||
|
ConversionResult ConvertUTF16toUTF32 (
|
||||||
|
const UTF16** sourceStart, const UTF16* sourceEnd,
|
||||||
|
UTF32** targetStart, UTF32* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
/* NOTE: The source and end pointers must be aligned properly. */
|
||||||
|
ConversionResult ConvertUTF32toUTF16 (
|
||||||
|
const UTF32** sourceStart, const UTF32* sourceEnd,
|
||||||
|
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
|
||||||
|
|
||||||
|
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
|
||||||
|
|
||||||
|
Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
|
||||||
|
|
||||||
|
unsigned getNumBytesForUTF8(UTF8 firstByte);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* --------------------------------------------------------------------- */
|
524
src/3rdparty/bsd-getopt-long.c
vendored
Normal file
524
src/3rdparty/bsd-getopt-long.c
vendored
Normal file
|
@ -0,0 +1,524 @@
|
||||||
|
/* $OpenBSD: getopt_long.c,v 1.17 2004/06/03 18:46:52 millert Exp $ */
|
||||||
|
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose with or without fee is hereby granted, provided that the above
|
||||||
|
* copyright notice and this permission notice appear in all copies.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND TODD C. MILLER DISCLAIMS ALL
|
||||||
|
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL TODD C. MILLER BE LIABLE
|
||||||
|
* FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
*/
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Dieter Baron and Thomas Klausner.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. All advertising materials mentioning features or use of this software
|
||||||
|
* must display the following acknowledgement:
|
||||||
|
* This product includes software developed by the NetBSD
|
||||||
|
* Foundation, Inc. and its contributors.
|
||||||
|
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define IN_GETOPT_LONG_C 1
|
||||||
|
|
||||||
|
#include <zeek/zeek-config.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
|
||||||
|
#ifndef HAVE_GETOPT_LONG
|
||||||
|
|
||||||
|
# include "bsd-getopt-long.h"
|
||||||
|
|
||||||
|
# ifdef WITH_DMALLOC
|
||||||
|
# include <dmalloc.h>
|
||||||
|
# endif
|
||||||
|
|
||||||
|
int pure_opterr = 1; /* if error message should be printed */
|
||||||
|
int pure_optind = 1; /* index into parent argv vector */
|
||||||
|
int pure_optopt = '?'; /* character checked for validity */
|
||||||
|
int pure_optreset; /* reset getopt */
|
||||||
|
const char *pure_optarg; /* argument associated with option */
|
||||||
|
|
||||||
|
/* True when diagnostics are enabled and the option string does not start
 * with ':'. Expects a variable named `options` in scope at the use site. */
# define PRINT_ERROR ((pure_opterr) && (*options != ':'))

# define FLAG_PERMUTE 0x01 /* permute non-options to the end of argv */
# define FLAG_ALLARGS 0x02 /* treat non-options as args to option "-1" */
# define FLAG_LONGONLY 0x04 /* operate as pure_getopt_long_only */

/* return values
 *
 * The cast expressions are fully parenthesized so each macro expands to a
 * single primary expression regardless of the surrounding operators. */
# define BADCH ((int)'?')
# define BADARG ((*options == ':') ? (int)':' : (int)'?')
# define INORDER ((int)1)

/* Empty string used as the "nothing left to scan" marker. */
# define EMSG ""
|
||||||
|
|
||||||
|
static int pure_getopt_internal(int, char * const *, const char *,
|
||||||
|
const struct pure_option *, int *, int);
|
||||||
|
static int pure_parse_long_options(char * const *, const char *,
|
||||||
|
const struct pure_option *, int *, int);
|
||||||
|
static int pure_gcd(int, int);
|
||||||
|
static void pure_permute_args(int, int, int, char * const *);
|
||||||
|
|
||||||
|
static const char *pure_place = EMSG; /* option letter processing */
|
||||||
|
|
||||||
|
/* XXX: set pure_optreset to 1 rather than these two */
|
||||||
|
static int nonopt_start = -1; /* first non option argument (for permute) */
|
||||||
|
static int nonopt_end = -1; /* first option after non options (for permute) */
|
||||||
|
|
||||||
|
/* Error messages */
|
||||||
|
static const char *recargchar = "option requires an argument -- %c\n";
|
||||||
|
static const char *recargstring = "option requires an argument -- %s\n";
|
||||||
|
static const char *ambig = "ambiguous option -- %.*s\n";
|
||||||
|
static const char *noarg = "option doesn't take an argument -- %.*s\n";
|
||||||
|
static const char *illoptchar = "unknown option -- %c\n";
|
||||||
|
static const char *illoptstring = "unknown option -- %s\n";
|
||||||
|
|
||||||
|
/*
 * Compute the greatest common divisor of a and b (Euclid's algorithm).
 *
 * A zero divisor is handled up front: gcd(a, 0) is a, and evaluating
 * `a % 0` would be undefined behaviour.
 */
static int pure_gcd(int a, int b)
{
	int c;

	if (b == 0)
		return a;

	c = a % b;
	while (c != 0) {
		a = b;
		b = c;
		c = a % b;
	}
	return b;
}
|
||||||
|
|
||||||
|
/*
 * Swap two adjacent runs of the argument vector in place: the run
 * [panonopt_start, panonopt_end) and the run [panonopt_end, opt_end).
 * The order of the arguments inside each run is kept.
 */
static void pure_permute_args(int panonopt_start, int panonopt_end,
	int opt_end, char * const *nargv)
{
	/* LINTED const cast: the vector is declared const, but its slots are rewritten here */
	char **args = (char **)nargv;
	/* lengths of the two runs, and number and size of the cycles */
	const int nnonopts = panonopt_end - panonopt_start;
	const int nopts = opt_end - panonopt_end;
	const int ncycle = pure_gcd(nnonopts, nopts);
	const int cyclelen = (opt_end - panonopt_start) / ncycle;
	int cycle;

	for (cycle = 0; cycle < ncycle; cycle++) {
		const int anchor = panonopt_end + cycle;
		int cursor = anchor;
		int step;

		for (step = 0; step < cyclelen; step++) {
			char *held;

			/* Move back by one run length or forward by the other. */
			cursor += (cursor >= panonopt_end) ? -nnonopts : nopts;

			held = args[cursor];
			args[cursor] = args[anchor];
			args[anchor] = held;
		}
	}
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pure_parse_long_options --
|
||||||
|
* Parse long options in argc/argv argument vector.
|
||||||
|
* Returns -1 if short_too is set and the option does not match long_options.
|
||||||
|
*/
|
||||||
|
static int pure_parse_long_options(char * const *nargv, const char *options,
|
||||||
|
const struct pure_option *long_options,
|
||||||
|
int *idx, int short_too)
|
||||||
|
{
|
||||||
|
const char *current_argv, *has_equal;
|
||||||
|
size_t current_argv_len;
|
||||||
|
int i, match;
|
||||||
|
|
||||||
|
current_argv = pure_place;
|
||||||
|
match = -1;
|
||||||
|
|
||||||
|
pure_optind++;
|
||||||
|
|
||||||
|
if ((has_equal = strchr(current_argv, '=')) != NULL) {
|
||||||
|
/* argument found (--option=arg) */
|
||||||
|
current_argv_len = has_equal - current_argv;
|
||||||
|
has_equal++;
|
||||||
|
} else
|
||||||
|
current_argv_len = strlen(current_argv);
|
||||||
|
|
||||||
|
for (i = 0; long_options[i].name; i++) {
|
||||||
|
/* find matching long option */
|
||||||
|
if (strncmp(current_argv, long_options[i].name,
|
||||||
|
current_argv_len))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (strlen(long_options[i].name) == current_argv_len) {
|
||||||
|
/* exact match */
|
||||||
|
match = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* If this is a known short option, don't allow
|
||||||
|
* a partial match of a single character.
|
||||||
|
*/
|
||||||
|
if (short_too && current_argv_len == 1)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (match == -1) /* partial match */
|
||||||
|
match = i;
|
||||||
|
else {
|
||||||
|
/* ambiguous abbreviation */
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, ambig, (int)current_argv_len,
|
||||||
|
current_argv);
|
||||||
|
pure_optopt = 0;
|
||||||
|
return BADCH;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (match != -1) { /* option found */
|
||||||
|
if (long_options[match].has_arg == no_argument
|
||||||
|
&& has_equal) {
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, noarg, (int)current_argv_len,
|
||||||
|
current_argv);
|
||||||
|
/*
|
||||||
|
* XXX: GNU sets pure_optopt to val regardless of flag
|
||||||
|
*/
|
||||||
|
if (long_options[match].flag == NULL)
|
||||||
|
pure_optopt = long_options[match].val;
|
||||||
|
else
|
||||||
|
pure_optopt = 0;
|
||||||
|
return BADARG;
|
||||||
|
}
|
||||||
|
if (long_options[match].has_arg == required_argument ||
|
||||||
|
long_options[match].has_arg == optional_argument) {
|
||||||
|
if (has_equal)
|
||||||
|
pure_optarg = has_equal;
|
||||||
|
else if (long_options[match].has_arg ==
|
||||||
|
required_argument) {
|
||||||
|
/*
|
||||||
|
* optional argument doesn't use next nargv
|
||||||
|
*/
|
||||||
|
pure_optarg = nargv[pure_optind++];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ((long_options[match].has_arg == required_argument)
|
||||||
|
&& (pure_optarg == NULL)) {
|
||||||
|
/*
|
||||||
|
* Missing argument; leading ':' indicates no error
|
||||||
|
* should be generated.
|
||||||
|
*/
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, recargstring,
|
||||||
|
current_argv);
|
||||||
|
/*
|
||||||
|
* XXX: GNU sets pure_optopt to val regardless of flag
|
||||||
|
*/
|
||||||
|
if (long_options[match].flag == NULL)
|
||||||
|
pure_optopt = long_options[match].val;
|
||||||
|
else
|
||||||
|
pure_optopt = 0;
|
||||||
|
--pure_optind;
|
||||||
|
return BADARG;
|
||||||
|
}
|
||||||
|
} else { /* unknown option */
|
||||||
|
if (short_too) {
|
||||||
|
--pure_optind;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, illoptstring, current_argv);
|
||||||
|
pure_optopt = 0;
|
||||||
|
return BADCH;
|
||||||
|
}
|
||||||
|
if (idx)
|
||||||
|
*idx = match;
|
||||||
|
if (long_options[match].flag) {
|
||||||
|
*long_options[match].flag = long_options[match].val;
|
||||||
|
return 0;
|
||||||
|
} else
|
||||||
|
return long_options[match].val;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* getopt_internal --
|
||||||
|
* Parse argc/argv argument vector. Called by user level routines.
|
||||||
|
*/
|
||||||
|
static int pure_getopt_internal(int nargc, char * const *nargv,
|
||||||
|
const char *options,
|
||||||
|
const struct pure_option *long_options,
|
||||||
|
int *idx, int flags)
|
||||||
|
{
|
||||||
|
char *oli; /* option letter list index */
|
||||||
|
int optchar, short_too;
|
||||||
|
static int posixly_correct = -1;
|
||||||
|
|
||||||
|
if (options == NULL)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable GNU extensions if POSIXLY_CORRECT is set or options
|
||||||
|
* string begins with a '+'.
|
||||||
|
*/
|
||||||
|
if (posixly_correct == -1)
|
||||||
|
posixly_correct = (getenv("POSIXLY_CORRECT") != NULL);
|
||||||
|
if (posixly_correct || *options == '+')
|
||||||
|
flags &= ~FLAG_PERMUTE;
|
||||||
|
else if (*options == '-')
|
||||||
|
flags |= FLAG_ALLARGS;
|
||||||
|
if (*options == '+' || *options == '-')
|
||||||
|
options++;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* XXX Some GNU programs (like cvs) set pure_optind to 0 instead of
|
||||||
|
* XXX using pure_optreset. Work around this braindamage.
|
||||||
|
*/
|
||||||
|
if (pure_optind == 0)
|
||||||
|
pure_optind = pure_optreset = 1;
|
||||||
|
|
||||||
|
pure_optarg = NULL;
|
||||||
|
if (pure_optreset)
|
||||||
|
nonopt_start = nonopt_end = -1;
|
||||||
|
start:
|
||||||
|
if (pure_optreset || !*pure_place) { /* update scanning pointer */
|
||||||
|
pure_optreset = 0;
|
||||||
|
if (pure_optind >= nargc) { /* end of argument vector */
|
||||||
|
pure_place = EMSG;
|
||||||
|
if (nonopt_end != -1) {
|
||||||
|
/* do permutation, if we have to */
|
||||||
|
pure_permute_args(nonopt_start, nonopt_end,
|
||||||
|
pure_optind, nargv);
|
||||||
|
pure_optind -= nonopt_end - nonopt_start;
|
||||||
|
}
|
||||||
|
else if (nonopt_start != -1) {
|
||||||
|
/*
|
||||||
|
* If we skipped non-options, set pure_optind
|
||||||
|
* to the first of them.
|
||||||
|
*/
|
||||||
|
pure_optind = nonopt_start;
|
||||||
|
}
|
||||||
|
nonopt_start = nonopt_end = -1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if (*(pure_place = nargv[pure_optind]) != '-' ||
|
||||||
|
(pure_place[1] == '\0' && strchr(options, '-') == NULL)) {
|
||||||
|
pure_place = EMSG; /* found non-option */
|
||||||
|
if (flags & FLAG_ALLARGS) {
|
||||||
|
/*
|
||||||
|
* GNU extension:
|
||||||
|
* return non-option as argument to option 1
|
||||||
|
*/
|
||||||
|
pure_optarg = nargv[pure_optind++];
|
||||||
|
return INORDER;
|
||||||
|
}
|
||||||
|
if (!(flags & FLAG_PERMUTE)) {
|
||||||
|
/*
|
||||||
|
* If no permutation wanted, stop parsing
|
||||||
|
* at first non-option.
|
||||||
|
*/
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
/* do permutation */
|
||||||
|
if (nonopt_start == -1)
|
||||||
|
nonopt_start = pure_optind;
|
||||||
|
else if (nonopt_end != -1) {
|
||||||
|
pure_permute_args(nonopt_start, nonopt_end,
|
||||||
|
pure_optind, nargv);
|
||||||
|
nonopt_start = pure_optind -
|
||||||
|
(nonopt_end - nonopt_start);
|
||||||
|
nonopt_end = -1;
|
||||||
|
}
|
||||||
|
pure_optind++;
|
||||||
|
/* process next argument */
|
||||||
|
goto start;
|
||||||
|
}
|
||||||
|
if (nonopt_start != -1 && nonopt_end == -1)
|
||||||
|
nonopt_end = pure_optind;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check for "--" or "--foo" with no long options
|
||||||
|
* but if pure_place is simply "-" leave it unmolested.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (pure_place[1] != '\0' && *++pure_place == '-' &&
|
||||||
|
(pure_place[1] == '\0' || long_options == NULL)) {
|
||||||
|
pure_optind++;
|
||||||
|
pure_place = EMSG;
|
||||||
|
/*
|
||||||
|
* We found an option (--), so if we skipped
|
||||||
|
* non-options, we have to permute.
|
||||||
|
*/
|
||||||
|
if (nonopt_end != -1) {
|
||||||
|
pure_permute_args(nonopt_start, nonopt_end,
|
||||||
|
pure_optind, nargv);
|
||||||
|
pure_optind -= nonopt_end - nonopt_start;
|
||||||
|
}
|
||||||
|
nonopt_start = nonopt_end = -1;
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check long options if:
|
||||||
|
* 1) we were passed some
|
||||||
|
* 2) the arg is not just "-"
|
||||||
|
* 3) either the arg starts with -- we are pure_getopt_long_only()
|
||||||
|
*/
|
||||||
|
if (long_options != NULL && pure_place != nargv[pure_optind] &&
|
||||||
|
(*pure_place == '-' || (flags & FLAG_LONGONLY))) {
|
||||||
|
short_too = 0;
|
||||||
|
if (*pure_place == '-')
|
||||||
|
pure_place++; /* --foo long option */
|
||||||
|
else if (*pure_place != ':' && strchr(options, *pure_place) != NULL)
|
||||||
|
short_too = 1; /* could be short option too */
|
||||||
|
|
||||||
|
optchar = pure_parse_long_options(nargv, options, long_options,
|
||||||
|
idx, short_too);
|
||||||
|
if (optchar != -1) {
|
||||||
|
pure_place = EMSG;
|
||||||
|
return optchar;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((optchar = (int) *pure_place++) == ':' ||
|
||||||
|
(optchar == '-' && *pure_place != '\0') ||
|
||||||
|
(oli = strchr(options, optchar)) == NULL) {
|
||||||
|
/*
|
||||||
|
* If the user specified "-" and '-' isn't listed in
|
||||||
|
* options, return -1 (non-option) as per POSIX.
|
||||||
|
* Otherwise, it is an unknown option character (or :').
|
||||||
|
*/
|
||||||
|
if (optchar == '-' && *pure_place == '\0')
|
||||||
|
return -1;
|
||||||
|
if (!*pure_place)
|
||||||
|
++pure_optind;
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, illoptchar, optchar);
|
||||||
|
pure_optopt = optchar;
|
||||||
|
return BADCH;
|
||||||
|
}
|
||||||
|
if (long_options != NULL && optchar == 'W' && oli[1] == ';') {
|
||||||
|
/* -W long-option */
|
||||||
|
if (*pure_place) /* no space */
|
||||||
|
/* NOTHING */;
|
||||||
|
else if (++pure_optind >= nargc) { /* no arg */
|
||||||
|
pure_place = EMSG;
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, recargchar, optchar);
|
||||||
|
pure_optopt = optchar;
|
||||||
|
return BADARG;
|
||||||
|
} else /* white space */
|
||||||
|
pure_place = nargv[pure_optind];
|
||||||
|
optchar = pure_parse_long_options(nargv, options, long_options,
|
||||||
|
idx, 0);
|
||||||
|
pure_place = EMSG;
|
||||||
|
return optchar;
|
||||||
|
}
|
||||||
|
if (*++oli != ':') { /* doesn't take argument */
|
||||||
|
if (!*pure_place)
|
||||||
|
++pure_optind;
|
||||||
|
} else { /* takes (optional) argument */
|
||||||
|
pure_optarg = NULL;
|
||||||
|
if (*pure_place) /* no white space */
|
||||||
|
pure_optarg = pure_place;
|
||||||
|
/* XXX: disable test for :: if PC? (GNU doesn't) */
|
||||||
|
else if (oli[1] != ':') { /* arg not optional */
|
||||||
|
if (++pure_optind >= nargc) { /* no arg */
|
||||||
|
pure_place = EMSG;
|
||||||
|
if (PRINT_ERROR)
|
||||||
|
fprintf(stderr, recargchar, optchar);
|
||||||
|
pure_optopt = optchar;
|
||||||
|
return BADARG;
|
||||||
|
} else {
|
||||||
|
pure_optarg = nargv[pure_optind];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pure_place = EMSG;
|
||||||
|
++pure_optind;
|
||||||
|
}
|
||||||
|
/* dump back option letter */
|
||||||
|
return optchar;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * pure_getopt --
 *	Parse argc/argv argument vector (no long options, no flags).
 */
int pure_getopt(int nargc, char * const *nargv, const char *options)
{
	int result;

	/*
	 * We don't pass FLAG_PERMUTE to pure_getopt_internal() since
	 * the BSD getopt(3) (unlike GNU) has never done this.
	 *
	 * Furthermore, since many privileged programs call getopt()
	 * before dropping privileges it makes sense to keep things
	 * as simple (and bug-free) as possible.
	 */
	result = pure_getopt_internal(nargc, nargv, options, NULL, NULL, 0);
	return result;
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pure_getopt_long --
|
||||||
|
* Parse argc/argv argument vector.
|
||||||
|
*/
|
||||||
|
int pure_getopt_long(int nargc, char * const *nargv, const char *options,
|
||||||
|
const struct pure_option *long_options, int *idx)
|
||||||
|
{
|
||||||
|
return pure_getopt_internal(nargc, nargv, options, long_options, idx,
|
||||||
|
FLAG_PERMUTE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* pure_getopt_long_only --
|
||||||
|
* Parse argc/argv argument vector.
|
||||||
|
*/
|
||||||
|
int pure_getopt_long_only(int nargc, char * const *nargv,
|
||||||
|
const char *options,
|
||||||
|
const struct pure_option *long_options,
|
||||||
|
int *idx)
|
||||||
|
{
|
||||||
|
return pure_getopt_internal(nargc, nargv, options, long_options, idx,
|
||||||
|
FLAG_PERMUTE|FLAG_LONGONLY);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
127
src/3rdparty/bsd-getopt-long.h
vendored
Normal file
127
src/3rdparty/bsd-getopt-long.h
vendored
Normal file
|
@ -0,0 +1,127 @@
|
||||||
|
/* $OpenBSD: getopt_long.c,v 1.13 2003/06/03 01:52:40 millert Exp $ */
|
||||||
|
/* $NetBSD: getopt_long.c,v 1.15 2002/01/31 22:43:40 tv Exp $ */
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2002 Todd C. Miller <Todd.Miller@courtesan.com>
|
||||||
|
*
|
||||||
|
* Permission to use, copy, modify, and distribute this software for any
|
||||||
|
* purpose with or without fee is hereby granted, provided that the above
|
||||||
|
* copyright notice and this permission notice appear in all copies.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS" AND TODD C. MILLER DISCLAIMS ALL
|
||||||
|
* WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
|
||||||
|
* OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL TODD C. MILLER BE LIABLE
|
||||||
|
* FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
||||||
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
|
||||||
|
* OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
|
||||||
|
* CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
||||||
|
*/
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 2000 The NetBSD Foundation, Inc.
|
||||||
|
* All rights reserved.
|
||||||
|
*
|
||||||
|
* This code is derived from software contributed to The NetBSD Foundation
|
||||||
|
* by Dieter Baron and Thomas Klausner.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. All advertising materials mentioning features or use of this software
|
||||||
|
* must display the following acknowledgement:
|
||||||
|
* This product includes software developed by the NetBSD
|
||||||
|
* Foundation, Inc. and its contributors.
|
||||||
|
* 4. Neither the name of The NetBSD Foundation nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived
|
||||||
|
* from this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
|
||||||
|
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
* TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
|
||||||
|
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
|
||||||
|
* BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||||
|
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||||
|
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||||
|
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||||
|
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||||
|
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||||
|
* POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifndef HAVE_GETOPT_LONG
|
||||||
|
|
||||||
|
/*
|
||||||
|
* GNU-like getopt_long() and 4.4BSD getsubopt()/optreset extensions
|
||||||
|
*/
|
||||||
|
# ifndef no_argument
|
||||||
|
# define no_argument 0
|
||||||
|
# endif
|
||||||
|
# ifndef required_argument
|
||||||
|
# define required_argument 1
|
||||||
|
# endif
|
||||||
|
# ifndef optional_argument
|
||||||
|
# define optional_argument 2
|
||||||
|
# endif
|
||||||
|
|
||||||
|
/*
 * Description of one long option. Arrays of these are what
 * pure_getopt_long() and pure_getopt_long_only() take as `long_options`.
 * Unless IN_GETOPT_LONG_C is defined, this header also aliases the plain
 * name `option` to this struct.
 */
struct pure_option {
	/* name of long option */
	const char *name;
	/*
	 * one of no_argument, required_argument, and optional_argument:
	 * whether option takes an argument
	 */
	int has_arg;
	/* if not NULL, set *flag to val when option found */
	int *flag;
	/* if flag not NULL, value to set *flag to; else return value */
	int val;
};
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int pure_getopt_long(int nargc, char * const *nargv, const char *options,
|
||||||
|
const struct pure_option *long_options, int *idx);
|
||||||
|
|
||||||
|
int pure_getopt_long_only(int nargc, char * const *nargv,
|
||||||
|
const char *options,
|
||||||
|
const struct pure_option *long_options,
|
||||||
|
int *idx);
|
||||||
|
|
||||||
|
int pure_getopt(int nargc, char * const *nargv, const char *options);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* prefix+macros just to avoid clashes with existing getopt() implementations */
|
||||||
|
|
||||||
|
# ifndef IN_GETOPT_LONG_C
|
||||||
|
# undef option
|
||||||
|
# define option pure_option
|
||||||
|
# undef getopt_long
|
||||||
|
# define getopt_long(A, B, C, D, E) pure_getopt_long(A, B, C, D, E)
|
||||||
|
# undef getopt_long_only
|
||||||
|
# define getopt_long_only(A, B, C, D, E) pure_getopt_long_only(A, B, C, D, E)
|
||||||
|
# undef getopt
|
||||||
|
# define getopt(A, B, C) pure_getopt(A, B, C)
|
||||||
|
# undef optarg
|
||||||
|
# define optarg pure_optarg
|
||||||
|
# undef opterr
|
||||||
|
# define opterr pure_opterr
|
||||||
|
# undef optind
|
||||||
|
# define optind pure_optind
|
||||||
|
# undef optopt
|
||||||
|
# define optopt pure_optopt
|
||||||
|
# undef optreset
|
||||||
|
# define optreset pure_optreset
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#endif
|
141
src/3rdparty/in_cksum.cc
vendored
Normal file
141
src/3rdparty/in_cksum.cc
vendored
Normal file
|
@ -0,0 +1,141 @@
|
||||||
|
// Modified from tcpdump v4.9.3's in_cksum.c (which itself was a modified
|
||||||
|
// version of FreeBSD's in_cksum.c).
|
||||||
|
|
||||||
|
/* in_cksum.c
|
||||||
|
* 4.4-Lite-2 Internet checksum routine, modified to take a vector of
|
||||||
|
* pointers/lengths giving the pieces to be checksummed. Also using
|
||||||
|
* Tahoe/CGI version of ADDCARRY(x) macro instead of from portable version.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (c) 1988, 1992, 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*
|
||||||
|
* @(#)in_cksum.c 8.1 (Berkeley) 6/10/93
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "zeek/net_util.h"
|
||||||
|
|
||||||
|
namespace zeek::detail {
|
||||||
|
|
||||||
|
#define ADDCARRY(x) {if ((x) > 65535) (x) -= 65535;}
|
||||||
|
#define REDUCE {l_util.l = sum; sum = l_util.s[0] + l_util.s[1]; ADDCARRY(sum);}
|
||||||
|
|
||||||
|
uint16_t in_cksum(const struct checksum_block *vec, int veclen)
|
||||||
|
{
|
||||||
|
const uint16_t *w;
|
||||||
|
int sum = 0;
|
||||||
|
int mlen = 0;
|
||||||
|
int byte_swapped = 0;
|
||||||
|
|
||||||
|
union {
|
||||||
|
uint8_t c[2];
|
||||||
|
uint16_t s;
|
||||||
|
} s_util;
|
||||||
|
union {
|
||||||
|
uint16_t s[2];
|
||||||
|
uint32_t l;
|
||||||
|
} l_util;
|
||||||
|
|
||||||
|
for (; veclen != 0; vec++, veclen--) {
|
||||||
|
if (vec->len == 0)
|
||||||
|
continue;
|
||||||
|
w = (const uint16_t *)(const void *)vec->block;
|
||||||
|
if (mlen == -1) {
|
||||||
|
/*
|
||||||
|
* The first byte of this chunk is the continuation
|
||||||
|
* of a word spanning between this chunk and the
|
||||||
|
* last chunk.
|
||||||
|
*
|
||||||
|
* s_util.c[0] is already saved when scanning previous
|
||||||
|
* chunk.
|
||||||
|
*/
|
||||||
|
s_util.c[1] = *(const uint8_t *)w;
|
||||||
|
sum += s_util.s;
|
||||||
|
w = (const uint16_t *)(const void *)((const uint8_t *)w + 1);
|
||||||
|
mlen = vec->len - 1;
|
||||||
|
} else
|
||||||
|
mlen = vec->len;
|
||||||
|
/*
|
||||||
|
* Force to even boundary.
|
||||||
|
*/
|
||||||
|
if ((1 & (uintptr_t) w) && (mlen > 0)) {
|
||||||
|
REDUCE;
|
||||||
|
sum <<= 8;
|
||||||
|
s_util.c[0] = *(const uint8_t *)w;
|
||||||
|
w = (const uint16_t *)(const void *)((const uint8_t *)w + 1);
|
||||||
|
mlen--;
|
||||||
|
byte_swapped = 1;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* Unroll the loop to make overhead from
|
||||||
|
* branches &c small.
|
||||||
|
*/
|
||||||
|
while ((mlen -= 32) >= 0) {
|
||||||
|
sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
|
||||||
|
sum += w[4]; sum += w[5]; sum += w[6]; sum += w[7];
|
||||||
|
sum += w[8]; sum += w[9]; sum += w[10]; sum += w[11];
|
||||||
|
sum += w[12]; sum += w[13]; sum += w[14]; sum += w[15];
|
||||||
|
w += 16;
|
||||||
|
}
|
||||||
|
mlen += 32;
|
||||||
|
while ((mlen -= 8) >= 0) {
|
||||||
|
sum += w[0]; sum += w[1]; sum += w[2]; sum += w[3];
|
||||||
|
w += 4;
|
||||||
|
}
|
||||||
|
mlen += 8;
|
||||||
|
if (mlen == 0 && byte_swapped == 0)
|
||||||
|
continue;
|
||||||
|
REDUCE;
|
||||||
|
while ((mlen -= 2) >= 0) {
|
||||||
|
sum += *w++;
|
||||||
|
}
|
||||||
|
if (byte_swapped) {
|
||||||
|
REDUCE;
|
||||||
|
sum <<= 8;
|
||||||
|
byte_swapped = 0;
|
||||||
|
if (mlen == -1) {
|
||||||
|
s_util.c[1] = *(const uint8_t *)w;
|
||||||
|
sum += s_util.s;
|
||||||
|
mlen = 0;
|
||||||
|
} else
|
||||||
|
mlen = -1;
|
||||||
|
} else if (mlen == -1)
|
||||||
|
s_util.c[0] = *(const uint8_t *)w;
|
||||||
|
}
|
||||||
|
if (mlen == -1) {
|
||||||
|
/* The last mbuf has odd # of bytes. Follow the
|
||||||
|
standard (the odd byte may be shifted left by 8 bits
|
||||||
|
or not as determined by endian-ness of the machine) */
|
||||||
|
s_util.c[1] = 0;
|
||||||
|
sum += s_util.s;
|
||||||
|
}
|
||||||
|
REDUCE;
|
||||||
|
return sum;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace zeek::detail
|
494
src/3rdparty/modp_numtoa.c
vendored
Normal file
494
src/3rdparty/modp_numtoa.c
vendored
Normal file
|
@ -0,0 +1,494 @@
|
||||||
|
/* -*- mode: c; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
|
||||||
|
/* vi: set expandtab shiftwidth=4 tabstop=4: */
|
||||||
|
|
||||||
|
#include "modp_numtoa.h"
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <math.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <float.h>
|
||||||
|
|
||||||
|
// other interesting references on num to string conversion
|
||||||
|
// http://www.jb.man.ac.uk/~slowe/cpp/itoa.html
|
||||||
|
// and http://www.ddj.com/dept/cpp/184401596?pgno=6
|
||||||
|
|
||||||
|
// Version 19-Nov-2007
|
||||||
|
// Fixed round-to-even rules to match printf
|
||||||
|
// thanks to Johannes Otepka
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Powers of 10
|
||||||
|
* 10^0 to 10^9
|
||||||
|
*/
|
||||||
|
static const double _pow10[] = {1, 10, 100, 1000, 10000, 100000, 1000000,
|
||||||
|
10000000, 100000000, 1000000000};
|
||||||
|
static const double _pow10r[] = {1, .1, .01, .001, .0001, .00001, .000001,
|
||||||
|
.0000001, .00000001, .000000001};
|
||||||
|
|
||||||
|
/* Reverse, in place, the characters in the inclusive range [begin, end]. */
static void strreverse(char* begin, char* end)
{
    for (; begin < end; ++begin, --end) {
        char tmp = *begin;
        *begin = *end;
        *end = tmp;
    }
}
|
||||||
|
|
||||||
|
// Expects 'str' to have been produced with a "%e" scientific-notation format.
//
// Removes the run of '0' characters that ends the fractional part of the
// mantissa by moving the exponent suffix ("e...") left over it. When the
// whole fractional part is zeros, the '.' is removed as well. A string with
// no '.', or with no 'e' after the '.', is left unchanged.
static void sn_strip_trailing_zeros(char* str)
{
    // Find the decimal point; without one there is nothing to strip.
    char* cursor = str;
    while (*cursor != '\0' && *cursor != '.')
        ++cursor;

    if (*cursor != '.')
        return;

    char* const first_frac = cursor + 1;
    char* zero_run = 0; // start of the current run of zeros, if any

    // Scan the fractional digits up to the exponent marker (or end of string).
    for (cursor = first_frac; *cursor != '\0' && *cursor != 'e'; ++cursor) {
        if (*cursor != '0')
            zero_run = 0;
        else if (! zero_run)
            zero_run = cursor;
    }

    if (*cursor != 'e' || ! zero_run)
        return;

    // Every fractional digit is zero: overwrite the '.' too.
    char* dst = (zero_run == first_frac) ? zero_run - 1 : zero_run;
    const char* src = cursor;

    // Slide the exponent (including the terminating NUL) into place.
    while ((*dst++ = *src++) != '\0')
        ;
}
|
||||||
|
|
||||||
|
/*
 * Write the decimal representation of `value` to `str`, NUL-terminated.
 *
 * At most 12 bytes are written ('-', 10 digits, NUL).
 */
void modp_itoa10(int32_t value, char* str)
{
    /* Take the magnitude in unsigned arithmetic: negating INT32_MIN as a
       signed value overflows, which is undefined behaviour. */
    uint32_t magnitude = (value < 0) ? (uint32_t)0 - (uint32_t)value
                                     : (uint32_t)value;

    /* Digits come out least-significant first; 10 is the most a 32-bit
       value can have. */
    char digits[10];
    int ndigits = 0;
    do {
        digits[ndigits++] = (char)('0' + (magnitude % 10));
    } while (magnitude /= 10);

    char* out = str;
    if (value < 0)
        *out++ = '-';

    /* Emit the collected digits most-significant first. */
    while (ndigits > 0)
        *out++ = digits[--ndigits];
    *out = '\0';
}
|
||||||
|
|
||||||
|
/*
 * Write the decimal representation of `value` to `str`, NUL-terminated.
 *
 * At most 11 bytes are written (10 digits, NUL).
 */
void modp_uitoa10(uint32_t value, char* str)
{
    /* Digits come out least-significant first. */
    char digits[10];
    int ndigits = 0;
    do {
        digits[ndigits++] = (char)('0' + (value % 10));
        value /= 10;
    } while (value != 0);

    /* Emit them most-significant first. */
    char* out = str;
    while (ndigits > 0)
        *out++ = digits[--ndigits];
    *out = '\0';
}
|
||||||
|
|
||||||
|
/*
 * Write the decimal representation of `value` to `str`, NUL-terminated.
 *
 * At most 21 bytes are written ('-', 19 digits, NUL).
 */
void modp_litoa10(int64_t value, char* str)
{
    /* Magnitude in unsigned arithmetic, so INT64_MIN needs no special case. */
    uint64_t magnitude = (value < 0) ? (uint64_t)0 - (uint64_t)value
                                     : (uint64_t)value;

    /* Digits come out least-significant first. */
    char digits[20];
    int ndigits = 0;
    do {
        digits[ndigits++] = (char)('0' + (magnitude % 10));
        magnitude /= 10;
    } while (magnitude != 0);

    char* out = str;
    if (value < 0)
        *out++ = '-';

    /* Emit them most-significant first. */
    while (ndigits > 0)
        *out++ = digits[--ndigits];
    *out = '\0';
}
|
||||||
|
|
||||||
|
/*
 * Write the decimal representation of `value` to `str`, NUL-terminated.
 *
 * At most 21 bytes are written (20 digits, NUL).
 */
void modp_ulitoa10(uint64_t value, char* str)
{
    /* Digits come out least-significant first. */
    char digits[20];
    int ndigits = 0;
    do {
        digits[ndigits++] = (char)('0' + (value % 10));
        value /= 10;
    } while (value != 0);

    /* Emit them most-significant first. */
    char* out = str;
    while (ndigits > 0)
        *out++ = digits[--ndigits];
    *out = '\0';
}
|
||||||
|
|
||||||
|
void modp_dtoa(double value, char* str, int prec)
|
||||||
|
{
|
||||||
|
/* Hacky test for NaN
|
||||||
|
* under -fast-math this won't work, but then you also won't
|
||||||
|
* have correct nan values anyways. The alternative is
|
||||||
|
* to link with libmath (bad) or hack IEEE double bits (bad)
|
||||||
|
*/
|
||||||
|
if (! (value == value)) {
|
||||||
|
str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we'll work in positive values and deal with the
|
||||||
|
negative sign issue later */
|
||||||
|
int neg = 0;
|
||||||
|
if (value < 0) {
|
||||||
|
neg = 1;
|
||||||
|
value = -value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if input is larger than thres_max, revert to exponential */
|
||||||
|
const double thres_max = (double)(INT_MAX);
|
||||||
|
|
||||||
|
/* for very large numbers switch back to native sprintf for exponentials.
|
||||||
|
anyone want to write code to replace this? */
|
||||||
|
/*
|
||||||
|
normal printf behavior is to print EVERY whole number digit
|
||||||
|
which can be 100s of characters overflowing your buffers == bad
|
||||||
|
*/
|
||||||
|
if (value >= thres_max) {
|
||||||
|
sprintf(str, "%.*e", DBL_DECIMAL_DIG - 1, neg ? -value : value);
|
||||||
|
sn_strip_trailing_zeros(str);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
double diff = 0.0;
|
||||||
|
char* wstr = str;
|
||||||
|
|
||||||
|
if (prec < 0) {
|
||||||
|
prec = 0;
|
||||||
|
} else if (prec > 9) {
|
||||||
|
/* precision of >= 10 can lead to overflow errors */
|
||||||
|
prec = 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
int whole = (int) value;
|
||||||
|
double tmp = (value - whole) * _pow10[prec];
|
||||||
|
uint32_t frac = (uint32_t)(tmp);
|
||||||
|
diff = tmp - frac;
|
||||||
|
|
||||||
|
if (diff > 0.5) {
|
||||||
|
++frac;
|
||||||
|
/* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
|
||||||
|
if (frac >= _pow10[prec]) {
|
||||||
|
frac = 0;
|
||||||
|
++whole;
|
||||||
|
}
|
||||||
|
} else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
|
||||||
|
/* if halfway, round up if odd, OR
|
||||||
|
if last digit is 0. That last part is strange */
|
||||||
|
++frac;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prec == 0) {
|
||||||
|
diff = value - whole;
|
||||||
|
if (diff > 0.5) {
|
||||||
|
/* greater than 0.5, round up, e.g. 1.6 -> 2 */
|
||||||
|
++whole;
|
||||||
|
} else if (diff == 0.5 && (whole & 1)) {
|
||||||
|
/* exactly 0.5 and ODD, then round up */
|
||||||
|
/* 1.5 -> 2, but 2.5 -> 2 */
|
||||||
|
++whole;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
int count = prec;
|
||||||
|
// now do fractional part, as an unsigned number
|
||||||
|
do {
|
||||||
|
--count;
|
||||||
|
*wstr++ = (char)(48 + (frac % 10));
|
||||||
|
} while (frac /= 10);
|
||||||
|
// add extra 0s
|
||||||
|
while (count-- > 0) *wstr++ = '0';
|
||||||
|
// add decimal
|
||||||
|
*wstr++ = '.';
|
||||||
|
}
|
||||||
|
|
||||||
|
// do whole part
|
||||||
|
// Take care of sign
|
||||||
|
// Conversion. Number is reversed.
|
||||||
|
do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
|
||||||
|
if (neg) {
|
||||||
|
*wstr++ = '-';
|
||||||
|
}
|
||||||
|
*wstr='\0';
|
||||||
|
strreverse(str, wstr-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// This is near identical to modp_dtoa above
|
||||||
|
// The differnce is noted below
|
||||||
|
void modp_dtoa2(double value, char* str, int prec)
|
||||||
|
{
|
||||||
|
/* Hacky test for NaN
|
||||||
|
* under -fast-math this won't work, but then you also won't
|
||||||
|
* have correct nan values anyways. The alternative is
|
||||||
|
* to link with libmath (bad) or hack IEEE double bits (bad)
|
||||||
|
*/
|
||||||
|
if (! (value == value)) {
|
||||||
|
str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we'll work in positive values and deal with the
|
||||||
|
negative sign issue later */
|
||||||
|
int neg = 0;
|
||||||
|
if (value < 0) {
|
||||||
|
neg = 1;
|
||||||
|
value = -value;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if input is larger than thres_max, revert to exponential */
|
||||||
|
const double thres_max = (double)(INT_MAX);
|
||||||
|
|
||||||
|
/* for very large numbers switch back to native sprintf for exponentials.
|
||||||
|
anyone want to write code to replace this? */
|
||||||
|
/*
|
||||||
|
normal printf behavior is to print EVERY whole number digit
|
||||||
|
which can be 100s of characters overflowing your buffers == bad
|
||||||
|
*/
|
||||||
|
if (value >= thres_max) {
|
||||||
|
sprintf(str, "%.*e", DBL_DECIMAL_DIG - 1, neg ? -value : value);
|
||||||
|
sn_strip_trailing_zeros(str);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int count;
|
||||||
|
double diff = 0.0;
|
||||||
|
char* wstr = str;
|
||||||
|
|
||||||
|
if (prec < 0) {
|
||||||
|
prec = 0;
|
||||||
|
} else if (prec > 9) {
|
||||||
|
/* precision of >= 10 can lead to overflow errors */
|
||||||
|
prec = 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
double smallest = _pow10r[prec];
|
||||||
|
|
||||||
|
if (value != 0.0 && value < smallest) {
|
||||||
|
sprintf(str, "%.*e", DBL_DECIMAL_DIG - 1, neg ? -value : value);
|
||||||
|
sn_strip_trailing_zeros(str);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int whole = (int) value;
|
||||||
|
double tmp = (value - whole) * _pow10[prec];
|
||||||
|
uint32_t frac = (uint32_t)(tmp);
|
||||||
|
diff = tmp - frac;
|
||||||
|
|
||||||
|
if (diff > 0.5) {
|
||||||
|
++frac;
|
||||||
|
/* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
|
||||||
|
if (frac >= _pow10[prec]) {
|
||||||
|
frac = 0;
|
||||||
|
++whole;
|
||||||
|
}
|
||||||
|
} else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
|
||||||
|
/* if halfway, round up if odd, OR
|
||||||
|
if last digit is 0. That last part is strange */
|
||||||
|
++frac;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prec == 0) {
|
||||||
|
diff = value - whole;
|
||||||
|
if (diff > 0.5) {
|
||||||
|
/* greater than 0.5, round up, e.g. 1.6 -> 2 */
|
||||||
|
++whole;
|
||||||
|
} else if (diff == 0.5 && (whole & 1)) {
|
||||||
|
/* exactly 0.5 and ODD, then round up */
|
||||||
|
/* 1.5 -> 2, but 2.5 -> 2 */
|
||||||
|
++whole;
|
||||||
|
}
|
||||||
|
|
||||||
|
//vvvvvvvvvvvvvvvvvvv Diff from modp_dto2
|
||||||
|
} else if (frac) {
|
||||||
|
count = prec;
|
||||||
|
// now do fractional part, as an unsigned number
|
||||||
|
// we know it is not 0 but we can have leading zeros, these
|
||||||
|
// should be removed
|
||||||
|
while (!(frac % 10)) {
|
||||||
|
--count;
|
||||||
|
frac /= 10;
|
||||||
|
}
|
||||||
|
//^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2
|
||||||
|
|
||||||
|
// now do fractional part, as an unsigned number
|
||||||
|
do {
|
||||||
|
--count;
|
||||||
|
*wstr++ = (char)(48 + (frac % 10));
|
||||||
|
} while (frac /= 10);
|
||||||
|
// add extra 0s
|
||||||
|
while (count-- > 0) *wstr++ = '0';
|
||||||
|
// add decimal
|
||||||
|
*wstr++ = '.';
|
||||||
|
}
|
||||||
|
|
||||||
|
// do whole part
|
||||||
|
// Take care of sign
|
||||||
|
// Conversion. Number is reversed.
|
||||||
|
do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
|
||||||
|
if (neg) {
|
||||||
|
*wstr++ = '-';
|
||||||
|
}
|
||||||
|
*wstr='\0';
|
||||||
|
strreverse(str, wstr-1);
|
||||||
|
}
|
||||||
|
|
||||||
|
// This is near identical to modp_dtoa2 above, excep that it never uses
|
||||||
|
// exponential notation and requires a buffer length.
|
||||||
|
void modp_dtoa3(double value, char* str, int n, int prec)
|
||||||
|
{
|
||||||
|
/* Hacky test for NaN
|
||||||
|
* under -fast-math this won't work, but then you also won't
|
||||||
|
* have correct nan values anyways. The alternative is
|
||||||
|
* to link with libmath (bad) or hack IEEE double bits (bad)
|
||||||
|
*/
|
||||||
|
if (! (value == value)) {
|
||||||
|
str[0] = 'n'; str[1] = 'a'; str[2] = 'n'; str[3] = '\0';
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* we'll work in positive values and deal with the
|
||||||
|
negative sign issue later */
|
||||||
|
int neg = 0;
|
||||||
|
if (value < 0) {
|
||||||
|
neg = 1;
|
||||||
|
value = -value;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prec < 0) {
|
||||||
|
prec = 0;
|
||||||
|
} else if (prec > 9) {
|
||||||
|
/* precision of >= 10 can lead to overflow errors */
|
||||||
|
prec = 9;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* if input is larger than thres_max, revert to exponential */
|
||||||
|
const double thres_max = (double)(INT_MAX);
|
||||||
|
|
||||||
|
/* for very large numbers switch back to native sprintf for exponentials.
|
||||||
|
anyone want to write code to replace this? */
|
||||||
|
/*
|
||||||
|
normal printf behavior is to print EVERY whole number digit
|
||||||
|
which can be 100s of characters overflowing your buffers == bad
|
||||||
|
*/
|
||||||
|
if (value >= thres_max) {
|
||||||
|
/* ---- Modified part, compared to modp_dtoa3. */
|
||||||
|
int i = snprintf(str, n, "%.*f", prec, neg ? -value : value);
|
||||||
|
|
||||||
|
if ( i < 0 || i >= n ) {
|
||||||
|
// Error or truncated output.
|
||||||
|
snprintf(str, n, "NAN");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Remove trailing zeros. */
|
||||||
|
|
||||||
|
char* p;
|
||||||
|
for ( p = str + i - 1; p >= str && *p == '0'; --p );
|
||||||
|
|
||||||
|
if ( p >= str && *p == '.' )
|
||||||
|
--p;
|
||||||
|
|
||||||
|
*++p = '\0';
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* ---- End of modified part.. */
|
||||||
|
}
|
||||||
|
|
||||||
|
int count;
|
||||||
|
double diff = 0.0;
|
||||||
|
char* wstr = str;
|
||||||
|
|
||||||
|
int whole = (int) value;
|
||||||
|
double tmp = (value - whole) * _pow10[prec];
|
||||||
|
uint32_t frac = (uint32_t)(tmp);
|
||||||
|
diff = tmp - frac;
|
||||||
|
|
||||||
|
if (diff > 0.5) {
|
||||||
|
++frac;
|
||||||
|
/* handle rollover, e.g. case 0.99 with prec 1 is 1.0 */
|
||||||
|
if (frac >= _pow10[prec]) {
|
||||||
|
frac = 0;
|
||||||
|
++whole;
|
||||||
|
}
|
||||||
|
} else if (diff == 0.5 && ((frac == 0) || (frac & 1))) {
|
||||||
|
/* if halfway, round up if odd, OR
|
||||||
|
if last digit is 0. That last part is strange */
|
||||||
|
++frac;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (prec == 0) {
|
||||||
|
diff = value - whole;
|
||||||
|
if (diff > 0.5) {
|
||||||
|
/* greater than 0.5, round up, e.g. 1.6 -> 2 */
|
||||||
|
++whole;
|
||||||
|
} else if (diff == 0.5 && (whole & 1)) {
|
||||||
|
/* exactly 0.5 and ODD, then round up */
|
||||||
|
/* 1.5 -> 2, but 2.5 -> 2 */
|
||||||
|
++whole;
|
||||||
|
}
|
||||||
|
|
||||||
|
//vvvvvvvvvvvvvvvvvvv Diff from modp_dto2
|
||||||
|
} else if (frac) {
|
||||||
|
count = prec;
|
||||||
|
// now do fractional part, as an unsigned number
|
||||||
|
// we know it is not 0 but we can have leading zeros, these
|
||||||
|
// should be removed
|
||||||
|
while (!(frac % 10)) {
|
||||||
|
--count;
|
||||||
|
frac /= 10;
|
||||||
|
}
|
||||||
|
//^^^^^^^^^^^^^^^^^^^ Diff from modp_dto2
|
||||||
|
|
||||||
|
// now do fractional part, as an unsigned number
|
||||||
|
do {
|
||||||
|
--count;
|
||||||
|
*wstr++ = (char)(48 + (frac % 10));
|
||||||
|
} while (frac /= 10);
|
||||||
|
// add extra 0s
|
||||||
|
while (count-- > 0) *wstr++ = '0';
|
||||||
|
// add decimal
|
||||||
|
*wstr++ = '.';
|
||||||
|
}
|
||||||
|
|
||||||
|
// do whole part
|
||||||
|
// Take care of sign
|
||||||
|
// Conversion. Number is reversed.
|
||||||
|
do *wstr++ = (char)(48 + (whole % 10)); while (whole /= 10);
|
||||||
|
if (neg) {
|
||||||
|
*wstr++ = '-';
|
||||||
|
}
|
||||||
|
*wstr='\0';
|
||||||
|
strreverse(str, wstr-1);
|
||||||
|
}
|
114
src/3rdparty/modp_numtoa.h
vendored
Normal file
114
src/3rdparty/modp_numtoa.h
vendored
Normal file
|
@ -0,0 +1,114 @@
|
||||||
|
/* -*- mode: c++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
|
||||||
|
/* vi: set expandtab shiftwidth=4 tabstop=4: */
|
||||||
|
|
||||||
|
/**
|
||||||
|
* \file
|
||||||
|
*
|
||||||
|
* <pre>
|
||||||
|
* Copyright © 2007, Nick Galbreath -- nickg [at] modp [dot] com
|
||||||
|
* All rights reserved.
|
||||||
|
* http://code.google.com/p/stringencoders/
|
||||||
|
* Released under the bsd license.
|
||||||
|
* </pre>
|
||||||
|
*
|
||||||
|
* This defines signed/unsigned integer, and 'double' to char buffer
|
||||||
|
* converters. The standard way of doing this is with "sprintf", however
|
||||||
|
* these functions are
|
||||||
|
* * guaranteed maximum size output
|
||||||
|
* * 5-20x faster!
|
||||||
|
* * core-dump safe
|
||||||
|
*
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
#define BEGIN_C extern "C" {
|
||||||
|
#define END_C }
|
||||||
|
#else
|
||||||
|
#define BEGIN_C
|
||||||
|
#define END_C
|
||||||
|
#endif
|
||||||
|
|
||||||
|
BEGIN_C
|
||||||
|
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
/** \brief convert an signed integer to char buffer
|
||||||
|
*
|
||||||
|
* \param[in] value
|
||||||
|
* \param[out] buf the output buffer. Should be 16 chars or more.
|
||||||
|
*/
|
||||||
|
void modp_itoa10(int32_t value, char* buf);
|
||||||
|
|
||||||
|
/** \brief convert an unsigned integer to char buffer
|
||||||
|
*
|
||||||
|
* \param[in] value
|
||||||
|
* \param[out] buf The output buffer, should be 16 chars or more.
|
||||||
|
*/
|
||||||
|
void modp_uitoa10(uint32_t value, char* buf);
|
||||||
|
|
||||||
|
/** \brief convert an signed long integer to char buffer
|
||||||
|
*
|
||||||
|
* \param[in] value
|
||||||
|
* \param[out] buf the output buffer. Should be 24 chars or more.
|
||||||
|
*/
|
||||||
|
void modp_litoa10(int64_t value, char* buf);
|
||||||
|
|
||||||
|
/** \brief convert an unsigned long integer to char buffer
|
||||||
|
*
|
||||||
|
* \param[in] value
|
||||||
|
* \param[out] buf The output buffer, should be 24 chars or more.
|
||||||
|
*/
|
||||||
|
void modp_ulitoa10(uint64_t value, char* buf);
|
||||||
|
|
||||||
|
/** \brief convert a floating point number to char buffer with
|
||||||
|
* fixed-precision format
|
||||||
|
*
|
||||||
|
* This is similar to "%.[0-9]f" in the printf style. It will include
|
||||||
|
* trailing zeros
|
||||||
|
*
|
||||||
|
* If the input value is greater than 1<<31, then the output format
|
||||||
|
* will be switched to exponential format and include as many precision digits
|
||||||
|
* as needed to preserve information.
|
||||||
|
*
|
||||||
|
* \param[in] value
|
||||||
|
* \param[out] buf The allocated output buffer. Should be 32 chars or more.
|
||||||
|
* \param[in] precision Number of digits to the right of the decimal point.
|
||||||
|
* Can only be 0-9.
|
||||||
|
*/
|
||||||
|
void modp_dtoa(double value, char* buf, int precision);
|
||||||
|
|
||||||
|
/** \brief convert a floating point number to char buffer with a
|
||||||
|
* variable-precision format, and no trailing zeros
|
||||||
|
*
|
||||||
|
* This is similar to "%.[0-9]f" in the printf style, except it will
|
||||||
|
* NOT include trailing zeros after the decimal point. This type
|
||||||
|
* of format oddly does not exist with printf.
|
||||||
|
*
|
||||||
|
* If the input value is greater than 1<<31, then the output format
|
||||||
|
* will be switched to exponential format and include as many precision digits
|
||||||
|
* as needed to preserve information.
|
||||||
|
*
|
||||||
|
* If a non-zero input value is less than 10^(-precision), the output format
|
||||||
|
* will be switched to exponential format and include as many precision digits
|
||||||
|
* as needed to preserve information.
|
||||||
|
*
|
||||||
|
* \param[in] value
|
||||||
|
* \param[out] buf The allocated output buffer. Should be 32 chars or more.
|
||||||
|
* \param[in] precision Number of digits to the right of the decimal point.
|
||||||
|
* Can only be 0-9.
|
||||||
|
*/
|
||||||
|
void modp_dtoa2(double value, char* buf, int precision);
|
||||||
|
|
||||||
|
/** \brief convert a floating point number to char buffer with a
|
||||||
|
* variable-precision format, no trailing zeros, and no
|
||||||
|
* scientific notation.
|
||||||
|
*
|
||||||
|
* Other than avoiding scientific notation, this is the same as modp_dtoa2. It does however
|
||||||
|
* require the max buffer length. The buffer will always be null-terminated.
|
||||||
|
*/
|
||||||
|
void modp_dtoa3(double value, char* buf, int n, int precision);
|
||||||
|
|
||||||
|
END_C
|
1174
src/3rdparty/patricia.c
vendored
Normal file
1174
src/3rdparty/patricia.c
vendored
Normal file
File diff suppressed because it is too large
Load diff
195
src/3rdparty/patricia.h
vendored
Normal file
195
src/3rdparty/patricia.h
vendored
Normal file
|
@ -0,0 +1,195 @@
|
||||||
|
/*
|
||||||
|
* This code originates from Dave Plonka's Net::Security perl module. An adaptation
|
||||||
|
* of it in C is kept at https://github.com/CAIDA/cc-common/tree/master/libpatricia.
|
||||||
|
* That repository is considered the upstream version for Zeek's fork. We make some
|
||||||
|
* custom changes to this upstream:
|
||||||
|
* - Replace void_fn_t with data_fn_t and prefix_data_fn_t
|
||||||
|
* - Add patricia_search_all method
|
||||||
|
*
|
||||||
|
* The current version is based on commit 4a2c61374f507a420d28bd9084c976142d279605
|
||||||
|
* from that repo.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Dave Plonka <plonka@doit.wisc.edu>
|
||||||
|
*
|
||||||
|
* This product includes software developed by the University of Michigan,
|
||||||
|
* Merit Network, Inc., and their contributors.
|
||||||
|
*
|
||||||
|
* This file had been called "radix.h" in the MRT sources.
|
||||||
|
*
|
||||||
|
* I renamed it to "patricia.h" since it's not an implementation of a general
|
||||||
|
* radix trie. Also, pulled in various requirements from "mrt.h" and added
|
||||||
|
* some other things so it could be used as a standalone API.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* From copyright.txt:
|
||||||
|
*
|
||||||
|
* Copyright (c) 1997, 1998, 1999
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* The Regents of the University of Michigan ("The Regents") and Merit Network,
|
||||||
|
* Inc. All rights reserved.
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions are met:
|
||||||
|
* 1. Redistributions of source code must retain the above
|
||||||
|
* copyright notice, this list of conditions and the
|
||||||
|
* following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above
|
||||||
|
* copyright notice, this list of conditions and the
|
||||||
|
* following disclaimer in the documentation and/or other
|
||||||
|
* materials provided with the distribution.
|
||||||
|
* 3. All advertising materials mentioning features or use of
|
||||||
|
* this software must display the following acknowledgement:
|
||||||
|
* This product includes software developed by the University of Michigan, Merit
|
||||||
|
* Network, Inc., and their contributors.
|
||||||
|
* 4. Neither the name of the University, Merit Network, nor the
|
||||||
|
* names of their contributors may be used to endorse or
|
||||||
|
* promote products derived from this software without
|
||||||
|
* specific prior written permission.
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS" AND ANY
|
||||||
|
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||||
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||||
|
* DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
|
||||||
|
* DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||||
|
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||||
|
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||||
|
* ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
/* { from defs.h */
|
||||||
|
#define prefix_touchar(prefix) ((u_char *)&(prefix)->add.sin)
|
||||||
|
#define MAXLINE 1024
|
||||||
|
#define BIT_TEST(f, b) ((f) & (b))
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
#define addroute make_and_lookup
|
||||||
|
|
||||||
|
#include <sys/types.h> /* for u_* definitions (on FreeBSD 5) */
|
||||||
|
|
||||||
|
#include <errno.h> /* for EAFNOSUPPORT */
|
||||||
|
#ifndef EAFNOSUPPORT
|
||||||
|
# define EAFNOSUPPORT WSAEAFNOSUPPORT /* errno.h did not provide it: map to the Winsock error code */
|
||||||
|
# include <winsock.h>
|
||||||
|
#else
|
||||||
|
# include <netinet/in.h> /* for struct in_addr */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include <sys/socket.h> /* for AF_INET */
|
||||||
|
|
||||||
|
/* { from mrt.h */
|
||||||
|
|
||||||
|
/*
 * IPv4-only prefix record. It has the same leading fields as prefix_t
 * (family, bitlen, ref_count), but stores the address directly as a
 * struct in_addr instead of prefix_t's in_addr/in6_addr union.
 */
typedef struct _prefix4_t {
|
||||||
|
u_short family; /* AF_INET | AF_INET6 */
|
||||||
|
u_short bitlen; /* same as mask? */
|
||||||
|
int ref_count; /* reference count */
|
||||||
|
struct in_addr sin; /* the IPv4 address */
|
||||||
|
} prefix4_t;
|
||||||
|
|
||||||
|
/*
 * Address prefix used as the key of a patricia tree: an address family,
 * a length in bits, a reference count, and storage for either an IPv4 or
 * an IPv6 address. prefix_touchar() yields a byte pointer to the address
 * storage (the start of the `add` union).
 *
 * NOTE(review): ref_count is presumably dropped through Deref_Prefix();
 * confirm the ownership rules in patricia.c.
 */
typedef struct _prefix_t {
|
||||||
|
u_short family; /* AF_INET | AF_INET6 */
|
||||||
|
u_short bitlen; /* same as mask? */
|
||||||
|
int ref_count; /* reference count */
|
||||||
|
union {
|
||||||
|
struct in_addr sin; /* IPv4 address */
|
||||||
|
struct in6_addr sin6; /* IPv6 address */
|
||||||
|
} add;
|
||||||
|
} prefix_t;
|
||||||
|
|
||||||
|
typedef void (*data_fn_t)(void*);
|
||||||
|
typedef void (*prefix_data_fn_t)(prefix_t*, void*);
|
||||||
|
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
/*
 * One node of the patricia tree. The PATRICIA_WALK macro treats a node as
 * carrying an entry only when its `prefix` pointer is non-NULL; nodes with a
 * NULL prefix are still traversed (see PATRICIA_WALK_ALL) but skipped by
 * PATRICIA_WALK.
 *
 * NOTE(review): the comment on `bit` calls it a "flag", but it is a u_int
 * rather than a boolean; it presumably holds a bit position/length used when
 * branching. Confirm against patricia.c.
 */
typedef struct _patricia_node_t {
|
||||||
|
u_int bit; /* flag if this node used */
|
||||||
|
prefix_t *prefix; /* who we are in patricia tree */
|
||||||
|
struct _patricia_node_t *l, *r; /* left and right children */
|
||||||
|
struct _patricia_node_t *parent;/* may be used */
|
||||||
|
void *data; /* pointer to data */
|
||||||
|
void *user1; /* pointer to usr data (ex. route flap info) */
|
||||||
|
} patricia_node_t;
|
||||||
|
|
||||||
|
/*
 * Handle for a whole patricia tree: the root node, the maximum key width in
 * bits, and a node counter. Created by New_Patricia(maxbits) and released
 * with Clear_Patricia()/Destroy_Patricia() as declared in this header.
 */
typedef struct _patricia_tree_t {
|
||||||
|
patricia_node_t *head; /* root of the tree; passed as Xhead to the WALK macros */
|
||||||
|
u_int maxbits; /* for IP, 32 bit addresses */
|
||||||
|
int num_active_node; /* for debug purpose */
|
||||||
|
} patricia_tree_t;
|
||||||
|
|
||||||
|
|
||||||
|
patricia_node_t *patricia_search_exact (patricia_tree_t *patricia, prefix_t *prefix);
|
||||||
|
bool patricia_search_all (patricia_tree_t *patricia, prefix_t *prefix, patricia_node_t ***list, int *n);
|
||||||
|
patricia_node_t *patricia_search_best (patricia_tree_t *patricia, prefix_t *prefix);
|
||||||
|
patricia_node_t * patricia_search_best2 (patricia_tree_t *patricia, prefix_t *prefix,
|
||||||
|
int inclusive);
|
||||||
|
patricia_node_t *patricia_lookup (patricia_tree_t *patricia, prefix_t *prefix);
|
||||||
|
void patricia_remove (patricia_tree_t *patricia, patricia_node_t *node);
|
||||||
|
patricia_tree_t *New_Patricia (int maxbits);
|
||||||
|
void Clear_Patricia (patricia_tree_t *patricia, data_fn_t func);
|
||||||
|
void Destroy_Patricia (patricia_tree_t *patricia, data_fn_t func);
|
||||||
|
|
||||||
|
void patricia_process (patricia_tree_t *patricia, prefix_data_fn_t func);
|
||||||
|
|
||||||
|
void Deref_Prefix (prefix_t * prefix);
|
||||||
|
char *prefix_toa (prefix_t * prefix);
|
||||||
|
|
||||||
|
/* { from demo.c */
|
||||||
|
|
||||||
|
prefix_t *
|
||||||
|
ascii2prefix (int family, char *string);
|
||||||
|
|
||||||
|
patricia_node_t *
|
||||||
|
make_and_lookup (patricia_tree_t *tree, char *string);
|
||||||
|
|
||||||
|
/* } */
|
||||||
|
|
||||||
|
#define PATRICIA_MAXBITS (sizeof(struct in6_addr) * 8)
|
||||||
|
/*
 * Mask selecting bit (x) inside its byte, most-significant bit first; the byte
 * itself is selected by PATRICIA_NBYTE(x). Only the low three bits of x are
 * used, which keeps the shift count in 0..7: a wider mask would let the shift
 * reach or exceed the width of int, which is undefined behaviour.
 */
#define PATRICIA_NBIT(x) (0x80 >> ((x) & 0x07))
|
||||||
|
#define PATRICIA_NBYTE(x) ((x) >> 3)
|
||||||
|
|
||||||
|
/*
 * Yield (node)->data converted to (type *). The whole expansion is
 * parenthesized so the result can be followed directly by ->member or [index]
 * without the postfix operator binding to the void pointer before the cast.
 */
#define PATRICIA_DATA_GET(node, type) ((type *)((node)->data))
|
||||||
|
#define PATRICIA_DATA_SET(node, value) ((node)->data = (void *)(value))
|
||||||
|
|
||||||
|
/*
 * Open a traversal of the tree rooted at Xhead. Each visited node is assigned
 * to Xnode, and the statement or block written after the macro runs only for
 * nodes whose `prefix` pointer is non-NULL. The macro leaves a `do {` and a
 * `while (...) {` open, so every use must be closed with PATRICIA_WALK_END.
 *
 * Xstack holds up to PATRICIA_MAXBITS+1 pending node pointers; Xsp is its top
 * and Xrn is the node to visit next (NULL ends the loop).
 */
#define PATRICIA_WALK(Xhead, Xnode) \
|
||||||
|
do { \
|
||||||
|
patricia_node_t *Xstack[PATRICIA_MAXBITS+1]; \
|
||||||
|
patricia_node_t **Xsp = Xstack; \
|
||||||
|
patricia_node_t *Xrn = (Xhead); \
|
||||||
|
while ((Xnode = Xrn)) { \
|
||||||
|
if (Xnode->prefix)
|
||||||
|
|
||||||
|
/*
 * Same traversal as PATRICIA_WALK, but the trailing `if (1)` makes the
 * statement or block that follows run for every node, including nodes whose
 * `prefix` pointer is NULL. Must likewise be closed with PATRICIA_WALK_END.
 */
#define PATRICIA_WALK_ALL(Xhead, Xnode) \
|
||||||
|
do { \
|
||||||
|
patricia_node_t *Xstack[PATRICIA_MAXBITS+1]; \
|
||||||
|
patricia_node_t **Xsp = Xstack; \
|
||||||
|
patricia_node_t *Xrn = (Xhead); \
|
||||||
|
while ((Xnode = Xrn)) { \
|
||||||
|
if (1)
|
||||||
|
|
||||||
|
/*
 * For use inside a PATRICIA_WALK / PATRICIA_WALK_ALL body. Despite the name it
 * does not leave the walk: it skips the current node's children by taking the
 * next node from Xstack (or NULL if the stack is empty, which ends the loop)
 * and then executes `continue` on the enclosing while loop.
 */
#define PATRICIA_WALK_BREAK { \
|
||||||
|
if (Xsp != Xstack) { \
|
||||||
|
Xrn = *(--Xsp); \
|
||||||
|
} else { \
|
||||||
|
Xrn = (patricia_node_t *) 0; \
|
||||||
|
} \
|
||||||
|
continue; }
|
||||||
|
|
||||||
|
/*
 * Close a walk opened by PATRICIA_WALK or PATRICIA_WALK_ALL. Chooses the next
 * node to visit, in this order of preference:
 *   - the left child (the right child, if any, is pushed on Xstack first);
 *   - otherwise the right child;
 *   - otherwise the most recently pushed node from Xstack;
 *   - otherwise NULL, which terminates the while loop.
 * It then supplies the closing braces of the `while` and `do` blocks and the
 * final `while (0)`; the caller adds the terminating semicolon.
 */
#define PATRICIA_WALK_END \
|
||||||
|
if (Xrn->l) { \
|
||||||
|
if (Xrn->r) { \
|
||||||
|
*Xsp++ = Xrn->r; \
|
||||||
|
} \
|
||||||
|
Xrn = Xrn->l; \
|
||||||
|
} else if (Xrn->r) { \
|
||||||
|
Xrn = Xrn->r; \
|
||||||
|
} else if (Xsp != Xstack) { \
|
||||||
|
Xrn = *(--Xsp); \
|
||||||
|
} else { \
|
||||||
|
Xrn = (patricia_node_t *) 0; \
|
||||||
|
} \
|
||||||
|
} \
|
||||||
|
} while (0)
|
52
src/3rdparty/setsignal.c
vendored
Normal file
52
src/3rdparty/setsignal.c
vendored
Normal file
|
@ -0,0 +1,52 @@
|
||||||
|
/*
|
||||||
|
* See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "zeek/zeek-config.h" /* must appear before first ifdef */
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#ifdef HAVE_MEMORY_H
|
||||||
|
#include <memory.h>
|
||||||
|
#endif
|
||||||
|
#include <signal.h>
|
||||||
|
#ifdef HAVE_SIGACTION
|
||||||
|
#include <string.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "setsignal.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* An OS-independent signal() with BSD semantics, i.e. the signal
|
||||||
|
* catcher is restored following service of the signal.
|
||||||
|
*
|
||||||
|
* When sigset() is available, signal() has SYSV semantics and sigset()
|
||||||
|
* has BSD semantics and call interface. Unfortunately, Linux does not
|
||||||
|
* have sigset() so we use the more complicated sigaction() interface
|
||||||
|
* there.
|
||||||
|
*
|
||||||
|
* Did I mention that signals suck?
|
||||||
|
*/
|
||||||
|
RETSIGTYPE
|
||||||
|
(*setsignal (int sig, RETSIGTYPE (*func)(int)))(int)
|
||||||
|
{
|
||||||
|
#ifdef HAVE_SIGACTION
|
||||||
|
struct sigaction old, new;
|
||||||
|
|
||||||
|
memset(&new, 0, sizeof(new));
|
||||||
|
new.sa_handler = func;
|
||||||
|
#ifdef SA_RESTART
|
||||||
|
new.sa_flags |= SA_RESTART;
|
||||||
|
#endif
|
||||||
|
if (sigaction(sig, &new, &old) < 0)
|
||||||
|
return (SIG_ERR);
|
||||||
|
return (old.sa_handler);
|
||||||
|
|
||||||
|
#else
|
||||||
|
#ifdef HAVE_SIGSET
|
||||||
|
return (sigset(sig, func));
|
||||||
|
#else
|
||||||
|
return (signal(sig, func));
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
}
|
7
src/3rdparty/setsignal.h
vendored
Normal file
7
src/3rdparty/setsignal.h
vendored
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
/*
|
||||||
|
* See the file "COPYING" in the main distribution directory for copyright.
|
||||||
|
*
|
||||||
|
*/
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
RETSIGTYPE (*setsignal(int, RETSIGTYPE (*)(int)))(int);
|
93
src/3rdparty/strsep.c
vendored
Normal file
93
src/3rdparty/strsep.c
vendored
Normal file
|
@ -0,0 +1,93 @@
|
||||||
|
/*-
|
||||||
|
* Copyright (c) 1990, 1993
|
||||||
|
* The Regents of the University of California. All rights reserved.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* modification, are permitted provided that the following conditions
|
||||||
|
* are met:
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
* 3. All advertising materials mentioning features or use of this software
|
||||||
|
* must display the following acknowledgement:
|
||||||
|
* This product includes software developed by the University of
|
||||||
|
* California, Berkeley and its contributors.
|
||||||
|
* 4. Neither the name of the University nor the names of its contributors
|
||||||
|
* may be used to endorse or promote products derived from this software
|
||||||
|
* without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
||||||
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||||
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||||
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
||||||
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||||
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||||
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||||
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||||
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||||
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||||
|
* SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "zeek/zeek-config.h"
|
||||||
|
|
||||||
|
#ifndef HAVE_STRSEP
|
||||||
|
|
||||||
|
#include <sys/types.h>
|
||||||
|
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
|
||||||
|
char *strsep(char **, const char *);
|
||||||
|
|
||||||
|
#if defined(LIBC_SCCS) && !defined(lint)
|
||||||
|
static char sccsid[] = "@(#)strsep.c 8.1 (Berkeley) 6/4/93";
|
||||||
|
#endif /* LIBC_SCCS and not lint */
|
||||||
|
#ifndef lint
|
||||||
|
static const char rcsid[] =
|
||||||
|
"$FreeBSD: src/lib/libc/string/strsep.c,v 1.2.12.1 2001/07/09 23:30:07 obrien Exp $";
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get next token from string *stringp, where tokens are possibly-empty
|
||||||
|
* strings separated by characters from delim.
|
||||||
|
*
|
||||||
|
* Writes NULs into the string at *stringp to end tokens.
|
||||||
|
* delim need not remain constant from call to call.
|
||||||
|
* On return, *stringp points past the last NUL written (if there might
|
||||||
|
* be further tokens), or is NULL (if there are definitely no more tokens).
|
||||||
|
*
|
||||||
|
* If *stringp is NULL, strsep returns NULL.
|
||||||
|
*/
|
||||||
|
/*
 * Split the next token off the string at *stringp.
 *
 * stringp  in/out cursor. On return it points just past the delimiter that
 *          ended the token, or is NULL if the end of the string was reached.
 * delim    NUL-terminated set of delimiter bytes; may differ between calls.
 *
 * Returns the start of the token (possibly an empty string), or NULL when
 * *stringp is already NULL. The delimiter that ended the token is overwritten
 * with NUL.
 *
 * Defined with a prototype rather than an old-style (K&R) parameter list,
 * which is obsolescent and no longer accepted by C23.
 */
char *
strsep(char **stringp, const char *delim)
{
	char *s;
	const char *spanp;
	int c, sc;
	char *tok;

	if ((s = *stringp) == NULL)
		return (NULL);
	for (tok = s;;) {
		c = *s++;
		spanp = delim;
		do {
			/*
			 * delim's own terminating NUL takes part in the comparison, so
			 * the end of the input string always counts as a match.
			 */
			if ((sc = *spanp++) == c) {
				if (c == 0)
					s = NULL;	/* end of string: no further tokens */
				else
					s[-1] = 0;	/* terminate the token in place */
				*stringp = s;
				return (tok);
			}
		} while (sc != 0);
	}
	/* NOTREACHED */
}
|
||||||
|
|
||||||
|
#endif
|
Loading…
Add table
Add a link
Reference in a new issue