diff options
| author | Ginger Bill <bill@gingerbill.org> | 2017-03-03 11:09:37 +0000 |
|---|---|---|
| committer | Ginger Bill <bill@gingerbill.org> | 2017-03-03 11:09:37 +0000 |
| commit | fb2d611dcd955833bf47b5b97a762ee8fb2dd1a6 (patch) | |
| tree | 75b3fc840fd317fe7b041c1b6123f941a882df7d /src/utf8proc/utf8proc.h | |
| parent | 9e8c9be1ea825c2b5c66e62724519ae9ab8ab8f6 (diff) | |
Update llvm binaries to latest version; Update utf8proc;
Diffstat (limited to 'src/utf8proc/utf8proc.h')
| -rw-r--r-- | src/utf8proc/utf8proc.h | 104 |
1 files changed, 80 insertions, 24 deletions
diff --git a/src/utf8proc/utf8proc.h b/src/utf8proc/utf8proc.h index 240fac66f..2dd8c1917 100644 --- a/src/utf8proc/utf8proc.h +++ b/src/utf8proc/utf8proc.h @@ -71,14 +71,15 @@ /** The MAJOR version number (increased when backwards API compatibility is broken). */ #define UTF8PROC_VERSION_MAJOR 2 /** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */ -#define UTF8PROC_VERSION_MINOR 0 +#define UTF8PROC_VERSION_MINOR 1 /** The PATCH version (increased for fixes that do not change the API). */ -#define UTF8PROC_VERSION_PATCH 2 +#define UTF8PROC_VERSION_PATCH 0 /** @} */ #include <stdlib.h> -#include <sys/types.h> -#ifdef _MSC_VER + +#if defined(_MSC_VER) && _MSC_VER < 1800 +// MSVC prior to 2013 lacked stdbool.h and inttypes.h typedef signed char utf8proc_int8_t; typedef unsigned char utf8proc_uint8_t; typedef short utf8proc_int16_t; @@ -93,12 +94,18 @@ typedef int utf8proc_ssize_t; typedef unsigned int utf8proc_size_t; # endif # ifndef __cplusplus +// emulate C99 bool typedef unsigned char utf8proc_bool; -// enum {false, true}; +# ifndef __bool_true_false_are_defined +# define false 0 +# define true 1 +# define __bool_true_false_are_defined 1 +# endif # else typedef bool utf8proc_bool; # endif #else +# include <stddef.h> # include <stdbool.h> # include <inttypes.h> typedef int8_t utf8proc_int8_t; @@ -108,22 +115,12 @@ typedef uint16_t utf8proc_uint16_t; typedef int32_t utf8proc_int32_t; typedef uint32_t utf8proc_uint32_t; typedef size_t utf8proc_size_t; -typedef ssize_t utf8proc_ssize_t; +typedef ptrdiff_t utf8proc_ssize_t; typedef bool utf8proc_bool; #endif #include <limits.h> -#ifdef _WIN32 -# ifdef UTF8PROC_EXPORTS -# define UTF8PROC_DLLEXPORT __declspec(dllexport) -# else -# define UTF8PROC_DLLEXPORT /*__declspec(dllimport)*/ -# endif -#elif __GNUC__ >= 4 -# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default"))) -#else -# define UTF8PROC_DLLEXPORT -#endif +#define UTF8PROC_DLLEXPORT #ifdef __cplusplus extern "C" { @@ -134,7 +131,7 @@ extern "C" { #endif #ifndef UINT16_MAX -# define UINT16_MAX ~(utf8proc_uint16_t)0 +# define UINT16_MAX 65535U #endif /** @@ -374,6 +371,13 @@ typedef enum { } utf8proc_boundclass_t; /** + * Function pointer type passed to @ref utf8proc_map_custom and + * @ref utf8proc_decompose_custom, which is used to specify a user-defined + * mapping of codepoints to be applied in conjunction with other mappings. + */ +typedef utf8proc_int32_t (*utf8proc_custom_func)(utf8proc_int32_t codepoint, void *data); + +/** * Array containing the byte lengths of a UTF-8 encoded codepoint based * on the first byte. */ @@ -480,6 +484,7 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( * `buffer` (which must contain at least `bufsize` entries). In case of * success, the number of codepoints written is returned; in case of an * error, a negative error code is returned (@ref utf8proc_errmsg). + * See @ref utf8proc_decompose_custom to supply additional transformations. * * If the number of written codepoints would be bigger than `bufsize`, the * required buffer size is returned, while the buffer will be overwritten with @@ -491,8 +496,46 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( ); /** + * The same as @ref utf8proc_decompose, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. See also @ref utf8proc_map_custom. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, + utf8proc_int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); + +/** + * Normalizes the sequence of `length` codepoints pointed to by `buffer` + * in-place (i.e., the result is also stored in `buffer`). + * + * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. + * @param length the length (in codepoints) of the buffer. + * @param options a bitwise or (`|`) of one or more of the following flags: + * - @ref UTF8PROC_NLF2LS - convert LF, CRLF, CR and NEL into LS + * - @ref UTF8PROC_NLF2PS - convert LF, CRLF, CR and NEL into PS + * - @ref UTF8PROC_NLF2LF - convert LF, CRLF, CR and NEL into LF + * - @ref UTF8PROC_STRIPCC - strip or convert all non-affected control characters + * - @ref UTF8PROC_COMPOSE - try to combine decomposed codepoints into composite + * codepoints + * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate + * the unicode versioning stability + * + * @return + * In case of success, the length (in codepoints) of the normalized UTF-32 string is + * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg). + * + * @warning The entries of the array pointed to by `str` have to be in the + * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_normalize_utf32(utf8proc_int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); + +/** * Reencodes the sequence of `length` codepoints pointed to by `buffer` * UTF-8 data in-place (i.e., the result is also stored in `buffer`). + * Can optionally normalize the UTF-32 sequence prior to UTF-8 conversion. * * @param buffer the (native-endian UTF-32) unicode codepoints to re-encode. * @param length the length (in codepoints) of the buffer. @@ -505,10 +548,12 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( * codepoints * - @ref UTF8PROC_STABLE - prohibit combining characters that would violate * the unicode versioning stability + * - @ref UTF8PROC_CHARBOUND - insert 0xFF bytes before each grapheme cluster * * @return - * In case of success, the length (in bytes) of the resulting UTF-8 string is - * returned; otherwise, a negative error code is returned (@ref utf8proc_errmsg). + * In case of success, the length (in bytes) of the resulting nul-terminated + * UTF-8 string is returned; otherwise, a negative error code is returned + * (@ref utf8proc_errmsg). * * @warning The amount of free space pointed to by `buffer` must * exceed the amount of the input data by one byte, and the @@ -595,7 +640,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(utf8proc_int32_t codepoi * in any case the result will be NULL terminated (though it might * contain NULL characters with the string if `str` contained NULL * characters). Other flags in the `options` field are passed to the - * functions defined above, and regarded as described. + * functions defined above, and regarded as described. See also + * @ref utfproc_map_custom to supply a custom codepoint transformation. * * In case of success the length of the new string is returned, * otherwise a negative error code is returned. @@ -607,6 +653,17 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options ); +/** + * Like @ref utf8proc_map, but also takes a `custom_func` mapping function + * that is called on each codepoint in `str` before any other transformations + * (along with a `custom_data` pointer that is passed through to `custom_func`). + * The `custom_func` argument is ignored if it is `NULL`. + */ +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map_custom( + const utf8proc_uint8_t *str, utf8proc_ssize_t strlen, utf8proc_uint8_t **dstptr, utf8proc_option_t options, + utf8proc_custom_func custom_func, void *custom_data +); + /** @name Unicode normalization * * Returns a pointer to newly allocated memory of a NFD, NFC, NFKD or NFKC @@ -619,9 +676,9 @@ UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFD(const utf8proc_uint8_t *str); /** NFC normalization (@ref UTF8PROC_COMPOSE). */ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFC(const utf8proc_uint8_t *str); -/** NFD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ +/** NFKD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKD(const utf8proc_uint8_t *str); -/** NFD normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ +/** NFKC normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); /** @} */ @@ -630,4 +687,3 @@ UTF8PROC_DLLEXPORT utf8proc_uint8_t *utf8proc_NFKC(const utf8proc_uint8_t *str); #endif #endif - |