mirror of
https://github.com/ipxe/ipxe
synced 2025-12-16 17:41:18 +03:00
[utf8] Add ability to accumulate Unicode characters from UTF-8 bytes
Signed-off-by: Michael Brown <mcb30@ipxe.org>
This commit is contained in:
69
src/include/ipxe/utf8.h
Normal file
69
src/include/ipxe/utf8.h
Normal file
@@ -0,0 +1,69 @@
|
||||
#ifndef _IPXE_UTF8_H
|
||||
#define _IPXE_UTF8_H
|
||||
|
||||
/** @file
 *
 * UTF-8 Unicode encoding
 *
 */
|
||||
|
||||
FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
/** Maximum length of a UTF-8 sequence (in bytes) */
#define UTF8_MAX_LEN 4

/** Smallest character value that may legally use a two-byte sequence */
#define UTF8_MIN_TWO 0x80

/** Smallest character value that may legally use a three-byte sequence */
#define UTF8_MIN_THREE 0x800

/** Smallest character value that may legally use a four-byte sequence */
#define UTF8_MIN_FOUR 0x10000
|
||||
|
||||
/** High bit of UTF-8 bytes */
#define UTF8_HIGH_BIT 0x80

/** Number of data bits in each continuation byte */
#define UTF8_CONTINUATION_BITS 6

/** Bit mask for data bits in a continuation byte */
#define UTF8_CONTINUATION_MASK ( ( 1 << UTF8_CONTINUATION_BITS ) - 1 )

/** Non-data bits in a continuation byte */
#define UTF8_CONTINUATION 0x80

/** Check for a continuation byte
 *
 * Only the low eight bits of the argument are examined.  Without
 * this, a byte held in a signed type (e.g. a plain char on many
 * platforms) would be sign-extended by integer promotion and the
 * extra high bits would make the comparison fail for every genuine
 * continuation byte.
 *
 * @v byte		UTF-8 byte
 * @ret is_continuation	Byte is a continuation byte
 */
#define UTF8_IS_CONTINUATION( byte )					\
	( ( (byte) & ( 0xff & ~UTF8_CONTINUATION_MASK ) ) ==		\
	  UTF8_CONTINUATION )

/** Check for an ASCII byte
 *
 * Only the high bit of the byte is examined.
 *
 * @v byte		UTF-8 byte
 * @ret is_ascii	Byte is an ASCII byte
 */
#define UTF8_IS_ASCII( byte ) ( ! ( (byte) & UTF8_HIGH_BIT ) )
|
||||
|
||||
/** Invalid character returned when decoding fails
 *
 * This is U+FFFD, the Unicode replacement character.
 */
#define UTF8_INVALID 0xfffd
|
||||
|
||||
/** A UTF-8 character accumulator
 *
 * State passed to utf8_accumulate() alongside each input byte.
 */
struct utf8_accumulator {
	/** Character in progress */
	unsigned int character;
	/** Number of remaining continuation bytes */
	unsigned int remaining;
	/** Minimum legal character */
	unsigned int min;
};
|
||||
|
||||
/**
 * Accumulate Unicode character from UTF-8 byte
 *
 * The exact return contract is defined by the implementation, which
 * is not part of this header.  NOTE(review): UTF8_INVALID is
 * documented as the value returned when decoding fails; confirm the
 * value returned while a sequence is still incomplete.
 *
 * @v utf8		UTF-8 accumulator
 * @v byte		UTF-8 byte
 * @ret character	Unicode character
 */
extern unsigned int utf8_accumulate ( struct utf8_accumulator *utf8,
				      uint8_t byte );
|
||||
|
||||
#endif /* _IPXE_UTF8_H */
|
||||
Reference in New Issue
Block a user