ADDED: TextToUtf8() -WIP-
RENAMED: TextCountCodepoints() -> GetCodepointsCount()
This commit is contained in:
parent
3f12fa54f7
commit
f6df47dfe5
2 changed files with 201 additions and 159 deletions
11
src/raylib.h
11
src/raylib.h
|
@ -1188,14 +1188,11 @@ RLAPI void DrawTextRecEx(Font font, const char *text, Rectangle rec, float fontS
|
||||||
RLAPI int MeasureText(const char *text, int fontSize); // Measure string width for default font
|
RLAPI int MeasureText(const char *text, int fontSize); // Measure string width for default font
|
||||||
RLAPI Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing); // Measure string size for Font
|
RLAPI Vector2 MeasureTextEx(Font font, const char *text, float fontSize, float spacing); // Measure string size for Font
|
||||||
RLAPI int GetGlyphIndex(Font font, int character); // Get index position for a unicode character on font
|
RLAPI int GetGlyphIndex(Font font, int character); // Get index position for a unicode character on font
|
||||||
RLAPI int GetNextCodepoint(const char *text, int *bytesProcessed); // Returns next codepoint in a UTF8 encoded string; 0x3f('?') is returned on failure
|
|
||||||
RLAPI int *GetCodepoints(const char *text, int *count); // Get all codepoints in a string, codepoints count returned by parameters
|
|
||||||
|
|
||||||
// Text strings management functions
|
// Text strings management functions (no utf8 strings, only byte chars)
|
||||||
// NOTE: Some strings allocate memory internally for returned strings, just be careful!
|
// NOTE: Some strings allocate memory internally for returned strings, just be careful!
|
||||||
RLAPI bool TextIsEqual(const char *text1, const char *text2); // Check if two text string are equal
|
RLAPI bool TextIsEqual(const char *text1, const char *text2); // Check if two text string are equal
|
||||||
RLAPI unsigned int TextLength(const char *text); // Get text length, checks for '\0' ending
|
RLAPI unsigned int TextLength(const char *text); // Get text length, checks for '\0' ending
|
||||||
RLAPI unsigned int TextCountCodepoints(const char *text); // Get total number of characters (codepoints) in a UTF8 encoded string
|
|
||||||
RLAPI const char *TextFormat(const char *text, ...); // Text formatting with variables (sprintf style)
|
RLAPI const char *TextFormat(const char *text, ...); // Text formatting with variables (sprintf style)
|
||||||
RLAPI const char *TextSubtext(const char *text, int position, int length); // Get a piece of a text string
|
RLAPI const char *TextSubtext(const char *text, int position, int length); // Get a piece of a text string
|
||||||
RLAPI char *TextReplace(char *text, const char *replace, const char *by); // Replace text string (memory should be freed!)
|
RLAPI char *TextReplace(char *text, const char *replace, const char *by); // Replace text string (memory should be freed!)
|
||||||
|
@ -1208,6 +1205,12 @@ RLAPI const char *TextToUpper(const char *text); // Get upp
|
||||||
RLAPI const char *TextToLower(const char *text); // Get lower case version of provided string
|
RLAPI const char *TextToLower(const char *text); // Get lower case version of provided string
|
||||||
RLAPI const char *TextToPascal(const char *text); // Get Pascal case notation version of provided string
|
RLAPI const char *TextToPascal(const char *text); // Get Pascal case notation version of provided string
|
||||||
RLAPI int TextToInteger(const char *text); // Get integer value from text (negative values not supported)
|
RLAPI int TextToInteger(const char *text); // Get integer value from text (negative values not supported)
|
||||||
|
RLAPI const char *TextToUtf8(int codepoint, int *byteLength); // Encode codepoint into utf8 text (char array length returned as parameter)
|
||||||
|
|
||||||
|
// UTF8 text strings management functions
|
||||||
|
RLAPI int *GetCodepoints(const char *text, int *count); // Get all codepoints in a string, codepoints count returned by parameters
|
||||||
|
RLAPI int GetCodepointsCount(const char *text); // Get total number of characters (codepoints) in a UTF8 encoded string
|
||||||
|
RLAPI int GetNextCodepoint(const char *text, int *bytesProcessed); // Returns next codepoint in a UTF8 encoded string; 0x3f('?') is returned on failure
|
||||||
|
|
||||||
//------------------------------------------------------------------------------------
|
//------------------------------------------------------------------------------------
|
||||||
// Basic 3d Shapes Drawing Functions (Module: models)
|
// Basic 3d Shapes Drawing Functions (Module: models)
|
||||||
|
|
349
src/text.c
349
src/text.c
|
@ -777,140 +777,6 @@ void DrawFPS(int posX, int posY)
|
||||||
DrawText(TextFormat("%2i FPS", fps), posX, posY, 20, LIME);
|
DrawText(TextFormat("%2i FPS", fps), posX, posY, 20, LIME);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns next codepoint in a UTF8 encoded text, scanning until '\0' is found
// When an invalid UTF8 byte is encountered we exit as soon as possible and a '?'(0x3f) codepoint is returned
// Total number of bytes processed are returned as a parameter
// NOTE: the standard says U+FFFD should be returned in case of errors
// but that character is not supported by the default font in raylib
// NOTE: when a continuation byte is missing or malformed, bytesProcessed also counts the offending
// byte (which may be the '\0' terminator), so it is safer to advance one byte when '?' is returned
// TODO: optimize this code for speed!!
int GetNextCodepoint(const char *text, int *bytesProcessed)
{
/*
    UTF8 specs from https://www.ietf.org/rfc/rfc3629.txt

    Char. number range  |        UTF-8 octet sequence
      (hexadecimal)     |              (binary)
    --------------------+---------------------------------------------
    0000 0000-0000 007F | 0xxxxxxx
    0000 0080-0000 07FF | 110xxxxx 10xxxxxx
    0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
    0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
    // NOTE: on decode errors we return as soon as possible

    int code = 0x3f;                        // Codepoint (defaults to '?')
    int octet = (unsigned char)(text[0]);   // The first UTF8 octet
    *bytesProcessed = 1;

    if (octet <= 0x7f)
    {
        // Only one octet (ASCII range x00-7F)
        code = octet;
    }
    else if ((octet & 0xe0) == 0xc0)
    {
        // Two octets
        // [0]xC2-DF    [1]UTF8-tail(x80-BF)
        unsigned char octet1 = text[1];

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        // xC0 and xC1 would be overlong encodings: they stay as '?' with a single byte processed
        if ((octet >= 0xc2) && (octet <= 0xdf))
        {
            code = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
            *bytesProcessed = 2;
        }
    }
    else if ((octet & 0xf0) == 0xe0)
    {
        // Three octets
        unsigned char octet1 = text[1];
        unsigned char octet2 = '\0';

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        octet2 = text[2];

        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence

        /*
            [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF)
            [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF)
            [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF)
            [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF)
        */

        // xE0 with a low second octet is overlong, xED with a high second octet is a UTF-16 surrogate
        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; }

        if ((octet >= 0xe0) && (octet <= 0xef))
        {
            code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
            *bytesProcessed = 3;
        }
    }
    else if ((octet & 0xf8) == 0xf0)
    {
        // Four octets
        if (octet > 0xf4) return code;      // Lead octets xF5-F7 would encode values above U+10FFFF

        unsigned char octet1 = text[1];
        unsigned char octet2 = '\0';
        unsigned char octet3 = '\0';

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        octet2 = text[2];

        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence

        octet3 = text[3];

        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; } // Unexpected sequence

        /*
            [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail
            [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail
            [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail
        */

        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence

        if (octet >= 0xf0)
        {
            code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
            *bytesProcessed = 4;
        }
    }

    if (code > 0x10ffff) code = 0x3f;       // Codepoints after U+10ffff are invalid

    return code;
}
|
|
||||||
|
|
||||||
// Get all codepoints in a string, codepoints count returned by parameters
|
|
||||||
int *GetCodepoints(const char *text, int *count)
|
|
||||||
{
|
|
||||||
static int codepoints[MAX_TEXT_UNICODE_CHARS] = { 0 };
|
|
||||||
memset(codepoints, 0, MAX_TEXT_UNICODE_CHARS*sizeof(int));
|
|
||||||
|
|
||||||
int bytesProcessed = 0;
|
|
||||||
int textLength = strlen(text);
|
|
||||||
int codepointsCount = 0;
|
|
||||||
|
|
||||||
for (int i = 0; i < textLength; codepointsCount++)
|
|
||||||
{
|
|
||||||
codepoints[codepointsCount] = GetNextCodepoint(text + i, &bytesProcessed);
|
|
||||||
i += bytesProcessed;
|
|
||||||
}
|
|
||||||
|
|
||||||
*count = codepointsCount;
|
|
||||||
|
|
||||||
return codepoints;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Draw text (using default font)
|
// Draw text (using default font)
|
||||||
// NOTE: fontSize work like in any drawing program but if fontSize is lower than font-base-size, then font-base-size is used
|
// NOTE: fontSize work like in any drawing program but if fontSize is lower than font-base-size, then font-base-size is used
|
||||||
// NOTE: chars spacing is proportional to fontSize
|
// NOTE: chars spacing is proportional to fontSize
|
||||||
|
@ -1235,27 +1101,6 @@ unsigned int TextLength(const char *text)
|
||||||
return length;
|
return length;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns total number of characters(codepoints) in a UTF8 encoded text, until '\0' is found
// NOTE: If an invalid UTF8 sequence is encountered a '?'(0x3f) codepoint is counted instead
unsigned int TextCountCodepoints(const char *text)
{
    unsigned int len = 0;
    const char *ptr = text;     // Read-only cursor over the text, no need to cast away const

    while (*ptr != '\0')
    {
        int next = 0;
        int letter = GetNextCodepoint(ptr, &next);

        // '?' is either a literal '?' (one byte) or a decode error: in both cases
        // only one byte is skipped, every invalid byte is counted on its own
        if (letter == 0x3f) ptr += 1;
        else ptr += next;

        len++;
    }

    return len;
}
|
|
||||||
|
|
||||||
// Formatting of text with variables to 'embed'
|
// Formatting of text with variables to 'embed'
|
||||||
const char *TextFormat(const char *text, ...)
|
const char *TextFormat(const char *text, ...)
|
||||||
{
|
{
|
||||||
|
@ -1531,6 +1376,200 @@ int TextToInteger(const char *text)
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Encode codepoint into utf8 text (char array length returned as parameter)
// NOTE: Returned pointer refers to an internal static buffer, overwritten on every call;
// the encoded bytes are always followed by '\0' so the result can be used as a C string
// NOTE: Values that can not be encoded as UTF8 (negative, UTF-16 surrogates U+D800-U+DFFF
// or above U+10FFFF) return an empty string and a length of 0
// NOTE: byteLength can be NULL if the length is not required
const char *TextToUtf8(int codepoint, int *byteLength)
{
    static char utf8[6] = { 0 };
    int length = 0;

    int isSurrogate = ((codepoint >= 0xd800) && (codepoint <= 0xdfff));

    if ((codepoint < 0) || (codepoint > 0x10ffff) || isSurrogate)
    {
        // Not a Unicode scalar value, nothing to encode
        length = 0;
    }
    else if (codepoint <= 0x7f)
    {
        // 0xxxxxxx
        utf8[0] = (char)codepoint;
        length = 1;
    }
    else if (codepoint <= 0x7ff)
    {
        // 110xxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 6) & 0x1f) | 0xc0);
        utf8[1] = (char)((codepoint & 0x3f) | 0x80);
        length = 2;
    }
    else if (codepoint <= 0xffff)
    {
        // 1110xxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 12) & 0x0f) | 0xe0);
        utf8[1] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
        utf8[2] = (char)((codepoint & 0x3f) | 0x80);
        length = 3;
    }
    else
    {
        // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        utf8[0] = (char)(((codepoint >> 18) & 0x07) | 0xf0);
        utf8[1] = (char)(((codepoint >> 12) & 0x3f) | 0x80);
        utf8[2] = (char)(((codepoint >> 6) & 0x3f) | 0x80);
        utf8[3] = (char)((codepoint & 0x3f) | 0x80);
        length = 4;
    }

    // Terminate right after the encoded bytes so data left by a previous (longer) call is never exposed
    utf8[length] = '\0';

    if (byteLength != NULL) *byteLength = length;

    return utf8;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Get all codepoints in a string, codepoints count returned by parameters
|
||||||
|
int *GetCodepoints(const char *text, int *count)
|
||||||
|
{
|
||||||
|
static int codepoints[MAX_TEXT_UNICODE_CHARS] = { 0 };
|
||||||
|
memset(codepoints, 0, MAX_TEXT_UNICODE_CHARS*sizeof(int));
|
||||||
|
|
||||||
|
int bytesProcessed = 0;
|
||||||
|
int textLength = strlen(text);
|
||||||
|
int codepointsCount = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < textLength; codepointsCount++)
|
||||||
|
{
|
||||||
|
codepoints[codepointsCount] = GetNextCodepoint(text + i, &bytesProcessed);
|
||||||
|
i += bytesProcessed;
|
||||||
|
}
|
||||||
|
|
||||||
|
*count = codepointsCount;
|
||||||
|
|
||||||
|
return codepoints;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns total number of characters(codepoints) in a UTF8 encoded text, until '\0' is found
// NOTE: If an invalid UTF8 sequence is encountered a '?'(0x3f) codepoint is counted instead
int GetCodepointsCount(const char *text)
{
    int count = 0;              // Same type as the return value, no signed/unsigned mix
    const char *ptr = text;     // Read-only cursor over the text, no need to cast away const

    while (*ptr != '\0')
    {
        int next = 0;
        int letter = GetNextCodepoint(ptr, &next);

        // '?' is either a literal '?' (one byte) or a decode error: in both cases
        // only one byte is skipped, every invalid byte is counted on its own
        if (letter == 0x3f) ptr += 1;
        else ptr += next;

        count++;
    }

    return count;
}
|
||||||
|
|
||||||
|
|
||||||
|
// Returns next codepoint in a UTF8 encoded text, scanning until '\0' is found
// When an invalid UTF8 byte is encountered we exit as soon as possible and a '?'(0x3f) codepoint is returned
// Total number of bytes processed are returned as a parameter
// NOTE: the standard says U+FFFD should be returned in case of errors
// but that character is not supported by the default font in raylib
// NOTE: when a continuation byte is missing or malformed, bytesProcessed also counts the offending
// byte (which may be the '\0' terminator), so it is safer to advance one byte when '?' is returned
// TODO: optimize this code for speed!!
int GetNextCodepoint(const char *text, int *bytesProcessed)
{
/*
    UTF8 specs from https://www.ietf.org/rfc/rfc3629.txt

    Char. number range  |        UTF-8 octet sequence
      (hexadecimal)     |              (binary)
    --------------------+---------------------------------------------
    0000 0000-0000 007F | 0xxxxxxx
    0000 0080-0000 07FF | 110xxxxx 10xxxxxx
    0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
    0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
    // NOTE: on decode errors we return as soon as possible

    int code = 0x3f;                        // Codepoint (defaults to '?')
    int octet = (unsigned char)(text[0]);   // The first UTF8 octet
    *bytesProcessed = 1;

    if (octet <= 0x7f)
    {
        // Only one octet (ASCII range x00-7F)
        code = octet;
    }
    else if ((octet & 0xe0) == 0xc0)
    {
        // Two octets
        // [0]xC2-DF    [1]UTF8-tail(x80-BF)
        unsigned char octet1 = text[1];

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        // xC0 and xC1 would be overlong encodings: they stay as '?' with a single byte processed
        if ((octet >= 0xc2) && (octet <= 0xdf))
        {
            code = ((octet & 0x1f) << 6) | (octet1 & 0x3f);
            *bytesProcessed = 2;
        }
    }
    else if ((octet & 0xf0) == 0xe0)
    {
        // Three octets
        unsigned char octet1 = text[1];
        unsigned char octet2 = '\0';

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        octet2 = text[2];

        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence

        /*
            [0]xE0    [1]xA0-BF       [2]UTF8-tail(x80-BF)
            [0]xE1-EC [1]UTF8-tail    [2]UTF8-tail(x80-BF)
            [0]xED    [1]x80-9F       [2]UTF8-tail(x80-BF)
            [0]xEE-EF [1]UTF8-tail    [2]UTF8-tail(x80-BF)
        */

        // xE0 with a low second octet is overlong, xED with a high second octet is a UTF-16 surrogate
        if (((octet == 0xe0) && !((octet1 >= 0xa0) && (octet1 <= 0xbf))) ||
            ((octet == 0xed) && !((octet1 >= 0x80) && (octet1 <= 0x9f)))) { *bytesProcessed = 2; return code; }

        if ((octet >= 0xe0) && (octet <= 0xef))
        {
            code = ((octet & 0xf) << 12) | ((octet1 & 0x3f) << 6) | (octet2 & 0x3f);
            *bytesProcessed = 3;
        }
    }
    else if ((octet & 0xf8) == 0xf0)
    {
        // Four octets
        if (octet > 0xf4) return code;      // Lead octets xF5-F7 would encode values above U+10FFFF

        unsigned char octet1 = text[1];
        unsigned char octet2 = '\0';
        unsigned char octet3 = '\0';

        if ((octet1 == '\0') || ((octet1 >> 6) != 2)) { *bytesProcessed = 2; return code; } // Unexpected sequence

        octet2 = text[2];

        if ((octet2 == '\0') || ((octet2 >> 6) != 2)) { *bytesProcessed = 3; return code; } // Unexpected sequence

        octet3 = text[3];

        if ((octet3 == '\0') || ((octet3 >> 6) != 2)) { *bytesProcessed = 4; return code; } // Unexpected sequence

        /*
            [0]xF0       [1]x90-BF       [2]UTF8-tail  [3]UTF8-tail
            [0]xF1-F3    [1]UTF8-tail    [2]UTF8-tail  [3]UTF8-tail
            [0]xF4       [1]x80-8F       [2]UTF8-tail  [3]UTF8-tail
        */

        if (((octet == 0xf0) && !((octet1 >= 0x90) && (octet1 <= 0xbf))) ||
            ((octet == 0xf4) && !((octet1 >= 0x80) && (octet1 <= 0x8f)))) { *bytesProcessed = 2; return code; } // Unexpected sequence

        if (octet >= 0xf0)
        {
            code = ((octet & 0x7) << 18) | ((octet1 & 0x3f) << 12) | ((octet2 & 0x3f) << 6) | (octet3 & 0x3f);
            *bytesProcessed = 4;
        }
    }

    if (code > 0x10ffff) code = 0x3f;       // Codepoints after U+10ffff are invalid

    return code;
}
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
|
|
||||||
//----------------------------------------------------------------------------------
|
//----------------------------------------------------------------------------------
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue