Core: Integrated Unicode data
This commit is contained in:
parent
cf24b8abe4
commit
39d1d31639
|
|
@ -48,15 +48,19 @@ local DirectionToString = {}
|
||||||
DirectionToString["EN"] = "Direction_European_Number"
|
DirectionToString["EN"] = "Direction_European_Number"
|
||||||
DirectionToString["ES"] = "Direction_European_Separator"
|
DirectionToString["ES"] = "Direction_European_Separator"
|
||||||
DirectionToString["ET"] = "Direction_European_Terminator"
|
DirectionToString["ET"] = "Direction_European_Terminator"
|
||||||
|
DirectionToString["FSI"] = "Direction_First_Strong_Isolate"
|
||||||
DirectionToString["L"] = "Direction_Left_To_Right"
|
DirectionToString["L"] = "Direction_Left_To_Right"
|
||||||
DirectionToString["LRE"] = "Direction_Left_To_Right_Embedding"
|
DirectionToString["LRE"] = "Direction_Left_To_Right_Embedding"
|
||||||
|
DirectionToString["LRI"] = "Direction_Left_To_Right_Isolate"
|
||||||
DirectionToString["LRO"] = "Direction_Left_To_Right_Override"
|
DirectionToString["LRO"] = "Direction_Left_To_Right_Override"
|
||||||
DirectionToString["NSM"] = "Direction_Nonspacing_Mark"
|
DirectionToString["NSM"] = "Direction_Nonspacing_Mark"
|
||||||
DirectionToString["ON"] = "Direction_Other_Neutral"
|
DirectionToString["ON"] = "Direction_Other_Neutral"
|
||||||
DirectionToString["B"] = "Direction_Paragraph_Separator"
|
DirectionToString["B"] = "Direction_Paragraph_Separator"
|
||||||
DirectionToString["PDF"] = "Direction_Pop_Directional_Format"
|
DirectionToString["PDF"] = "Direction_Pop_Directional_Formatting"
|
||||||
|
DirectionToString["PDI"] = "Direction_Pop_Directional_Isolate"
|
||||||
DirectionToString["R"] = "Direction_Right_To_Left"
|
DirectionToString["R"] = "Direction_Right_To_Left"
|
||||||
DirectionToString["RLE"] = "Direction_Right_To_Left_Embedding"
|
DirectionToString["RLE"] = "Direction_Right_To_Left_Embedding"
|
||||||
|
DirectionToString["RLI"] = "Direction_Right_To_Left_Isolate"
|
||||||
DirectionToString["RLO"] = "Direction_Right_To_Left_Override"
|
DirectionToString["RLO"] = "Direction_Right_To_Left_Override"
|
||||||
DirectionToString["S"] = "Direction_Segment_Separator"
|
DirectionToString["S"] = "Direction_Segment_Separator"
|
||||||
DirectionToString["WS"] = "Direction_White_Space"
|
DirectionToString["WS"] = "Direction_White_Space"
|
||||||
|
|
@ -69,7 +73,7 @@ table.maxn = table.maxn or function (tab) -- Compatibilit
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
local function getCharacter(tab, first, index)
|
local function getCharacter(tab, first, index)
|
||||||
local character = {}
|
local character = {}
|
||||||
character.Category = CategoryToString[tab[3]] or "Category_NoCategory"
|
character.Category = CategoryToString[tab[3]] or "Category_NoCategory"
|
||||||
|
|
@ -83,112 +87,179 @@ end
|
||||||
|
|
||||||
ACTION.Function = function ()
|
ACTION.Function = function ()
|
||||||
local unicodeSet = {}
|
local unicodeSet = {}
|
||||||
|
if (not os.isdir("scripts/data") and not os.mkdir("scripts/data")) then
|
||||||
|
print("Failed to create scripts/data folder")
|
||||||
|
end
|
||||||
|
|
||||||
file = io.open ("scripts/data/UnicodeData.txt", "r")
|
local filepath = "scripts/data/UnicodeData.txt"
|
||||||
|
|
||||||
|
print("Downloading UnicodeData.txt...")
|
||||||
|
|
||||||
|
local t1 = os.clock()
|
||||||
|
|
||||||
|
local result_str, response_code = http.download("https://www.unicode.org/Public/UCD/latest/ucd/UnicodeData.txt", filepath, {
|
||||||
|
headers = { "From: Premake", "Referer: Premake" }
|
||||||
|
})
|
||||||
|
|
||||||
|
if (response_code ~= 200) then
|
||||||
|
error("Failed to download UnicodeData.txt")
|
||||||
|
end
|
||||||
|
|
||||||
|
local fileInfo = os.stat(filepath)
|
||||||
|
|
||||||
|
local t2 = os.clock()
|
||||||
|
|
||||||
|
print(string.format("Download succeeded (%.3f MiB) in %fs (%d KiB/s)", fileInfo.size / (1024 * 1024), t2 - t1, math.floor((fileInfo.size / (t2 - t1)) / 1024)))
|
||||||
|
|
||||||
|
file = io.open (filepath, "r")
|
||||||
if (not file) then
|
if (not file) then
|
||||||
error("Unable to open Unicode Data file")
|
error("Unable to open Unicode Data file")
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
local t1 = os.clock()
|
local characters = {}
|
||||||
|
local characterSets = {}
|
||||||
|
local lowercaseCharacters = {}
|
||||||
|
local titlecaseCharacters = {}
|
||||||
|
local uppercaseCharacters = {}
|
||||||
|
local currentBlock
|
||||||
|
local currentBlockStartCodepoint
|
||||||
|
local lineIndex = 1
|
||||||
|
|
||||||
|
t1 = os.clock()
|
||||||
|
|
||||||
print("Parsing UnicodeData.txt...")
|
print("Parsing UnicodeData.txt...")
|
||||||
local first = 0
|
|
||||||
local last = 0
|
|
||||||
unicodeSet[0] = {}
|
|
||||||
unicodeSet[0].First = 0
|
|
||||||
unicodeSet[0].Characters = {}
|
|
||||||
local currentSet = 0
|
|
||||||
local inblock = false
|
|
||||||
local blockData = nil
|
|
||||||
local unusedIndex = 0
|
|
||||||
local c = 0
|
|
||||||
for line in file:lines() do
|
for line in file:lines() do
|
||||||
local old = 0
|
local parts = line:explode(";")
|
||||||
local start = string.find(line, ';', old)
|
|
||||||
local tab = {}
|
local codepoint = tonumber(parts[1], 16)
|
||||||
while (start) do
|
local characterName = parts[2]
|
||||||
tab[#tab+1] = string.sub(line, old, start-1, old)
|
local category = parts[3]
|
||||||
old = start+1
|
local direction = parts[5]
|
||||||
start = string.find(line, ';', old)
|
local uppercaseMapping = tonumber(parts[13], 16)
|
||||||
end
|
local lowercaseMapping = tonumber(parts[14], 16)
|
||||||
tab[#tab+1] = string.sub(line, old)
|
local titlecaseMapping = tonumber(parts[15], 16)
|
||||||
|
|
||||||
local index = tonumber(tab[1], 16)
|
local blockName, blockId = string.match(characterName, "<(.+), (%w+)>")
|
||||||
if (index > 0 and not inblock) then
|
if (currentBlock) then
|
||||||
if (index-last > 1000) then
|
if (blockId ~= "Last") then
|
||||||
unicodeSet[currentSet].Last = last
|
error("Parsing error: expected last block at line " .. lineIndex)
|
||||||
currentSet = currentSet + 1
|
end
|
||||||
unicodeSet[currentSet] = {}
|
|
||||||
unicodeSet[currentSet].First = index
|
print("Detected set " .. blockName .. " from codepoint " .. currentBlockStartCodepoint .. " to " .. codepoint)
|
||||||
unicodeSet[currentSet].Characters = {}
|
|
||||||
print("Set detected (Begin at " .. first .. ", end at " .. last .. ")")
|
table.insert(characterSets, {
|
||||||
first = index
|
startCodepoint = currentBlockStartCodepoint,
|
||||||
|
endCodepoint = codepoint,
|
||||||
|
name = "<" .. blockName .. ">",
|
||||||
|
category = category,
|
||||||
|
direction = direction
|
||||||
|
})
|
||||||
|
|
||||||
|
currentBlock = nil
|
||||||
|
else
|
||||||
|
if (blockName) then
|
||||||
|
if (blockId ~= "First") then
|
||||||
|
error("Parsing error: expected first block at line " .. lineIndex)
|
||||||
|
end
|
||||||
|
|
||||||
|
currentBlock = blockName
|
||||||
|
currentBlockStartCodepoint = codepoint
|
||||||
else
|
else
|
||||||
unusedIndex = unusedIndex + index-last-1
|
table.insert(characters, {
|
||||||
end
|
codepoint = codepoint,
|
||||||
end
|
name = characterName,
|
||||||
|
category = category,
|
||||||
local blockName, blockId = string.match(tab[2], "<(.+), (%w+)>")
|
direction = direction,
|
||||||
if (blockName ~= nil and blockId ~= nil) then
|
upper = uppercaseMapping,
|
||||||
if (blockId == "First") then
|
lower = lowercaseMapping,
|
||||||
if (inblock) then
|
title = titlecaseMapping
|
||||||
error("Already in block (" .. tab[1] .. ")")
|
})
|
||||||
|
|
||||||
|
if (lowercaseMapping) then
|
||||||
|
table.insert(lowercaseCharacters, {codepoint = codepoint, lower = lowercaseMapping})
|
||||||
end
|
end
|
||||||
inblock = true
|
|
||||||
blockCharacter = getCharacter(tab, first)
|
if (titlecaseMapping) then
|
||||||
elseif (blockId == "Last") then
|
table.insert(titlecaseCharacters, {codepoint = codepoint, title = titlecaseMapping})
|
||||||
if (not inblock) then
|
|
||||||
error("Not in block (" .. tab[1] .. ")")
|
|
||||||
end
|
end
|
||||||
inblock = false
|
|
||||||
for i=first, index do
|
if (uppercaseMapping) then
|
||||||
unicodeSet[currentSet].Characters[i] = getCharacter(tab, first, i)
|
table.insert(uppercaseCharacters, {codepoint = codepoint, upper = uppercaseMapping})
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|
||||||
unicodeSet[currentSet].Characters[index - first] = getCharacter(tab, first, index)
|
lineIndex = lineIndex + 1
|
||||||
if (unicodeSet[currentSet].Characters[index - first].LowerCase ~= (index - first) or
|
|
||||||
unicodeSet[currentSet].Characters[index - first].UpperCase ~= (index - first) or
|
|
||||||
unicodeSet[currentSet].Characters[index - first].TitleCase ~= (index - first)) then
|
|
||||||
c = c + 1
|
|
||||||
end
|
|
||||||
|
|
||||||
last = index
|
|
||||||
end
|
end
|
||||||
unicodeSet[currentSet].Last = last
|
|
||||||
print("Set detected (Begin at " .. first .. ", end at " .. last .. ")")
|
|
||||||
file:close()
|
|
||||||
|
|
||||||
print("Parsed " .. last+1 .. " characters in " .. #unicodeSet .. " sets, " .. unusedIndex .. " unused indices (took " .. os.difftime(os.clock(), t1) .. " sec)")
|
t2 = os.clock()
|
||||||
|
|
||||||
|
print("Parsed " .. #characters .. " characters in " .. (t2 - t1) .. " seconds")
|
||||||
|
|
||||||
|
print("Writting Unicode Data to header...")
|
||||||
|
|
||||||
file = io.open("../src/Nazara/Core/UnicodeData.hpp", "w+")
|
file = io.open("../src/Nazara/Core/UnicodeData.hpp", "w+")
|
||||||
if (not file) then
|
if (not file) then
|
||||||
error("Unable to create Unicode Data header")
|
error("Failed to open Unicode Data header")
|
||||||
return
|
return
|
||||||
end
|
end
|
||||||
|
|
||||||
print("Writting Unicode Data to header...")
|
|
||||||
|
|
||||||
t1 = os.clock()
|
t1 = os.clock()
|
||||||
for i=0, #unicodeSet do
|
|
||||||
local maxn = table.maxn(unicodeSet[i].Characters)
|
|
||||||
file:write(string.format("Character unicodeSet%d[%d] = {\n", i, maxn+1))
|
|
||||||
|
|
||||||
for j=0, maxn do
|
file:write(string.format("UnicodeCharacter unicodeCharacters[%d] = {\n", #characters))
|
||||||
local v = unicodeSet[i].Characters[j]
|
|
||||||
if (v) then
|
for _, data in pairs(characters) do
|
||||||
file:write(string.format("\t{%s,%s,%d,%d,%d},\n", v.Category, v.Direction, v.LowerCase, v.TitleCase, v.UpperCase))
|
local category = CategoryToString[data.category]
|
||||||
else
|
if (not category) then
|
||||||
file:write(string.format("\t{Category_NoCategory,Direction_Boundary_Neutral,%d,%d,%d},\n", j, j, j))
|
error("Unknown category " .. data.category .. " for character " .. data.codepoint)
|
||||||
end
|
|
||||||
end
|
end
|
||||||
|
|
||||||
file:write("};\n\n")
|
local direction = DirectionToString[data.direction]
|
||||||
|
if (not direction) then
|
||||||
|
error("Unknown direction " .. data.direction .. " for character " .. data.codepoint)
|
||||||
|
end
|
||||||
|
|
||||||
|
file:write(string.format("\t{%d, Unicode::%s, Unicode::%s},\n", data.codepoint, category, direction))
|
||||||
end
|
end
|
||||||
|
file:write("};\n\n")
|
||||||
|
|
||||||
|
file:write(string.format("UnicodeSet unicodeSets[%d] = {\n", #characterSets))
|
||||||
|
|
||||||
|
for _, data in pairs(characterSets) do
|
||||||
|
local category = CategoryToString[data.category]
|
||||||
|
if (not category) then
|
||||||
|
error("Unknown category " .. data.category .. " for character " .. data.codepoint)
|
||||||
|
end
|
||||||
|
|
||||||
|
local direction = DirectionToString[data.direction]
|
||||||
|
if (not direction) then
|
||||||
|
error("Unknown direction " .. data.direction .. " for character " .. data.codepoint)
|
||||||
|
end
|
||||||
|
|
||||||
|
file:write(string.format("\t{%d, %d, {%d, Unicode::%s, Unicode::%s}},\n", data.startCodepoint, data.endCodepoint, data.startCodepoint, category, direction))
|
||||||
|
end
|
||||||
|
file:write("};\n\n")
|
||||||
|
|
||||||
|
file:write(string.format("UnicodeCharacterSimpleMapping unicodeLower[%d] = {\n", #lowercaseCharacters))
|
||||||
|
for _, data in pairs(lowercaseCharacters) do
|
||||||
|
file:write(string.format("\t{%d, %d},\n", data.codepoint, data.lower))
|
||||||
|
end
|
||||||
|
file:write("};\n\n")
|
||||||
|
|
||||||
|
file:write(string.format("UnicodeCharacterSimpleMapping unicodeTitle[%d] = {\n", #titlecaseCharacters))
|
||||||
|
for _, data in pairs(titlecaseCharacters) do
|
||||||
|
file:write(string.format("\t{%d, %d},\n", data.codepoint, data.title))
|
||||||
|
end
|
||||||
|
file:write("};\n\n")
|
||||||
|
|
||||||
|
file:write(string.format("UnicodeCharacterSimpleMapping unicodeUpper[%d] = {\n", #uppercaseCharacters))
|
||||||
|
for _, data in pairs(uppercaseCharacters) do
|
||||||
|
file:write(string.format("\t{%d, %d},\n", data.codepoint, data.upper))
|
||||||
|
end
|
||||||
|
file:write("};\n\n")
|
||||||
|
|
||||||
file:close()
|
file:close()
|
||||||
|
|
||||||
print("Took " .. os.difftime(os.clock(), t1) .. "sec.")
|
print("Succeeded in " .. (os.clock() - t1) .. "sec.")
|
||||||
end
|
end
|
||||||
--print(string.match("<Plane 15 Private Use, First>", "<.+, (%w+)>"))
|
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -50,7 +50,7 @@
|
||||||
#define NAZARA_CORE_FILE_BUFFERSIZE 4096
|
#define NAZARA_CORE_FILE_BUFFERSIZE 4096
|
||||||
|
|
||||||
// Incorporate the Unicode Character Data table (Necessary to make it work with the flag String::HandleUTF8)
|
// Incorporate the Unicode Character Data table (Necessary to make it work with the flag String::HandleUTF8)
|
||||||
#define NAZARA_CORE_INCLUDE_UNICODEDATA 0
|
#define NAZARA_CORE_INCLUDE_UNICODEDATA 1
|
||||||
|
|
||||||
// Use the MemoryManager to manage dynamic allocations (can detect memory leak but allocations/frees are slower)
|
// Use the MemoryManager to manage dynamic allocations (can detect memory leak but allocations/frees are slower)
|
||||||
#define NAZARA_CORE_MANAGE_MEMORY 0
|
#define NAZARA_CORE_MANAGE_MEMORY 0
|
||||||
|
|
|
||||||
|
|
@ -55,7 +55,7 @@ namespace Nz
|
||||||
Category_Other_PrivateUse = Category_Other | 0x0800, // Co
|
Category_Other_PrivateUse = Category_Other | 0x0800, // Co
|
||||||
Category_Other_Surrogate = Category_Other | 0x1000, // Cs
|
Category_Other_Surrogate = Category_Other | 0x1000, // Cs
|
||||||
|
|
||||||
// Ponctuations
|
// Punctuations
|
||||||
Category_Punctuation = 0x10, // P
|
Category_Punctuation = 0x10, // P
|
||||||
Category_Punctuation_Close = Category_Punctuation | 0x0100, // Pe
|
Category_Punctuation_Close = Category_Punctuation | 0x0100, // Pe
|
||||||
Category_Punctuation_Connector = Category_Punctuation | 0x0200, // Pc
|
Category_Punctuation_Connector = Category_Punctuation | 0x0200, // Pc
|
||||||
|
|
@ -81,25 +81,29 @@ namespace Nz
|
||||||
|
|
||||||
enum Direction : UInt8
|
enum Direction : UInt8
|
||||||
{
|
{
|
||||||
Direction_Arabic_Letter, // AL
|
Direction_Arabic_Letter, // AL
|
||||||
Direction_Arabic_Number, // AN
|
Direction_Arabic_Number, // AN
|
||||||
Direction_Boundary_Neutral, // BN
|
Direction_Boundary_Neutral, // BN
|
||||||
Direction_Common_Separator, // CS
|
Direction_Common_Separator, // CS
|
||||||
Direction_European_Number, // EN
|
Direction_European_Number, // EN
|
||||||
Direction_European_Separator, // ES
|
Direction_European_Separator, // ES
|
||||||
Direction_European_Terminator, // ET
|
Direction_European_Terminator, // ET
|
||||||
Direction_Left_To_Right, // L
|
Direction_First_Strong_Isolate, // FSI
|
||||||
Direction_Left_To_Right_Embedding, // LRE
|
Direction_Left_To_Right, // L
|
||||||
Direction_Left_To_Right_Override, // LRO
|
Direction_Left_To_Right_Embedding, // LRE
|
||||||
Direction_Nonspacing_Mark, // NSM
|
Direction_Left_To_Right_Isolate, // LRI
|
||||||
Direction_Other_Neutral, // ON
|
Direction_Left_To_Right_Override, // LRO
|
||||||
Direction_Paragraph_Separator, // B
|
Direction_Nonspacing_Mark, // NSM
|
||||||
Direction_Pop_Directional_Format, // PDF
|
Direction_Other_Neutral, // ON
|
||||||
Direction_Right_To_Left, // R
|
Direction_Paragraph_Separator, // B
|
||||||
Direction_Right_To_Left_Embedding, // RLE
|
Direction_Pop_Directional_Formatting, // PDF
|
||||||
Direction_Right_To_Left_Override, // RLO
|
Direction_Pop_Directional_Isolate, // PDI
|
||||||
Direction_Segment_Separator, // S
|
Direction_Right_To_Left, // R
|
||||||
Direction_White_Space // WS
|
Direction_Right_To_Left_Embedding, // RLE
|
||||||
|
Direction_Right_To_Left_Isolate, // RLI
|
||||||
|
Direction_Right_To_Left_Override, // RLO
|
||||||
|
Direction_Segment_Separator, // S
|
||||||
|
Direction_White_Space // WS
|
||||||
};
|
};
|
||||||
|
|
||||||
static Category GetCategory(char32_t character);
|
static Category GetCategory(char32_t character);
|
||||||
|
|
|
||||||
|
|
@ -4,23 +4,140 @@
|
||||||
|
|
||||||
#include <Nazara/Core/Unicode.hpp>
|
#include <Nazara/Core/Unicode.hpp>
|
||||||
#include <Nazara/Core/Config.hpp>
|
#include <Nazara/Core/Config.hpp>
|
||||||
|
#include <algorithm>
|
||||||
#include <Nazara/Core/Debug.hpp>
|
#include <Nazara/Core/Debug.hpp>
|
||||||
|
|
||||||
#if NAZARA_CORE_INCLUDE_UNICODEDATA
|
#if NAZARA_CORE_INCLUDE_UNICODEDATA
|
||||||
namespace Nz
|
namespace Nz
|
||||||
{
|
{
|
||||||
struct Character
|
struct UnicodeCharacter
|
||||||
{
|
{
|
||||||
UInt16 category; // The type of the character
|
UInt32 codepoint;
|
||||||
UInt8 direction; // The reading way of the character
|
Unicode::Category category; // The type of the character
|
||||||
UInt32 lowerCase; // The corresponding lower character
|
Unicode::Direction direction; // The reading way of the character
|
||||||
UInt32 titleCase; // The corresponding title character
|
};
|
||||||
UInt32 upperCase; // The corresponding upper character
|
|
||||||
|
struct UnicodeSet
|
||||||
|
{
|
||||||
|
UInt32 firstCodepoint;
|
||||||
|
UInt32 lastCodepoint;
|
||||||
|
UnicodeCharacter character;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct UnicodeCharacterSimpleMapping
|
||||||
|
{
|
||||||
|
UInt32 codepoint;
|
||||||
|
UInt32 character;
|
||||||
};
|
};
|
||||||
}
|
|
||||||
|
|
||||||
#include <Nazara/Core/UnicodeData.hpp>
|
#include <Nazara/Core/UnicodeData.hpp>
|
||||||
|
|
||||||
|
namespace
|
||||||
|
{
|
||||||
|
/*!
* \brief Looks up the data record describing a codepoint
* \return Pointer to the matching record, or nullptr if the codepoint is found in neither table
*
* \param codepoint Codepoint to look up
*
* \remark Both unicodeCharacters and unicodeSets are binary-searched, which assumes the generated arrays are sorted by ascending codepoint / firstCodepoint (TODO confirm against the generator output)
*/
const UnicodeCharacter* GetCharacter(Nz::UInt32 codepoint)
{
	// Individually listed characters: exact match on the codepoint
	auto it = std::lower_bound(std::begin(unicodeCharacters), std::end(unicodeCharacters), codepoint, [](const UnicodeCharacter& character, Nz::UInt32 value) { return character.codepoint < value; });
	if (it != std::end(unicodeCharacters) && it->codepoint == codepoint)
		return &*it;

	// Character is not part of the common character array, search in set
	// upper_bound returns the first set starting strictly after the codepoint, so the only
	// candidate is the set just before it. Using lower_bound here would return the set whose
	// firstCodepoint equals the codepoint, and the decrement below would then skip it.
	auto itSet = std::upper_bound(std::begin(unicodeSets), std::end(unicodeSets), codepoint, [](Nz::UInt32 value, const UnicodeSet& set) { return value < set.firstCodepoint; });
	if (itSet != std::begin(unicodeSets))
	{
		--itSet;
		if (codepoint >= itSet->firstCodepoint && codepoint <= itSet->lastCodepoint)
			return &itSet->character;
	}

	return nullptr;
}
|
||||||
|
|
||||||
|
template<std::size_t N>
|
||||||
|
const UnicodeCharacterSimpleMapping* GetCharacterMapping(Nz::UInt32 codepoint, const UnicodeCharacterSimpleMapping(&mapping)[N])
|
||||||
|
{
|
||||||
|
auto it = std::lower_bound(std::begin(mapping), std::end(mapping), codepoint, [](const UnicodeCharacterSimpleMapping& character, Nz::UInt32 codepoint) { return character.codepoint < codepoint; });
|
||||||
|
if (it != std::end(mapping) && it->codepoint == codepoint)
|
||||||
|
return &*it;
|
||||||
|
else
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Gets the category of the character
|
||||||
|
* \return Unicode category
|
||||||
|
*
|
||||||
|
* \param character Character to get assignated category
|
||||||
|
*/
|
||||||
|
Unicode::Category Unicode::GetCategory(char32_t character)
|
||||||
|
{
|
||||||
|
if (const UnicodeCharacter* characterData = GetCharacter(character))
|
||||||
|
return characterData->category;
|
||||||
|
else
|
||||||
|
return Category_NoCategory;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Gets the direction of reading of the character
|
||||||
|
* \return Unicode direction
|
||||||
|
*
|
||||||
|
* \param character Character to get assignated direction
|
||||||
|
*/
|
||||||
|
|
||||||
|
Unicode::Direction Unicode::GetDirection(char32_t character)
|
||||||
|
{
|
||||||
|
if (const UnicodeCharacter* characterData = GetCharacter(character))
|
||||||
|
return characterData->direction;
|
||||||
|
else
|
||||||
|
return Direction_Boundary_Neutral;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Gets the lower case of the character
|
||||||
|
* \return Unicode lower
|
||||||
|
*
|
||||||
|
* \param character Character to get assignated lower case
|
||||||
|
*/
|
||||||
|
|
||||||
|
char32_t Unicode::GetLowercase(char32_t character)
|
||||||
|
{
|
||||||
|
if (const UnicodeCharacterSimpleMapping* characterMapping = GetCharacterMapping(character, unicodeLower))
|
||||||
|
return characterMapping->character;
|
||||||
|
else
|
||||||
|
return character;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Gets the title case of the character
|
||||||
|
* \return Unicode title
|
||||||
|
*
|
||||||
|
* \param character Character to get assignated title case
|
||||||
|
*/
|
||||||
|
char32_t Unicode::GetTitlecase(char32_t character)
|
||||||
|
{
|
||||||
|
if (const UnicodeCharacterSimpleMapping* characterMapping = GetCharacterMapping(character, unicodeTitle))
|
||||||
|
return characterMapping->character;
|
||||||
|
else
|
||||||
|
return character;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*!
|
||||||
|
* \brief Gets the upper case of the character
|
||||||
|
* \return Unicode upper
|
||||||
|
*
|
||||||
|
* \param character Character to get assignated upper case
|
||||||
|
*/
|
||||||
|
char32_t Unicode::GetUppercase(char32_t character)
|
||||||
|
{
|
||||||
|
if (const UnicodeCharacterSimpleMapping* characterMapping = GetCharacterMapping(character, unicodeUpper))
|
||||||
|
return characterMapping->character;
|
||||||
|
else
|
||||||
|
return character;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#else // Implementation handling ASCII table
|
#else // Implementation handling ASCII table
|
||||||
|
|
||||||
namespace Nz
|
namespace Nz
|
||||||
|
|
|
||||||
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue