Core: Integrated Unicode data

This commit is contained in:
Lynix
2018-08-03 20:24:56 +02:00
parent cf24b8abe4
commit 39d1d31639
5 changed files with 36780 additions and 109 deletions

View File

@@ -4,23 +4,140 @@
#include <Nazara/Core/Unicode.hpp>
#include <Nazara/Core/Config.hpp>
#include <algorithm>
#include <Nazara/Core/Debug.hpp>
#if NAZARA_CORE_INCLUDE_UNICODEDATA
namespace Nz
{
struct Character
struct UnicodeCharacter
{
UInt16 category; // The type of the character
UInt8 direction; // The reading way of the character
UInt32 lowerCase; // The corresponding lower character
UInt32 titleCase; // The corresponding title character
UInt32 upperCase; // The corresponding upper character
UInt32 codepoint;
Unicode::Category category; // The type of the character
Unicode::Direction direction; // The reading way of the character
};
struct UnicodeSet
{
UInt32 firstCodepoint;
UInt32 lastCodepoint;
UnicodeCharacter character;
};
struct UnicodeCharacterSimpleMapping
{
UInt32 codepoint;
UInt32 character;
};
}
#include <Nazara/Core/UnicodeData.hpp>
namespace
{
const UnicodeCharacter* GetCharacter(Nz::UInt32 codepoint)
{
auto it = std::lower_bound(std::begin(unicodeCharacters), std::end(unicodeCharacters), codepoint, [](const UnicodeCharacter& character, Nz::UInt32 codepoint) { return character.codepoint < codepoint; });
if (it != std::end(unicodeCharacters) && it->codepoint == codepoint)
return &*it;
else
{
// Character is not part of the common character array, search in set
auto itSet = std::lower_bound(std::begin(unicodeSets), std::end(unicodeSets), codepoint, [](const UnicodeSet& character, Nz::UInt32 codepoint) { return character.firstCodepoint < codepoint; });
if (itSet != std::begin(unicodeSets))
{
--itSet;
if (itSet != std::end(unicodeSets) && codepoint >= itSet->firstCodepoint && codepoint <= itSet->lastCodepoint)
return &itSet->character;
}
}
return nullptr;
}
template<std::size_t N>
const UnicodeCharacterSimpleMapping* GetCharacterMapping(Nz::UInt32 codepoint, const UnicodeCharacterSimpleMapping(&mapping)[N])
{
auto it = std::lower_bound(std::begin(mapping), std::end(mapping), codepoint, [](const UnicodeCharacterSimpleMapping& character, Nz::UInt32 codepoint) { return character.codepoint < codepoint; });
if (it != std::end(mapping) && it->codepoint == codepoint)
return &*it;
else
return nullptr;
}
}
/*!
* \brief Gets the category of the character
* \return Unicode category
*
* \param character Character to get assignated category
*/
Unicode::Category Unicode::GetCategory(char32_t character)
{
if (const UnicodeCharacter* characterData = GetCharacter(character))
return characterData->category;
else
return Category_NoCategory;
}
/*!
* \brief Gets the direction of reading of the character
* \return Unicode direction
*
* \param character Character to get assignated direction
*/
Unicode::Direction Unicode::GetDirection(char32_t character)
{
if (const UnicodeCharacter* characterData = GetCharacter(character))
return characterData->direction;
else
return Direction_Boundary_Neutral;
}
/*!
* \brief Gets the lower case of the character
* \return Unicode lower
*
* \param character Character to get assignated lower case
*/
char32_t Unicode::GetLowercase(char32_t character)
{
if (const UnicodeCharacterSimpleMapping* characterMapping = GetCharacterMapping(character, unicodeLower))
return characterMapping->character;
else
return character;
}
/*!
* \brief Gets the title case of the character
* \return Unicode title
*
* \param character Character to get assignated title case
*/
char32_t Unicode::GetTitlecase(char32_t character)
{
if (const UnicodeCharacterSimpleMapping* characterMapping = GetCharacterMapping(character, unicodeTitle))
return characterMapping->character;
else
return character;
}
/*!
* \brief Gets the upper case of the character
* \return Unicode upper
*
* \param character Character to get assignated upper case
*/
char32_t Unicode::GetUppercase(char32_t character)
{
if (const UnicodeCharacterSimpleMapping* characterMapping = GetCharacterMapping(character, unicodeUpper))
return characterMapping->character;
else
return character;
}
}
#else // Implementation handling ASCII table
namespace Nz

File diff suppressed because it is too large Load Diff