// NazaraEngine/src/Nazara/Core/StringExt.cpp
//
// 649 lines
// 15 KiB
// C++

// Copyright (C) 2023 Jérôme "Lynix" Leclercq (lynix680@gmail.com)
// This file is part of the "Nazara Engine - Core module"
// For conditions of distribution and use, see copyright notice in Config.hpp
#include <Nazara/Core/StringExt.hpp>
#include <Nazara/Core/Algorithm.hpp>
#include <Nazara/Core/Error.hpp>
#include <Utfcpp/utf8.h>
#include <cinttypes>
#include <Nazara/Core/Debug.hpp>
namespace Nz
{
namespace NAZARA_ANONYMOUS_NAMESPACE
{
// Tells whether a code point counts as whitespace for the trimming and
// word-splitting helpers: either one of the control whitespace characters
// \f, \n, \r, \t, \v, or a code point whose Unicode category has the
// Separator bit set.
bool IsSpace(char32_t character)
{
	const bool isControlWhitespace = character == '\f'
	                              || character == '\n'
	                              || character == '\r'
	                              || character == '\t'
	                              || character == '\v';
	if (isControlWhitespace)
		return true;

	// Everything else is decided by the Unicode category table
	return Unicode::GetCategory(character) & Unicode::Category_Separator;
}
// ASCII-only lowercase conversion of a single char.
// Characters outside 'A'..'Z' are returned unchanged.
char ToLower(char character)
{
	const bool isAsciiUpper = (character >= 'A' && character <= 'Z');
	return isAsciiUpper ? static_cast<char>(character + ('a' - 'A')) : character;
}
// ASCII-only uppercase conversion of a single char.
// Characters outside 'a'..'z' are returned unchanged.
char ToUpper(char character)
{
	const bool isAsciiLower = (character >= 'a' && character <= 'z');
	return isAsciiLower ? static_cast<char>(character + ('A' - 'a')) : character;
}
// Primary template of the wchar_t <-> UTF-8 converter, selected by the size
// of wchar_t. It is only instantiated when no specialization exists for that
// size, in which case the build fails with "unsupported platform".
template<std::size_t WideCharSize>
struct WideConverter
{
	static_assert(AlwaysFalse<std::integral_constant<std::size_t, WideCharSize>>(), "unsupported platform");
};
#ifdef NAZARA_PLATFORM_WINDOWS
// Specialization for a two-byte wchar_t: wide strings are handled as UTF-16 (Windows)
template<>
struct WideConverter<2>
{
	// Converts `size` wide characters starting at `wstr` into a UTF-8 string
	static std::string From(const wchar_t* wstr, std::size_t size)
	{
		const std::u16string_view utf16View(reinterpret_cast<const char16_t*>(wstr), size);
		return FromUtf16String(utf16View);
	}

	// Converts a UTF-8 string into a UTF-16 wide string
	static std::wstring To(const std::string_view& str)
	{
		std::wstring wide;
		utf8::utf8to16(str.begin(), str.end(), std::back_inserter(wide));
		return wide;
	}
};
#endif
#ifndef NAZARA_PLATFORM_WINDOWS
// Specialization for a four-byte wchar_t: wide strings are handled as UTF-32 (POSIX)
template<>
struct WideConverter<4>
{
	// Converts `size` wide characters starting at `wstr` into a UTF-8 string
	static std::string From(const wchar_t* wstr, std::size_t size)
	{
		const std::u32string_view utf32View(reinterpret_cast<const char32_t*>(wstr), size);
		return FromUtf32String(utf32View);
	}

	// Converts a UTF-8 string into a UTF-32 wide string
	static std::wstring To(const std::string_view& str)
	{
		std::wstring wide;
		utf8::utf8to32(str.begin(), str.end(), std::back_inserter(wide));
		return wide;
	}
};
#endif
}
// Returns the number of UTF-8 code points in str, as measured by
// utf8::distance over the whole byte range of the view.
std::size_t ComputeCharacterCount(const std::string_view& str)
{
	const char* first = str.data();
	const char* last = first + str.size();
	return utf8::distance(first, last);
}
// Checks whether lhs ends with rhs, comparing byte by byte after lowering
// each byte with the ASCII-only ToLower(char).
// A rhs longer than lhs never matches.
bool EndsWith(const std::string_view& lhs, const std::string_view& rhs, CaseIndependent)
{
	NAZARA_USE_ANONYMOUS_NAMESPACE

	if (lhs.size() < rhs.size())
		return false;

	// Only the last rhs.size() bytes of lhs take part in the comparison
	const std::string_view tail = lhs.substr(lhs.size() - rhs.size());
	auto sameIgnoringCase = [](char c1, char c2)
	{
		return ToLower(c1) == ToLower(c2);
	};

	return std::equal(tail.begin(), tail.end(), rhs.begin(), rhs.end(), sameIgnoringCase);
}
// Checks whether the UTF-8 string lhs ends with the UTF-8 string rhs,
// comparing code point by code point.
//
// An empty rhs matches any lhs; an empty lhs only matches an empty rhs.
// Both strings are decoded backwards from their last byte with utf8::prior:
// - decoding always starts on a character boundary of lhs, so a rhs-sized
//   byte offset falling inside a multi-byte character cannot be used as a
//   starting point,
// - U+0000 is compared like any other code point,
// - each code point is decoded once.
// NOTE(review): utf8::prior is the checked utfcpp variant and is expected to
// throw on malformed UTF-8; callers relying on that should confirm against
// the bundled utfcpp version.
bool EndsWith(const std::string_view& lhs, const std::string_view& rhs, UnicodeAware)
{
	if (lhs.empty())
		return lhs == rhs;
	else if (rhs.empty())
		return true;
	else if (rhs.size() > lhs.size())
		return false; // equal code points have equal encodings, a longer rhs cannot be a suffix

	const char* const lhsBegin = lhs.data();
	const char* const rhsBegin = rhs.data();
	const char* lhsIt = lhsBegin + lhs.size();
	const char* rhsIt = rhsBegin + rhs.size();

	while (rhsIt != rhsBegin)
	{
		// lhs ran out of characters before rhs did
		if (lhsIt == lhsBegin)
			return false;

		// utf8::prior moves its first argument back by one code point and returns it
		if (utf8::prior(lhsIt, lhsBegin) != utf8::prior(rhsIt, rhsBegin))
			return false;
	}

	return true;
}
// Checks whether the UTF-8 string lhs ends with the UTF-8 string rhs,
// comparing the Unicode::GetLowercase mapping of each code point.
//
// An empty rhs matches any lhs; an empty lhs only matches an empty rhs.
// Both strings are decoded backwards from their last byte with utf8::prior.
// Byte lengths are deliberately not compared: two code points that are equal
// once lowered may be encoded with a different number of bytes, so neither a
// byte-size guard nor a byte offset into lhs would be reliable here.
// U+0000 is compared like any other code point.
// NOTE(review): utf8::prior is the checked utfcpp variant and is expected to
// throw on malformed UTF-8; callers relying on that should confirm against
// the bundled utfcpp version.
bool EndsWith(const std::string_view& lhs, const std::string_view& rhs, CaseIndependent, UnicodeAware)
{
	if (lhs.empty())
		return lhs == rhs;
	else if (rhs.empty())
		return true;

	const char* const lhsBegin = lhs.data();
	const char* const rhsBegin = rhs.data();
	const char* lhsIt = lhsBegin + lhs.size();
	const char* rhsIt = rhsBegin + rhs.size();

	while (rhsIt != rhsBegin)
	{
		// lhs ran out of characters before rhs did
		if (lhsIt == lhsBegin)
			return false;

		// utf8::prior moves its first argument back by one code point and returns it
		if (Unicode::GetLowercase(utf8::prior(lhsIt, lhsBegin)) != Unicode::GetLowercase(utf8::prior(rhsIt, rhsBegin)))
			return false;
	}

	return true;
}
// Converts a UTF-16 string into a newly allocated UTF-8 std::string
// using utf8::utf16to8.
std::string FromUtf16String(const std::u16string_view& u16str)
{
	std::string utf8Output;
	auto sink = std::back_inserter(utf8Output);
	utf8::utf16to8(u16str.begin(), u16str.end(), sink);
	return utf8Output;
}
// Converts a UTF-32 string into a newly allocated UTF-8 std::string
// using utf8::utf32to8.
std::string FromUtf32String(const std::u32string_view& u32str)
{
	std::string utf8Output;
	auto sink = std::back_inserter(utf8Output);
	utf8::utf32to8(u32str.begin(), u32str.end(), sink);
	return utf8Output;
}
std::string FromWideString(const std::wstring_view& wstr)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
return WideConverter<sizeof(wchar_t)>::From(wstr.data(), wstr.size());
}
std::size_t GetCharacterPosition(const std::string_view& str, std::size_t characterIndex)
{
const char* ptr = str.data();
const char* end = ptr + str.size();
try
{
utf8::advance(ptr, characterIndex, end);
return ptr - str.data();
}
catch (utf8::not_enough_room& /*e*/)
{
// Returns npos
}
catch (utf8::exception& e)
{
NazaraError("UTF-8 error: " + std::string(e.what()));
}
catch (std::exception& e)
{
NazaraError(e.what());
}
return std::string::npos;
}
// Returns the word number wordIndex (zero-based) of str, or an empty view if
// str holds fewer words.
// Words are non-empty runs of bytes delimited by any of " \f\n\r\t\v"; runs
// of consecutive separators do not produce empty words.
// The end of the string also terminates a word, so the last word is returned
// even when it is not followed by a separator.
std::string_view GetWord(const std::string_view& str, std::size_t wordIndex)
{
	std::size_t wordStart = 0;
	while (wordStart < str.size())
	{
		std::size_t wordEnd = str.find_first_of(" \f\n\r\t\v", wordStart);
		if (wordEnd == std::string_view::npos)
			wordEnd = str.size(); // no separator left: the word runs up to the end of the string

		// Skip empty spans (consecutive separators) without counting them as words
		if (wordEnd != wordStart && wordIndex-- == 0)
			return str.substr(wordStart, wordEnd - wordStart);

		wordStart = wordEnd + 1;
	}

	return {};
}
std::string_view GetWord(const std::string_view& str, std::size_t wordIndex, UnicodeAware)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
auto FindNextSeparator = [&]() -> std::size_t
{
for (; it != end; ++it)
{
if (IsSpace(*it))
return true;
}
return false;
};
utf8::unchecked::iterator<const char*> lastSplit = it;
while (FindNextSeparator())
{
if (it != lastSplit && wordIndex-- == 0)
return std::string_view(lastSplit.base(), it.base() - lastSplit.base());
++it;
lastSplit = it;
}
return {};
}
// Tests str against a wildcard pattern, byte by byte:
// - '*' matches any run of bytes (including none),
// - '?' matches exactly one byte,
// - any other byte matches itself.
// The whole of str must be consumed by the pattern for the match to succeed.
// Returns false when either str or pattern is empty.
//
// Based on the wildcard compare by Jack Handy - akkhandy@hotmail.com
// From: http://www.codeproject.com/Articles/1088/Wildcard-string-compare-globbing
// Both views are walked by index and bounded by their size, so neither needs
// to be null-terminated, and a pattern exhausted before str is a mismatch.
bool MatchPattern(const std::string_view& str, const std::string_view& pattern)
{
	if (str.empty() || pattern.empty())
		return false;

	constexpr std::size_t noStar = std::string_view::npos;

	std::size_t strPos = 0;
	std::size_t patternPos = 0;
	std::size_t resumePatternPos = noStar; // pattern index following the last '*' seen
	std::size_t resumeStrPos = 0;          // next str index the last '*' should try to absorb up to

	while (strPos < str.size())
	{
		const bool patternLeft = (patternPos < pattern.size());
		if (patternLeft && pattern[patternPos] == '*')
		{
			// A trailing '*' swallows everything that remains
			if (++patternPos == pattern.size())
				return true;

			resumePatternPos = patternPos;
			resumeStrPos = strPos + 1;
		}
		else if (patternLeft && (pattern[patternPos] == str[strPos] || pattern[patternPos] == '?'))
		{
			++patternPos;
			++strPos;
		}
		else if (resumePatternPos != noStar)
		{
			// Mismatch after a '*': let the star absorb one more byte and retry
			patternPos = resumePatternPos;
			strPos = resumeStrPos++;
		}
		else
			return false; // mismatch (or pattern exhausted) with no '*' to fall back on
	}

	// str is consumed: only '*' may remain in the pattern
	while (patternPos < pattern.size() && pattern[patternPos] == '*')
		++patternPos;

	return patternPos == pattern.size();
}
// Formats a pointer value as "0x" followed by its address in uppercase
// hexadecimal, zero-padded to two digits per byte of uintptr_t.
std::string PointerToString(const void* ptr)
{
	constexpr int hexDigits = static_cast<int>(sizeof(uintptr_t) * 2);
	const uintptr_t address = reinterpret_cast<uintptr_t>(ptr);

	// Room for the digits plus the "0x" prefix; the terminator lands on the string's own null slot
	std::string text(hexDigits + 2, '\0');
	const int written = std::sprintf(text.data(), "0x%0*" PRIXPTR, hexDigits, address);
	text.resize(written);

	return text;
}
// Checks whether lhs starts with rhs, comparing byte by byte after lowering
// each byte with the ASCII-only ToLower(char).
// A rhs longer than lhs never matches.
bool StartsWith(const std::string_view& lhs, const std::string_view& rhs, CaseIndependent)
{
	NAZARA_USE_ANONYMOUS_NAMESPACE

	if (lhs.size() < rhs.size())
		return false;

	// Only the first rhs.size() bytes of lhs take part in the comparison
	const std::string_view head = lhs.substr(0, rhs.size());
	auto sameIgnoringCase = [](char c1, char c2)
	{
		return ToLower(c1) == ToLower(c2);
	};

	return std::equal(head.begin(), head.end(), rhs.begin(), rhs.end(), sameIgnoringCase);
}
// Checks whether the UTF-8 string lhs starts with the UTF-8 string rhs,
// comparing code point by code point with the checked utf8 iterators.
//
// An empty rhs matches any lhs; an empty lhs only matches an empty rhs.
// The end of lhs is tested before each decode, so a lhs that holds fewer
// characters than rhs yields false instead of decoding past its end.
// U+0000 is compared like any other code point.
bool StartsWith(const std::string_view& lhs, const std::string_view& rhs, UnicodeAware)
{
	if (lhs.empty())
		return lhs == rhs;
	else if (rhs.empty())
		return true;

	const char* const lhsEnd = lhs.data() + lhs.size();
	const char* const rhsEnd = rhs.data() + rhs.size();

	utf8::iterator<const char*> it(lhs.data(), lhs.data(), lhsEnd);
	utf8::iterator<const char*> it2(rhs.data(), rhs.data(), rhsEnd);
	while (it2.base() != rhsEnd)
	{
		// lhs ran out of characters before rhs did
		if (it.base() == lhsEnd)
			return false;

		if (*it != *it2)
			return false;

		++it;
		++it2;
	}

	return true;
}
// Checks whether the UTF-8 string lhs starts with the UTF-8 string rhs,
// comparing the Unicode::GetLowercase mapping of each code point, decoded
// with the checked utf8 iterators.
//
// An empty rhs matches any lhs; an empty lhs only matches an empty rhs.
// The end of lhs is tested before each decode, so a lhs that holds fewer
// characters than rhs yields false instead of decoding past its end.
// Byte lengths are deliberately not compared, since code points that are
// equal once lowered may have encodings of different lengths.
// U+0000 is compared like any other code point.
bool StartsWith(const std::string_view& lhs, const std::string_view& rhs, CaseIndependent, UnicodeAware)
{
	if (lhs.empty())
		return lhs == rhs;
	else if (rhs.empty())
		return true;

	const char* const lhsEnd = lhs.data() + lhs.size();
	const char* const rhsEnd = rhs.data() + rhs.size();

	utf8::iterator<const char*> it(lhs.data(), lhs.data(), lhsEnd);
	utf8::iterator<const char*> it2(rhs.data(), rhs.data(), rhsEnd);
	while (it2.base() != rhsEnd)
	{
		// lhs ran out of characters before rhs did
		if (it.base() == lhsEnd)
			return false;

		if (Unicode::GetLowercase(*it) != Unicode::GetLowercase(*it2))
			return false;

		++it;
		++it2;
	}

	return true;
}
// Compares two UTF-8 strings code point by code point with the checked utf8
// iterators.
// Returns true only when both strings hold the same sequence of code points:
// the walk stops as soon as either string is exhausted, and the strings are
// equal only if both were exhausted at the same time.
bool StringEqual(const std::string_view& lhs, const std::string_view& rhs, UnicodeAware)
{
	if (lhs.empty() || rhs.empty())
		return lhs == rhs;

	const char* const lhsEnd = lhs.data() + lhs.size();
	const char* const rhsEnd = rhs.data() + rhs.size();

	utf8::iterator<const char*> it(lhs.data(), lhs.data(), lhsEnd);
	utf8::iterator<const char*> it2(rhs.data(), rhs.data(), rhsEnd);
	while (it.base() != lhsEnd && it2.base() != rhsEnd)
	{
		if (*it != *it2)
			return false;

		++it;
		++it2;
	}

	// A leftover on either side means one string is a strict prefix of the other
	return it.base() == lhsEnd && it2.base() == rhsEnd;
}
// Compares two UTF-8 strings code point by code point, ignoring case through
// Unicode::GetLowercase, with the checked utf8 iterators.
// Returns true only when both strings hold the same number of code points and
// every pair is equal once lowered: the walk stops as soon as either string
// is exhausted, and the strings are equal only if both were exhausted at the
// same time.
bool StringEqual(const std::string_view& lhs, const std::string_view& rhs, CaseIndependent, UnicodeAware)
{
	if (lhs.empty() || rhs.empty())
		return lhs == rhs;

	const char* const lhsEnd = lhs.data() + lhs.size();
	const char* const rhsEnd = rhs.data() + rhs.size();

	utf8::iterator<const char*> it(lhs.data(), lhs.data(), lhsEnd);
	utf8::iterator<const char*> it2(rhs.data(), rhs.data(), rhsEnd);
	while (it.base() != lhsEnd && it2.base() != rhsEnd)
	{
		if (Unicode::GetLowercase(*it) != Unicode::GetLowercase(*it2))
			return false;

		++it;
		++it2;
	}

	// A leftover on either side means one string is a strict prefix of the other
	return it.base() == lhsEnd && it2.base() == rhsEnd;
}
std::string ToLower(const std::string_view& str)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
std::string result;
result.reserve(str.size());
std::transform(str.begin(), str.end(), std::back_inserter(result), Overload<char>(ToLower));
return result;
}
std::string ToLower(const std::string_view& str, UnicodeAware)
{
if (str.empty())
return std::string();
std::string result;
result.reserve(str.size());
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
for (; it != end; ++it)
utf8::append(Unicode::GetLowercase(*it), std::back_inserter(result));
return result;
}
std::string ToUpper(const std::string_view& str)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
std::string result;
result.reserve(str.size());
std::transform(str.begin(), str.end(), std::back_inserter(result), Overload<char>(ToUpper));
return result;
}
std::string ToUpper(const std::string_view& str, UnicodeAware)
{
if (str.empty())
return std::string();
std::string result;
result.reserve(str.size());
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
for (; it != end; ++it)
utf8::append(Unicode::GetUppercase(*it), std::back_inserter(result));
return result;
}
// Converts a UTF-8 string into a newly allocated UTF-16 string
// using utf8::utf8to16.
std::u16string ToUtf16String(const std::string_view& str)
{
	std::u16string utf16Output;
	auto sink = std::back_inserter(utf16Output);
	utf8::utf8to16(str.begin(), str.end(), sink);
	return utf16Output;
}
// Converts a UTF-8 string into a newly allocated UTF-32 string
// using utf8::utf8to32.
std::u32string ToUtf32String(const std::string_view& str)
{
	std::u32string utf32Output;
	auto sink = std::back_inserter(utf32Output);
	utf8::utf8to32(str.begin(), str.end(), sink);
	return utf32Output;
}
std::wstring ToWideString(const std::string_view& str)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
return WideConverter<sizeof(wchar_t)>::To(str);
}
std::string_view TrimLeft(std::string_view str)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
while (!str.empty() && IsSpace(str.front()))
str.remove_prefix(1);
return str;
}
std::string_view TrimLeft(std::string_view str, UnicodeAware)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
while (it != end && IsSpace(*it))
++it;
return std::string_view(it.base(), end.base() - it.base());
}
std::string_view TrimLeft(std::string_view str, char32_t c, UnicodeAware)
{
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
while (it != end && *it == c)
++it;
return std::string_view(it.base(), end.base() - it.base());
}
std::string_view TrimLeft(std::string_view str, char32_t c, CaseIndependent, UnicodeAware)
{
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
c = Unicode::GetLowercase(c);
while (it != end && Unicode::GetLowercase(*it) == c)
++it;
return std::string_view(it.base(), end.base() - it.base());
}
std::string_view TrimLeft(std::string_view str, Unicode::Category category, UnicodeAware)
{
utf8::unchecked::iterator<const char*> it(str.data());
utf8::unchecked::iterator<const char*> end(str.data() + str.size());
while (it != end && (Unicode::GetCategory(*it) & category) == category)
++it;
return std::string_view(it.base(), end.base() - it.base());
}
std::string_view TrimRight(std::string_view str)
{
NAZARA_USE_ANONYMOUS_NAMESPACE
while (!str.empty() && IsSpace(str.back()))
str.remove_suffix(1);
return str;
}
// Returns the UTF-8 string str without its trailing whitespace code points
// (as decided by IsSpace), decoding with the unchecked utf8 iterators.
// Every character is tested, including the first one, so a string made only
// of whitespace yields an empty view.
std::string_view TrimRight(std::string_view str, UnicodeAware)
{
	NAZARA_USE_ANONYMOUS_NAMESPACE

	if (str.empty())
		return str;

	// Find last character head
	const char* lastCharacter = str.data() + str.size() - 1;
	while (utf8::internal::is_trail(*lastCharacter) && lastCharacter != str.data())
		--lastCharacter;

	utf8::unchecked::iterator<const char*> start(str.data());
	utf8::unchecked::iterator<const char*> it(lastCharacter);
	while (IsSpace(*it))
	{
		// The first character was trimmed as well: nothing is left
		if (it == start)
			return str.substr(0, 0);

		--it;
	}

	// it points to the last kept character, the view ends right after it
	++it;

	return std::string_view(start.base(), it.base() - start.base());
}
// Returns the UTF-8 string str without its trailing occurrences of the code
// point c, decoding with the unchecked utf8 iterators.
// Every character is tested, including the first one, so a string made only
// of c yields an empty view.
std::string_view TrimRight(std::string_view str, char32_t c, UnicodeAware)
{
	if (str.empty())
		return str;

	// Find last character head
	const char* lastCharacter = str.data() + str.size() - 1;
	while (utf8::internal::is_trail(*lastCharacter) && lastCharacter != str.data())
		--lastCharacter;

	utf8::unchecked::iterator<const char*> start(str.data());
	utf8::unchecked::iterator<const char*> it(lastCharacter);
	while (*it == c)
	{
		// The first character was trimmed as well: nothing is left
		if (it == start)
			return str.substr(0, 0);

		--it;
	}

	// it points to the last kept character, the view ends right after it
	++it;

	return std::string_view(start.base(), it.base() - start.base());
}
// Returns the UTF-8 string str without its trailing occurrences of the code
// point c, ignoring case: both c and each decoded code point go through
// Unicode::GetLowercase before being compared.
// Decoding uses the unchecked utf8 iterators.
// Every character is tested, including the first one, so a string made only
// of matching characters yields an empty view.
std::string_view TrimRight(std::string_view str, char32_t c, CaseIndependent, UnicodeAware)
{
	if (str.empty())
		return str;

	// Find last character head
	const char* lastCharacter = str.data() + str.size() - 1;
	while (utf8::internal::is_trail(*lastCharacter) && lastCharacter != str.data())
		--lastCharacter;

	utf8::unchecked::iterator<const char*> start(str.data());
	utf8::unchecked::iterator<const char*> it(lastCharacter);

	// Lower the reference code point once, outside of the loop
	c = Unicode::GetLowercase(c);
	while (Unicode::GetLowercase(*it) == c)
	{
		// The first character was trimmed as well: nothing is left
		if (it == start)
			return str.substr(0, 0);

		--it;
	}

	// it points to the last kept character, the view ends right after it
	++it;

	return std::string_view(start.base(), it.base() - start.base());
}
// Returns the UTF-8 string str without its trailing code points whose Unicode
// category contains every bit of `category`.
// Decoding uses the unchecked utf8 iterators.
// Every character is tested, including the first one, so a string made only
// of matching characters yields an empty view.
std::string_view TrimRight(std::string_view str, Unicode::Category category, UnicodeAware)
{
	if (str.empty())
		return str;

	// Find last character head
	const char* lastCharacter = str.data() + str.size() - 1;
	while (utf8::internal::is_trail(*lastCharacter) && lastCharacter != str.data())
		--lastCharacter;

	utf8::unchecked::iterator<const char*> start(str.data());
	utf8::unchecked::iterator<const char*> it(lastCharacter);
	while ((Unicode::GetCategory(*it) & category) == category)
	{
		// The first character was trimmed as well: nothing is left
		if (it == start)
			return str.substr(0, 0);

		--it;
	}

	// it points to the last kept character, the view ends right after it
	++it;

	return std::string_view(start.base(), it.base() - start.base());
}
}