Core/String: Consider the tab character as a separator

Former-commit-id: 83188efd0af64eb3cbc683fd1ddb73ea3fa1adcc [formerly d0d8a4870433c3791ac38c9f2aa4e513ecf473f9] [formerly 6f9a95341216bb79e5a964900f76621b25b03b6c [formerly 6c51f4af08d55ca09da4a801f01cbe4c82e8b097]]
Former-commit-id: 6ee9a65c18432d62c60222e19a364f015a98e046 [formerly 7a2dce7db255019b5bcb547938a30765520b8062]
Former-commit-id: 2fac38a14d3e6ab16816e837d45847e084a6bb40
This commit is contained in:
Lynix 2016-08-05 09:08:45 +02:00
parent c8f759c14d
commit 812a41a4b2
1 changed file with 48 additions and 43 deletions

View File

@ -23,6 +23,11 @@ namespace Nz
{ {
namespace Detail namespace Detail
{ {
// Tells whether a code point counts as a word/trim separator for the string routines.
// True for a horizontal tab, or when the category reported by Unicode::GetCategory
// has at least one bit in common with Unicode::Category_Separator.
// NOTE(review): the tab is tested explicitly, presumably because GetCategory does not
// report it as a separator - confirm against the Unicode module.
inline bool IsSpace(char32_t character)
{
	// Horizontal tab: accepted without consulting the category table
	if (character == '\t')
		return true;

	// Any other code point: rely on its Unicode category
	if (Unicode::GetCategory(character) & Unicode::Category_Separator)
		return true;

	return false;
}
// This algorithm is inspired by the documentation of Qt // This algorithm is inspired by the documentation of Qt
inline std::size_t GetNewSize(std::size_t newSize) inline std::size_t GetNewSize(std::size_t newSize)
{ {
@ -1379,7 +1384,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1391,7 +1396,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*tIt) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*tIt))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1421,7 +1426,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1433,7 +1438,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*tIt) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*tIt))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1465,7 +1470,7 @@ namespace Nz
if (ptr != m_sharedString->string.get()) if (ptr != m_sharedString->string.get())
{ {
--ptr; --ptr;
if (!(Unicode::GetCategory(*ptr++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*ptr++))
continue; continue;
} }
@ -1475,7 +1480,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tPtr == '\0' || Unicode::GetCategory(*tPtr) & Unicode::Category_Separator) if (*tPtr == '\0' || Detail::IsSpace(*tPtr))
return ptr-m_sharedString->string.get(); return ptr-m_sharedString->string.get();
else else
break; break;
@ -1503,7 +1508,7 @@ namespace Nz
if (ptr != m_sharedString->string.get()) if (ptr != m_sharedString->string.get())
{ {
--ptr; --ptr;
if (!(Unicode::GetCategory(*ptr++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*ptr++))
continue; continue;
} }
@ -1513,7 +1518,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tPtr == '\0' || Unicode::GetCategory(*tPtr) & Unicode::Category_Separator) if (*tPtr == '\0' || Detail::IsSpace(*tPtr))
return ptr-m_sharedString->string.get(); return ptr-m_sharedString->string.get();
else else
break; break;
@ -1579,7 +1584,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1591,7 +1596,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*tIt) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*tIt))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1621,7 +1626,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1633,7 +1638,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*tIt) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*tIt))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1664,7 +1669,7 @@ namespace Nz
if (Detail::ToLower(*ptr) == c) if (Detail::ToLower(*ptr) == c)
{ {
char nextC = *(ptr + 1); char nextC = *(ptr + 1);
if (nextC != '\0' && (Unicode::GetCategory(nextC) & Unicode::Category_Separator_Space) == 0) if (nextC != '\0' && (Detail::IsSpace(nextC)) == 0)
continue; continue;
const char* p = &string.m_sharedString->string[string.m_sharedString->size-1]; const char* p = &string.m_sharedString->string[string.m_sharedString->size-1];
@ -1675,7 +1680,7 @@ namespace Nz
if (p == &string.m_sharedString->string[0]) if (p == &string.m_sharedString->string[0])
{ {
if (ptr == m_sharedString->string.get() || Unicode::GetCategory(*(ptr-1)) & Unicode::Category_Separator_Space) if (ptr == m_sharedString->string.get() || Detail::IsSpace(*(ptr-1)))
return ptr-m_sharedString->string.get(); return ptr-m_sharedString->string.get();
else else
break; break;
@ -1695,7 +1700,7 @@ namespace Nz
if (*ptr == string.m_sharedString->string[string.m_sharedString->size-1]) if (*ptr == string.m_sharedString->string[string.m_sharedString->size-1])
{ {
char nextC = *(ptr + 1); char nextC = *(ptr + 1);
if (nextC != '\0' && (Unicode::GetCategory(nextC) & Unicode::Category_Separator_Space) == 0) if (nextC != '\0' && !Detail::IsSpace(nextC))
continue; continue;
const char* p = &string.m_sharedString->string[string.m_sharedString->size-1]; const char* p = &string.m_sharedString->string[string.m_sharedString->size-1];
@ -1706,7 +1711,7 @@ namespace Nz
if (p == &string.m_sharedString->string[0]) if (p == &string.m_sharedString->string[0])
{ {
if (ptr == m_sharedString->string.get() || Unicode::GetCategory(*(ptr - 1)) & Unicode::Category_Separator_Space) if (ptr == m_sharedString->string.get() || Detail::IsSpace(*(ptr - 1)))
return ptr-m_sharedString->string.get(); return ptr-m_sharedString->string.get();
else else
break; break;
@ -1766,7 +1771,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1778,7 +1783,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*it++) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*it++))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1806,7 +1811,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1818,7 +1823,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*it++) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*it++))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1844,7 +1849,7 @@ namespace Nz
{ {
if (Detail::ToLower(*ptr) == c) if (Detail::ToLower(*ptr) == c)
{ {
if (ptr != m_sharedString->string.get() && (Unicode::GetCategory(*(ptr - 1)) & Unicode::Category_Separator) == 0) if (ptr != m_sharedString->string.get() && !Detail::IsSpace(*(ptr - 1)))
continue; continue;
const char* p = &string[1]; const char* p = &string[1];
@ -1853,7 +1858,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tPtr == '\0' || Unicode::GetCategory(*tPtr) & Unicode::Category_Separator) if (*tPtr == '\0' || Detail::IsSpace(*tPtr))
return ptr - m_sharedString->string.get(); return ptr - m_sharedString->string.get();
else else
break; break;
@ -1875,7 +1880,7 @@ namespace Nz
{ {
if (*ptr == string[0]) if (*ptr == string[0])
{ {
if (ptr != m_sharedString->string.get() && (Unicode::GetCategory(*(ptr-1)) & Unicode::Category_Separator) == 0) if (ptr != m_sharedString->string.get() && !Detail::IsSpace(*(ptr-1)))
continue; continue;
const char* p = &string[1]; const char* p = &string[1];
@ -1884,7 +1889,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tPtr == '\0' || Unicode::GetCategory(*tPtr) & Unicode::Category_Separator) if (*tPtr == '\0' || Detail::IsSpace(*tPtr))
return ptr - m_sharedString->string.get(); return ptr - m_sharedString->string.get();
else else
break; break;
@ -1947,7 +1952,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1959,7 +1964,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*it++) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*it++))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -1987,7 +1992,7 @@ namespace Nz
if (it.base() != m_sharedString->string.get()) if (it.base() != m_sharedString->string.get())
{ {
--it; --it;
if (!(Unicode::GetCategory(*it++) & Unicode::Category_Separator)) if (!Detail::IsSpace(*it++))
continue; continue;
} }
@ -1999,7 +2004,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tIt == '\0' || Unicode::GetCategory(*it++) & Unicode::Category_Separator) if (*tIt == '\0' || Detail::IsSpace(*it++))
return it.base() - m_sharedString->string.get(); return it.base() - m_sharedString->string.get();
else else
break; break;
@ -2026,7 +2031,7 @@ namespace Nz
{ {
if (Detail::ToLower(*ptr) == c) if (Detail::ToLower(*ptr) == c)
{ {
if (ptr != m_sharedString->string.get() && (Unicode::GetCategory(*(ptr-1)) & Unicode::Category_Separator_Space) == 0) if (ptr != m_sharedString->string.get() && !Detail::IsSpace(*(ptr-1)))
continue; continue;
const char* p = &string.m_sharedString->string[1]; const char* p = &string.m_sharedString->string[1];
@ -2035,7 +2040,7 @@ namespace Nz
{ {
if (*p == '\0') if (*p == '\0')
{ {
if (*tPtr == '\0' || Unicode::GetCategory(*tPtr) & Unicode::Category_Separator_Space) if (*tPtr == '\0' || Detail::IsSpace(*tPtr))
return ptr - m_sharedString->string.get(); return ptr - m_sharedString->string.get();
else else
break; break;
@ -2056,7 +2061,7 @@ namespace Nz
while ((ptr = std::strstr(ptr, string.GetConstBuffer())) != nullptr) while ((ptr = std::strstr(ptr, string.GetConstBuffer())) != nullptr)
{ {
// If the word is really alone // If the word is really alone
if ((ptr == m_sharedString->string.get() || Unicode::GetCategory(*(ptr-1)) & Unicode::Category_Separator_Space) && (*(ptr+m_sharedString->size) == '\0' || Unicode::GetCategory(*(ptr+m_sharedString->size)) & Unicode::Category_Separator_Space)) if ((ptr == m_sharedString->string.get() || Detail::IsSpace(*(ptr-1))) && (*(ptr+m_sharedString->size) == '\0' || Detail::IsSpace(*(ptr+m_sharedString->size))))
return ptr - m_sharedString->string.get(); return ptr - m_sharedString->string.get();
ptr++; ptr++;
@ -2219,7 +2224,7 @@ namespace Nz
utf8::unchecked::iterator<const char*> it(ptr); utf8::unchecked::iterator<const char*> it(ptr);
do do
{ {
if (Unicode::GetCategory(*it) & Unicode::Category_Separator) if (Detail::IsSpace(*it))
{ {
endPos = static_cast<std::intmax_t>(it.base() - m_sharedString->string.get() - 1); endPos = static_cast<std::intmax_t>(it.base() - m_sharedString->string.get() - 1);
break; break;
@ -2231,7 +2236,7 @@ namespace Nz
{ {
do do
{ {
if (Unicode::GetCategory(*ptr) & Unicode::Category_Separator) if (Detail::IsSpace(*ptr))
{ {
endPos = static_cast<std::intmax_t>(ptr - m_sharedString->string.get() - 1); endPos = static_cast<std::intmax_t>(ptr - m_sharedString->string.get() - 1);
break; break;
@ -2265,7 +2270,7 @@ namespace Nz
utf8::unchecked::iterator<const char*> it(ptr); utf8::unchecked::iterator<const char*> it(ptr);
do do
{ {
if (Unicode::GetCategory(*it) & Unicode::Category_Separator) if (Detail::IsSpace(*it))
inWord = false; inWord = false;
else else
{ {
@ -2283,7 +2288,7 @@ namespace Nz
{ {
do do
{ {
if (Unicode::GetCategory(*ptr) & Unicode::Category_Separator) if (Detail::IsSpace(*ptr))
inWord = false; inWord = false;
else else
{ {
@ -3414,7 +3419,7 @@ namespace Nz
utf8::unchecked::iterator<const char*> it(ptr); utf8::unchecked::iterator<const char*> it(ptr);
do do
{ {
if (Unicode::GetCategory(*it) & Unicode::Category_Separator) if (Detail::IsSpace(*it))
{ {
if (inword) if (inword)
{ {
@ -3435,7 +3440,7 @@ namespace Nz
const char* limit = &m_sharedString->string[m_sharedString->size]; const char* limit = &m_sharedString->string[m_sharedString->size];
do do
{ {
if (Unicode::GetCategory(*ptr) & Unicode::Category_Separator) if (Detail::IsSpace(*ptr))
{ {
if (inword) if (inword)
{ {
@ -4240,7 +4245,7 @@ namespace Nz
utf8::unchecked::iterator<const char*> it(m_sharedString->string.get()); utf8::unchecked::iterator<const char*> it(m_sharedString->string.get());
do do
{ {
if (*it != '\t' && (Unicode::GetCategory(*it) & Unicode::Category_Separator) == 0) if (!Detail::IsSpace(*it))
break; break;
} }
while (*++it); while (*++it);
@ -4255,7 +4260,7 @@ namespace Nz
utf8::unchecked::iterator<const char*> it(&m_sharedString->string[m_sharedString->size]); utf8::unchecked::iterator<const char*> it(&m_sharedString->string[m_sharedString->size]);
while ((it--).base() != m_sharedString->string.get()) while ((it--).base() != m_sharedString->string.get())
{ {
if (*it != '\t' && (Unicode::GetCategory(*it) & Unicode::Category_Separator) == 0) if (!Detail::IsSpace(*it))
break; break;
} }
@ -4271,8 +4276,8 @@ namespace Nz
{ {
for (; startPos < m_sharedString->size; ++startPos) for (; startPos < m_sharedString->size; ++startPos)
{ {
char c = m_sharedString->string[startPos]; char c = m_sharedString->string[startPos];
if (c != '\t' && (Unicode::GetCategory(c) & Unicode::Category_Separator) == 0) if (!Detail::IsSpace(c))
break; break;
} }
} }
@ -4282,8 +4287,8 @@ namespace Nz
{ {
for (; endPos > 0; --endPos) for (; endPos > 0; --endPos)
{ {
char c = m_sharedString->string[endPos]; char c = m_sharedString->string[endPos];
if (c != '\t' && (Unicode::GetCategory(c) & Unicode::Category_Separator) == 0) if (!Detail::IsSpace(c))
break; break;
} }
} }