OpenMW
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Macros Groups Pages
stringops.hpp
Go to the documentation of this file.
1 #ifndef MISC_STRINGOPS_H
2 #define MISC_STRINGOPS_H
3 
4 #include <cctype>
5 #include <cstring>
6 #include <string>
7 #include <algorithm>
8 
9 #include "utf8stream.hpp"
10 
11 namespace Misc
12 {
14 {
15  struct ci
16  {
17  bool operator()(char x, char y) const {
18  return toLower(x) < toLower(y);
19  }
20  };
21 
22 public:
23 
/// Maps the upper-case letters 'A'..'Z' to 'a'..'z'; every other char
/// value (digits, punctuation, bytes outside 7-bit ASCII) is returned unchanged.
/// Independent of the current locale.
/// \note Relies on 'A'..'Z' and 'a'..'z' being contiguous runs, as they are
///       in ASCII and every ASCII-compatible execution character set.
static char toLower(char c)
{
    const bool isUpperCaseLetter = (c >= 'A') && (c <= 'Z');
    if (!isUpperCaseLetter)
        return c;

    return static_cast<char>(c - 'A' + 'a');
}
60 
62  {
63  // Russian alphabet
64  if (ch >= 0x0410 && ch < 0x0430)
65  return ch += 0x20;
66 
67  // Cyrillic IO character
68  if (ch == 0x0401)
69  return ch += 0x50;
70 
71  // Latin alphabet
72  if (ch >= 0x41 && ch < 0x60)
73  return ch += 0x20;
74 
75  // Deutch characters
76  if (ch == 0xc4 || ch == 0xd6 || ch == 0xdc)
77  return ch += 0x20;
78  if (ch == 0x1e9e)
79  return 0xdf;
80 
81  // TODO: probably we will need to support characters from other languages
82 
83  return ch;
84  }
85 
86  static std::string lowerCaseUtf8(const std::string str)
87  {
88  if (str.empty())
89  return str;
90 
91  // Decode string as utf8 characters, convert to lower case and pack them to string
92  std::string out;
93  Utf8Stream stream (str.c_str());
94  while (!stream.eof ())
95  {
96  Utf8Stream::UnicodeChar character = toLowerUtf8(stream.peek());
97 
98  if (character <= 0x7f)
99  out.append(1, static_cast<char>(character));
100  else if (character <= 0x7ff)
101  {
102  out.append(1, static_cast<char>(0xc0 | ((character >> 6) & 0x1f)));
103  out.append(1, static_cast<char>(0x80 | (character & 0x3f)));
104  }
105  else if (character <= 0xffff)
106  {
107  out.append(1, static_cast<char>(0xe0 | ((character >> 12) & 0x0f)));
108  out.append(1, static_cast<char>(0x80 | ((character >> 6) & 0x3f)));
109  out.append(1, static_cast<char>(0x80 | (character & 0x3f)));
110  }
111  else
112  {
113  out.append(1, static_cast<char>(0xf0 | ((character >> 18) & 0x07)));
114  out.append(1, static_cast<char>(0x80 | ((character >> 12) & 0x3f)));
115  out.append(1, static_cast<char>(0x80 | ((character >> 6) & 0x3f)));
116  out.append(1, static_cast<char>(0x80 | (character & 0x3f)));
117  }
118 
119  stream.consume();
120  }
121 
122  return out;
123  }
124 
125  static bool ciLess(const std::string &x, const std::string &y) {
126  return std::lexicographical_compare(x.begin(), x.end(), y.begin(), y.end(), ci());
127  }
128 
129  static bool ciEqual(const std::string &x, const std::string &y) {
130  if (x.size() != y.size()) {
131  return false;
132  }
133  std::string::const_iterator xit = x.begin();
134  std::string::const_iterator yit = y.begin();
135  for (; xit != x.end(); ++xit, ++yit) {
136  if (toLower(*xit) != toLower(*yit)) {
137  return false;
138  }
139  }
140  return true;
141  }
142 
143  static int ciCompareLen(const std::string &x, const std::string &y, size_t len)
144  {
145  std::string::const_iterator xit = x.begin();
146  std::string::const_iterator yit = y.begin();
147  for(;xit != x.end() && yit != y.end() && len > 0;++xit,++yit,--len)
148  {
149  char left = *xit;
150  char right = *yit;
151  if (left == right)
152  continue;
153 
154  left = toLower(left);
155  right = toLower(right);
156  int res = left - right;
157  if(res != 0)
158  return (res > 0) ? 1 : -1;
159  }
160  if(len > 0)
161  {
162  if(xit != x.end())
163  return 1;
164  if(yit != y.end())
165  return -1;
166  }
167  return 0;
168  }
169 
171  static void lowerCaseInPlace(std::string &inout) {
172  for (unsigned int i=0; i<inout.size(); ++i)
173  inout[i] = toLower(inout[i]);
174  }
175 
177  static std::string lowerCase(const std::string &in)
178  {
179  std::string out = in;
180  lowerCaseInPlace(out);
181  return out;
182  }
183 
184  struct CiComp
185  {
186  bool operator()(const std::string& left, const std::string& right) const
187  {
188  return ciLess(left, right);
189  }
190  };
191 
192 
194  template<typename Iterator, typename T>
195  static Iterator partialBinarySearch(Iterator begin, Iterator end, const T& key)
196  {
197  const Iterator notFound = end;
198 
199  while(begin < end)
200  {
201  const Iterator middle = begin + (std::distance(begin, end) / 2);
202 
203  int comp = Misc::StringUtils::ciCompareLen((*middle), key, (*middle).size());
204 
205  if(comp == 0)
206  return middle;
207  else if(comp > 0)
208  end = middle;
209  else
210  begin = middle + 1;
211  }
212 
213  return notFound;
214  }
215 
/// Replaces all occurrences of a string in another string.
///
/// Matches are searched left to right; after each replacement the search
/// resumes behind the inserted text, so replacement text is never rescanned.
/// \param str     String to modify in place.
/// \param what    Character sequence to search for.
/// \param with    Character sequence to insert instead.
/// \param whatLen Number of characters of \a what to use; std::string::npos
///                (the default) means strlen(what).
/// \param withLen Number of characters of \a with to use; std::string::npos
///                (the default) means strlen(with).
/// \return Reference to \a str.
/// \note An empty search pattern leaves \a str unchanged.
static std::string &replaceAll(std::string &str, const char *what, const char *with,
    std::size_t whatLen=std::string::npos, std::size_t withLen=std::string::npos)
{
    if (whatLen == std::string::npos)
        whatLen = std::strlen(what);

    if (withLen == std::string::npos)
        withLen = std::strlen(with);

    // An empty pattern is found at every offset, so the loop below would
    // never terminate; treat it as "nothing to replace".
    if (whatLen == 0)
        return str;

    std::size_t found;
    std::size_t offset = 0;
    while((found = str.find(what, offset, whatLen)) != std::string::npos)
    {
        str.replace(found, whatLen, with, withLen);
        // Skip past the inserted text so it is not matched again.
        offset = found + withLen;
    }
    return str;
}
244 };
245 
246 }
247 
248 #endif
Definition: stringops.hpp:13
static bool ciEqual(const std::string &x, const std::string &y)
Definition: stringops.hpp:129
bool operator()(const std::string &left, const std::string &right) const
Definition: stringops.hpp:186
static Iterator partialBinarySearch(Iterator begin, Iterator end, const T &key)
Performs a binary search on a sorted container for a string that 'key' starts with.
Definition: stringops.hpp:195
static bool ciLess(const std::string &x, const std::string &y)
Definition: stringops.hpp:125
int comp(Arguments &info)
Definition: esmtool.cpp:525
static char toLower(char c)
Definition: stringops.hpp:27
static std::string lowerCase(const std::string &in)
Returns lower case copy of input string.
Definition: stringops.hpp:177
bool operator()(char x, char y) const
Definition: stringops.hpp:17
Definition: utf8stream.hpp:7
static int ciCompareLen(const std::string &x, const std::string &y, size_t len)
Definition: stringops.hpp:143
Definition: stringops.hpp:184
Definition: stringops.hpp:15
uint32_t UnicodeChar
Definition: utf8stream.hpp:11
static std::string & replaceAll(std::string &str, const char *what, const char *with, std::size_t whatLen=std::string::npos, std::size_t withLen=std::string::npos)
Replaces all occurrences of a string in another string.
Definition: stringops.hpp:226
float distance(const ESM::Pathgrid::Point &point, float x, float y, float z)
Definition: pathfinding.cpp:69
static void lowerCaseInPlace(std::string &inout)
Transforms input string to lower case w/o copy.
Definition: stringops.hpp:171
static std::string lowerCaseUtf8(const std::string str)
Definition: stringops.hpp:86
static Utf8Stream::UnicodeChar toLowerUtf8(Utf8Stream::UnicodeChar ch)
Definition: stringops.hpp:61