1 #ifndef MISC_UTF8ITER_HPP
2 #define MISC_UTF8ITER_HPP
12 typedef unsigned char const *
Point;
59 if ((*cur & 0x80) == 0)
63 return std::make_pair (chr, cur);
72 return std::make_pair (
sBadChar(), cur);
74 Point eoc = cur + octets;
77 return std::make_pair (
sBadChar(), cur);
81 if ((*cur & 0xC0) != 0x80)
82 return std::make_pair (
sBadChar(), cur);
87 return std::make_pair (chr, cur);
92 static std::pair <int, UnicodeChar>
octet_count (
unsigned char octet)
96 unsigned char mark = 0xC0;
97 unsigned char mask = 0xE0;
99 for (octets = 1; octets <= 5; ++octets)
101 if ((octet & mask) == mark)
104 mark = (mark >> 1) | 0x80;
105 mask = (mask >> 1) | 0x80;
108 return std::make_pair (octets, octet & ~mask);
unsigned char const * Point
Definition: utf8stream.hpp:12
Utf8Stream(Point begin, Point end)
Definition: utf8stream.hpp:17
bool eof() const
Definition: utf8stream.hpp:32
UnicodeChar consume()
Definition: utf8stream.hpp:49
Point end
Definition: utf8stream.hpp:118
static UnicodeChar sBadChar()
Definition: utf8stream.hpp:15
Definition: utf8stream.hpp:7
Utf8Stream(const char *str)
Definition: utf8stream.hpp:22
Utf8Stream(std::pair< Point, Point > range)
Definition: utf8stream.hpp:27
static std::pair< int, UnicodeChar > octet_count(unsigned char octet)
Definition: utf8stream.hpp:92
uint32_t UnicodeChar
Definition: utf8stream.hpp:11
static std::pair< UnicodeChar, Point > decode(Point cur, Point end)
Definition: utf8stream.hpp:57
Point current() const
Definition: utf8stream.hpp:37
Point nxt
Definition: utf8stream.hpp:117
void next()
Definition: utf8stream.hpp:111
UnicodeChar peek()
Definition: utf8stream.hpp:42
Point cur
Definition: utf8stream.hpp:116
UnicodeChar val
Definition: utf8stream.hpp:119