#include <stdlib.h>
#include "utf8.h"
/*
 * Validate the NUL-terminated byte string `cs` as UTF-8 and, when `sb` is
 * non-NULL, hand every decoded code point to splitbuf_raw() as the raw
 * bytes of a u32.
 *
 * Lead bytes announcing 2- to 6-byte sequences are accepted.  A sequence
 * is rejected when a continuation byte is missing or malformed, when the
 * lead byte is not a valid lead, or when the decoded value is small enough
 * to fit a shorter encoding (overlong form).
 *
 * Returns NULL when the whole string was consumed without error, otherwise
 * a pointer to the first byte of the offending sequence.  Code points
 * decoded before the error have already been passed to splitbuf_raw().
 */
const char *u8_to_u4(splitbuf_t *sb, const char *cs) {
	const char *p;

	for (p = cs; *p != '\0'; p++) {
		const char *start = p;	/* what we report if this sequence is bad */
		char lead = *p;
		u32 cp;

		if (!(lead & 0x80)) {
			/* Plain 7-bit character. */
			cp = lead;
		} else {
			int tail;	/* continuation bytes still expected */
			u32 lowest;	/* smallest value that needs this length */

			if ((lead & 0xFE) == 0xFC) {
				tail = 5;
				cp = lead & 0x01;
				lowest = 1 << 26;
			} else if ((lead & 0xFC) == 0xF8) {
				tail = 4;
				cp = lead & 0x03;
				lowest = 1 << 21;
			} else if ((lead & 0xF8) == 0xF0) {
				tail = 3;
				cp = lead & 0x07;
				lowest = 1 << 16;
			} else if ((lead & 0xF0) == 0xE0) {
				tail = 2;
				cp = lead & 0x0F;
				lowest = 1 << 11;
			} else if ((lead & 0xE0) == 0xC0) {
				tail = 1;
				cp = lead & 0x1F;
				lowest = 1 << 7;
			} else {
				/* Stray continuation byte or 0xFE/0xFF. */
				return start;
			}

			/*
			 * Each continuation byte contributes six payload bits.
			 * A NUL terminator fails the 10xxxxxx test, so a
			 * truncated sequence never reads past the end.
			 */
			while (tail-- > 0) {
				char next = *++p;

				if ((next & 0xC0) != 0x80)
					return start;
				cp = (cp << 6) | (next & 0x3F);
			}

			/* Reject overlong encodings. */
			if (cp < lowest)
				return start;
		}

		if (sb != NULL)
			splitbuf_raw(sb, (char *)&cp, sizeof cp);
	}
	return NULL;
}
/*
 * Check that `c` is well-formed UTF-8 without producing any output.
 * Runs u8_to_u4() with no destination buffer and passes its result
 * through: NULL if the string is valid, otherwise a pointer to the
 * start of the first bad sequence.
 */
const char *u8check(const char *c) {
	const char *bad = u8_to_u4(NULL, c);

	return bad;
}
/*
 * WCC1 is the second entry of wcc[].  Code points below it bypass the
 * table: utf8_width() and utf8_strwidth() return WWS0 for them directly.
 */
#define WCC1 0x300
/*
 * Ascending list of code points used as range boundaries by utf8_width().
 * The lookup binary-searches this table for the last entry that is not
 * greater than the code point and uses that entry's index to read wws[],
 * so entry i effectively covers the code points from wcc[i] up to (but
 * not including) wcc[i + 1].  The entries must stay sorted for the search
 * to work, and wws[] must provide a value for every index used here.
 */
static const unichar_t wcc[] = {
0x0, WCC1, 0x34F, 0x360, 0x363, 0x483, 0x487, 0x488, 0x48A, 0x591,
0x5A2, 0x5A3, 0x5BA, 0x5BB, 0x5BE, 0x5BF, 0x5C0, 0x5C1, 0x5C3, 0x5C4,
0x5C5, 0x64B, 0x656, 0x670, 0x671, 0x6D6, 0x6E5, 0x6E7, 0x6E9, 0x6EA,
0x6EE, 0x70F, 0x710, 0x711, 0x712, 0x730, 0x74B, 0x7A6, 0x7B1, 0x901,
0x903, 0x93C, 0x93D, 0x941, 0x949, 0x94D, 0x94E, 0x951, 0x955, 0x962,
0x964, 0x981, 0x982, 0x9BC, 0x9BD, 0x9C1, 0x9C5, 0x9CD, 0x9CE, 0x9E2,
0x9E4, 0xA02, 0xA03, 0xA3C, 0xA3D, 0xA41, 0xA43, 0xA47, 0xA49, 0xA4B,
0xA4E, 0xA70, 0xA72, 0xA81, 0xA83, 0xABC, 0xABD, 0xAC1, 0xAC6, 0xAC7,
0xAC9, 0xACD, 0xACE, 0xB01, 0xB02, 0xB3C, 0xB3D, 0xB3F, 0xB40, 0xB41,
0xB44, 0xB4D, 0xB4E, 0xB56, 0xB57, 0xB82, 0xB83, 0xBC0, 0xBC1, 0xBCD,
0xBCE, 0xC3E, 0xC41, 0xC46, 0xC49, 0xC4A, 0xC4E, 0xC55, 0xC57, 0xCBF,
0xCC0, 0xCC6, 0xCC7, 0xCCC, 0xCCE, 0xD41, 0xD44, 0xD4D, 0xD4E, 0xDCA,
0xDCB, 0xDD2, 0xDD5, 0xDD6, 0xDD7, 0xE31, 0xE32, 0xE34, 0xE3B, 0xE47,
0xE4F, 0xEB1, 0xEB2, 0xEB4, 0xEBA, 0xEBB, 0xEBD, 0xEC8, 0xECE, 0xF18,
0xF1A, 0xF35, 0xF36, 0xF37, 0xF38, 0xF39, 0xF3A, 0xF71, 0xF7F, 0xF80,
0xF85, 0xF86, 0xF88, 0xF90, 0xF98, 0xF99, 0xFBD, 0xFC6, 0xFC7, 0x102D,
0x1031, 0x1032, 0x1033, 0x1036, 0x1038, 0x1039, 0x103A, 0x1058, 0x105A,
0x1100, 0x1160, 0x17B7, 0x17BE, 0x17C6, 0x17C7, 0x17C9, 0x17D4, 0x180B,
0x180F, 0x18A9, 0x18AA, 0x200B, 0x2010, 0x202A, 0x202F, 0x206A, 0x2070,
0x20D0, 0x20E4, 0x2E80, 0x3008, 0x300C, 0x3014, 0x3016, 0x3018, 0x301C,
0x302A, 0x3030, 0x303F, 0x3041, 0x3095, 0x3099, 0x309B, 0xA4C7, 0xAC00,
0xD7A4, 0xF8F0, 0xF900, 0xFA2E, 0xFB1E, 0xFB1F, 0xFE20, 0xFE24, 0xFE30,
0xFE6C, 0xFEFF, 0xFF00, 0xFF01, 0xFF5F, 0xFFE0, 0xFFE7, 0xFFF9, 0xFFFC,
/* Boundaries above 0xFFFF; always compiled in as written. */
#if 1
0x1D167, 0x1D16A, 0x1D173, 0x1D183, 0x1D185, 0x1D18C, 0x1D1AA, 0x1D1AE,
0x20000, 0x2A6D7, 0x2F800, 0x2FA1E, 0xE0001, 0xE0002, 0xE0020, 0xE0080
#endif
};
/* Number of entries in wcc[]; the initial upper bound of the search. */
static const int wcclen = sizeof wcc / sizeof *wcc;
/*
 * WWS0 is the first entry of wws[] and the value returned for every code
 * point below WCC1 without consulting the tables.
 */
#define WWS0 1
/*
 * Width values parallel to wcc[]: wws[i] is what utf8_width() returns for
 * a code point whose last not-greater boundary is wcc[i].  It must have
 * at least as many entries as wcc[], because it is indexed with a wcc[]
 * index.  The values used are 0, 1 and 2.
 * NOTE(review): presumably these are display column counts (0 for
 * combining/zero-width, 2 for wide characters) -- confirm against callers.
 */
static const int_least8_t wws[] = {
WWS0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 2, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 1,
2, 1, 2, 1, 2, 0, 2, 1, 2, 1, 0, 2, 1, 2, 1, 0, 2, 1, 0, 1, 0, 1, 2, 1,
0, 1, 2, 1, 2, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 2, 1, 2, 1, 0, 1, 0, 1
};
/*
 * Look up the width value for a single code point.
 *
 * Code points below WCC1 yield WWS0 immediately.  Anything else is
 * resolved by binary search over wcc[]: the index of the last boundary
 * that is not greater than `c` selects the result from wws[].
 */
size_t utf8_width(unichar_t c) {
	int lo, hi;

	if (c < WCC1)
		return WWS0;

	/*
	 * Invariant: wcc[lo] <= c, and every entry at index hi or beyond
	 * is greater than c (hi may equal wcclen).  wcc[0] is 0, so the
	 * invariant holds from the start.
	 */
	lo = 0;
	hi = wcclen;
	while (hi - lo > 1) {
		int mid = (lo + hi) / 2;

		if (wcc[mid] > c)
			hi = mid;
		else
			lo = mid;
	}
	return wws[lo];
}
/*
 * Sum the width values of every code point in the zero-terminated
 * array `s`.  Code points below WCC1 are counted as WWS0 without a
 * call into utf8_width(); all others go through the table lookup.
 */
size_t utf8_strwidth(const unichar_t *s) {
	const unichar_t *p;
	size_t total = 0;

	for (p = s; *p; p++) {
		if (*p < WCC1)
			total += WWS0;
		else
			total += utf8_width(*p);
	}
	return total;
}