// =================================================================================================================

//        xs_UTF8Decode

// =================================================================================================================

int32 xs_UTF8Decode (uint32 &code, const xs_utf8* str, int32 len, bool strict)

{

          #define xs_U8(p,sh)          ((((xs_utf32)(str[p]))&0x3f)<<(sh))                                //bits from each "pair" byte

          #define xs_UErr8(p)          ((p)>=len || str[p]==0 || (strict&&(((str[p])&0xC0)!=0x80)))       //UTF-8 valid "pair" byte

 

          if (str==0||len==0)          {code = xs_UTF_Replace; return 0;}

          if ((*str&0x80)==0)          {code = str[0];    return 1;}

 

          if (xs_UErr8(1))             {code = xs_UTF_Replace; return strict ? 0 : 1;} //error

          if ((*str&0xe0)==0xc0)       {code = ((str[0]&0x1f)<<6)  + xs_U8(1, 0); return 2;}

 

          if (xs_UErr8(2))             {code = xs_UTF_Replace; return strict ? 0 : 2;} //error

          if ((*str&0xf0)==0xe0)       {code = ((str[0]&0x0f)<<12) + xs_U8(1, 6)  + xs_U8(2, 0); return 3;}

 

          if (xs_UErr8(3))             {code = xs_UTF_Replace; return strict ? 0 : 3;} //error

          if ((*str&0xf8)==0xf0)       {code = ((str[0]&0x07)<<18) + xs_U8(1, 12) + xs_U8(2, 6)  + xs_U8(3, 0); return 4;}

         

/*        //illegal in Unicode v3.2

          if (xs_UErr8(4))             {code = xs_UTF_Replace; return 0;}    //error

          if ((*str&0xfc)==0xf8)       {code = ((str[0]&0x03)<<24) + xs_U8(1, 18) + xs_U8(2, 12) + xs_U8(3, 6)  + xs_U8(4, 0); return 5;}

         

          if (xs_UErr8(5))             {code = xs_UTF_Replace; return 0;}    //error

          if ((*str&0xfe)==0xfc)       {code = ((str[0]&0x01)<<30) + xs_U8(1, 24) + xs_U8(2, 18) + xs_U8(3, 12) + xs_U8(4, 6)  + xs_U8(5, 0); return 6;}

*/

          // error

          code = xs_UTF_Replace;

          return strict ? 0 : 1;

}

 

// =================================================================================================================

//        UTF-8 Character encoding

// =================================================================================================================

//

// 0x00000000 - 0x0000007F: 0xxxxxxx

// 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx

// 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx

// 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

// 0x00200000 - 0x03FFFFFF: 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx                       //illegal in v3.2

// 0x04000000 - 0x7FFFFFFF: 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx              //illegal in v3.2

//

// =================================================================================================================