
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/****************************************************************************
Unicode符号范围 | UTF-8编码方式
(十六进制) | (二进制)
0000 0000-0000 007F:0xxxxxxx
0000 0080-0000 07FF:110xxxxx 10xxxxxx
0000 0800-0000 FFFF:1110xxxx 10xxxxxx 10xxxxxx
0001 0000-001F FFFF:11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
0020 0000-03FF FFFF:111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
0400 0000-7FFF FFFF:1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
**************************************************************************/
/*
 * Lookup table indexed by the value of a byte (0x00-0xFF), giving the number
 * of bytes in the UTF-8 sequence that the byte would start, following the
 * encoding ranges listed in the comment above (up to 6 bytes).
 *
 * The table needs one entry per possible byte value; with fewer entries
 * UTFLEN() would read past the end of the array for most inputs.
 *
 * Bytes that cannot start a sequence (continuation bytes 0x80-0xBF and
 * 0xFE/0xFF) map to 1, so a scanner that advances by UTFLEN() always makes
 * progress. This table therefore does not report invalid bytes.
 */
unsigned char utf8_look_for_table[] =
{
    /* 0x00-0x7F: single-byte (ASCII) */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0x80-0xBF: continuation bytes, treated as length 1 */
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    /* 0xC0-0xDF: 110xxxxx, two-byte sequences */
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    /* 0xE0-0xEF: 1110xxxx, three-byte sequences */
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    /* 0xF0-0xF7: 4 bytes, 0xF8-0xFB: 5, 0xFC-0xFD: 6, 0xFE-0xFF: 1 */
    4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
};

/*
 * Sequence length for lead byte x. The cast keeps the index in 0..255 even
 * when x is a (possibly signed) char holding a byte >= 0x80.
 */
#define UTFLEN(x) (utf8_look_for_table[(unsigned char)(x)])
/*
 * Return the number of bytes in the UTF-8 sequence introduced by lead byte
 * `ch`, using the 1- to 6-byte scheme described at the top of this file.
 *
 * Returns 0 when `ch` cannot start a sequence: a continuation byte
 * (0x80-0xBF) or 0xFE/0xFF, which never appear in UTF-8.
 *
 * Only the bit pattern of the lead byte is inspected; 0xC0 and 0xC1 are
 * still reported as two-byte lead bytes.
 *
 * Declared `static inline` because a plain `inline` definition in C99 and
 * later does not provide an external definition, so any call the compiler
 * declines to inline would fail at link time.
 */
static inline int GetUtf8charByteNum(unsigned char ch)
{
    if (ch < 0x80) {
        return 1;               /* 0xxxxxxx */
    }
    if (ch < 0xC0) {
        return 0;               /* 10xxxxxx: continuation byte, not a lead */
    }
    if (ch < 0xE0) {
        return 2;               /* 110xxxxx */
    }
    if (ch < 0xF0) {
        return 3;               /* 1110xxxx */
    }
    if (ch < 0xF8) {
        return 4;               /* 11110xxx */
    }
    if (ch < 0xFC) {
        return 5;               /* 111110xx */
    }
    if (ch < 0xFE) {
        return 6;               /* 1111110x */
    }
    return 0;                   /* 0xFE / 0xFF are not valid lead bytes */
}
/*
 * Check whether the NUL-terminated string `str` is structurally valid UTF-8.
 *
 * Each lead byte must be accepted by GetUtf8charByteNum(), and must be
 * followed by the announced number of continuation bytes (10xxxxxx).
 *
 * Returns 1 when every sequence is complete and well-formed (an empty string
 * qualifies), 0 when `str` is NULL, a byte is out of place, or the string
 * ends in the middle of a multi-byte sequence.
 */
int IsUtf8Format(const char *str)
{
    int pending = 0;    /* bytes still expected for the current character */

    if (NULL == str) {
        return 0;
    }

    for (const char *ptr = str; *ptr != '\0'; ptr++) {
        unsigned char ch = (unsigned char)*ptr;

        if (pending == 0) {
            /* Start of a character: the lead byte announces its length. */
            pending = GetUtf8charByteNum(ch);
            if (pending == 0) {
                return 0;
            }
        } else if ((ch & 0xC0) != 0x80) {
            /* Inside a multi-byte character every byte must be 10xxxxxx. */
            return 0;
        }
        pending--;
    }

    /* A non-zero count here means the last character was cut short. */
    return pending == 0;
}
/*
 * Count the characters (UTF-8 sequences) in the NUL-terminated string `str`.
 *
 * The string is walked lead byte by lead byte, skipping the number of bytes
 * reported by GetUtf8charByteNum(). Continuation bytes are not inspected.
 *
 * Returns the character count, or 0 when `str` is NULL, empty, contains a
 * byte that cannot start a sequence, or ends with a sequence that is longer
 * than the bytes remaining in the string.
 */
int GetUtf8Length(char *str)
{
    if (NULL == str) {
        return 0;
    }

    const char *ptr = str;
    size_t remaining = strlen(str);     /* bytes not yet consumed */
    int count = 0;

    while (remaining > 0) {
        int byteNum = GetUtf8charByteNum((unsigned char)*ptr);

        /*
         * Refuse to step over the terminating NUL: a truncated trailing
         * sequence would otherwise move ptr outside the string.
         */
        if (byteNum == 0 || (size_t)byteNum > remaining) {
            return 0;
        }
        ptr += byteNum;
        remaining -= (size_t)byteNum;
        count++;
    }
    return count;
}
/*
 * Compute the number of SMS charges for a text of `len` characters.
 *
 *   len <= 0          -> 0
 *   1 .. 70           -> 1
 *   71 .. 500         -> len / 67, rounded up
 *   len > 500         -> 1
 *
 * NOTE(review): 70 and 67 are presumably the single-message and per-segment
 * character limits, and 500 an upper bound on message length - confirm.
 * Returning 1 for len > 500 is carried over unchanged and looks
 * unintentional; verify against the billing rules.
 */
int GetChargeNum(int len)
{
    enum {
        SINGLE_MESSAGE_MAX = 70,
        SEGMENT_SIZE = 67,
        SPLIT_MESSAGE_MAX = 500
    };

    if (len <= 0) {
        return 0;
    }
    if (len > SINGLE_MESSAGE_MAX && len <= SPLIT_MESSAGE_MAX) {
        /* Ceiling division: an exact multiple needs no extra segment. */
        return (len + SEGMENT_SIZE - 1) / SEGMENT_SIZE;
    }
    return 1;
}
/*
 * Command-line driver: takes the text to analyse as argv[1], echoes it,
 * checks that it is UTF-8, then prints its character count and the number
 * of SMS charges.
 *
 * Exit status: 1 after a full report, 0 on every early exit (missing
 * argument, non-UTF-8 text, zero length, zero charges).
 * NOTE(review): this is the reverse of the usual 0-on-success convention;
 * it is kept as-is because scripts may depend on it - confirm before changing.
 */
int main(int argc, char **argv)
{
    if (argc < 2) {
        fprintf(stderr, "usage: %s <text>\n", argc > 0 ? argv[0] : "utf8len");
        return 0;
    }

    const char *text = argv[1];
    printf("%s\n", text);

    if (!IsUtf8Format(text)) {
        printf("the text is not the Format of utf8\n");
        return 0;
    }

    int len = GetUtf8Length(argv[1]);
    if (len == 0) {
        return 0;
    }
    printf("the length of text: %d\n", len);

    int num = GetChargeNum(len);
    if (num == 0) {
        return 0;
    }
    printf("the chargeNumber of sms: %d\n", num);

    return 1;
}
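/*
 * Summary: the code above is everything the "内存溢出" (Memory Overflow) site
 * collected on checking in C whether a string is UTF-8 and counting its
 * characters; we hope it helps you solve the programming problems you ran
 * into on this topic.
 * If you find the Memory Overflow site useful, please recommend it to your
 * programmer friends.
 * Sharing is welcome; when reposting, please credit the source: Memory Overflow.
 * Scan with WeChat
 * Scan with Alipay
 * Comments (0)
 */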