glibc库的memset和自写的memset效率对比
glibc中memset的实现思路如下:
#define OPSIZ (sizeof(unsigned long int))
1、长度len小于8的时候,逐字节进行赋值(每次写1个字节)。
2、长度len大于或者等于8的时候,先逐字节写入,直到目标地址按OPSIZ对齐;然后每次写入OPSIZ个字节(32位机器上OPSIZ为4字节,64位机器上OPSIZ为8字节),每轮循环连续写8次;剩下的部分接着按每次OPSIZ字节写入,最后不足OPSIZ的部分按每次1字节写入。即:len = a + 8 * OPSIZ * x + OPSIZ * y + 1 * z,其中a为对齐所用的字节数(0 <= a < OPSIZ)。
代码如下:
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <fcntl.h>
#include <sys/time.h>
/*
 * Word type used for the bulk stores in lib_memset, and its size in bytes.
 * size_t is taken from the standard headers included above; it must not be
 * redefined as a macro (that name is reserved once those headers are
 * included, and redefining it as unsigned int narrows lengths to 32 bits).
 */
typedef unsigned long int op_t;
#define OPSIZ (sizeof(op_t))
typedef unsigned char byte;

/* Reference only (never compiled): layout of struct timeval from <sys/time.h>. */
#if 0
struct timeval {
time_t tv_sec; /* seconds */
suseconds_t tv_usec; /* microseconds */
};
#endif

/*
 * lib_memset - fill LEN bytes starting at DSTPP with the byte value of C,
 * storing a whole op_t at a time wherever possible.
 *
 * dstpp: start of the destination region
 * c:     fill value; only its low 8 bits (converted to unsigned char) are used
 * len:   number of bytes to fill
 *
 * Returns DSTPP unchanged.
 *
 * Regions shorter than 8 bytes are filled byte by byte.  Longer regions are
 * split as: alignment bytes + 8*OPSIZ-byte chunks + OPSIZ-byte words +
 * trailing bytes.
 *
 * NOTE: the word stores go through a pointer cast, exactly as in the
 * original code; for objects with a declared non-character type this is
 * formally a strict-aliasing concern.
 */
void *lib_memset (void *dstpp, int c, size_t len)
{
    /* uintptr_t round-trips a pointer without truncation; long int does not
       on platforms where long is narrower than a pointer. */
    uintptr_t dstp = (uintptr_t) dstpp;

    if (len >= 8)
    {
        size_t xlen;
        op_t cccc;

        /* Replicate the fill byte into every byte of one word. */
        cccc = (unsigned char) c;   /* e.g. F4 */
        cccc |= cccc << 8;          /* F4F4 */
        cccc |= cccc << 16;         /* F4F4F4F4 */
        if (OPSIZ > 4)
        {
            /* Do the shift in two steps to avoid a warning if long has
               32 bits.  Result: F4F4F4F4F4F4F4F4 */
            cccc |= (cccc << 16) << 16;
        }

        /* Fill single bytes until the address is OPSIZ-aligned.  At most
           OPSIZ - 1 bytes are consumed here, which is fewer than the 8
           guaranteed by the guard above when OPSIZ <= 8, so there is no
           need to test for len == 0 in this loop. */
        while (dstp % OPSIZ != 0)
        {
            ((byte *) dstp)[0] = (byte) c;
            dstp += 1;
            len -= 1;
        }

        /* Write 8 op_t per iteration until fewer than 8 op_t remain. */
        xlen = len / (OPSIZ * 8);
        while (xlen > 0)
        {
            op_t *word = (op_t *) dstp;

            word[0] = cccc;
            word[1] = cccc;
            word[2] = cccc;
            word[3] = cccc;
            word[4] = cccc;
            word[5] = cccc;
            word[6] = cccc;
            word[7] = cccc;
            dstp += 8 * OPSIZ;
            xlen -= 1;
        }
        len %= OPSIZ * 8;

        /* Write 1 op_t per iteration until fewer than OPSIZ bytes remain. */
        xlen = len / OPSIZ;
        while (xlen > 0)
        {
            ((op_t *) dstp)[0] = cccc;
            dstp += OPSIZ;
            xlen -= 1;
        }
        len %= OPSIZ;
    }

    /* Write the last few bytes (or the whole region when it was shorter
       than 8 bytes) one byte at a time. */
    while (len > 0)
    {
        ((byte *) dstp)[0] = (byte) c;
        dstp += 1;
        len -= 1;
    }

    return dstpp;
}
/*
 * zt_memset - naive memset: store the byte value of C into each of the
 * first N bytes at S, one byte per loop iteration.
 *
 * s: start of the destination region
 * c: fill value; converted to unsigned char, so only the low 8 bits are kept
 * n: number of bytes to fill
 *
 * Returns S unchanged.
 */
void *zt_memset(void *s,int c,size_t n)
{
    const unsigned char fill = c;
    unsigned char *cursor = s;

    while (n != 0)
    {
        *cursor = fill;
        cursor++;
        n--;
    }

    return s;
}
/*
 * Print one timing line: "<func> <line> <seconds> <microseconds>", where the
 * two numbers are the elapsed time from START to END.  The microsecond part
 * is normalised into [0, 999999] by borrowing from the seconds, so it is
 * never printed as a negative value.
 */
static void print_elapsed(const char *func, int line,
                          const struct timeval *start,
                          const struct timeval *end)
{
    long sec = (long) (end->tv_sec - start->tv_sec);
    long usec = (long) (end->tv_usec - start->tv_usec);

    if (usec < 0)
    {
        sec -= 1;
        usec += 1000000L;
    }

    /* The differences are cast to long above so that they match %ld. */
    printf("%s %d %ld %ld\n", func, line, sec, usec);
}

/*
 * Time zt_memset and lib_memset filling the same 1,000,000-byte heap buffer
 * and print one timing line per routine.
 *
 * Returns 0 on success, EXIT_FAILURE if the buffer cannot be allocated.
 */
int main(void)
{
    enum { BUF_LEN = 1000000 };
    struct timeval start;
    struct timeval end;
    unsigned char *buff = malloc(BUF_LEN);

    if (buff == NULL)
    {
        perror("malloc");
        return EXIT_FAILURE;
    }

    /* Touch every byte once before timing, so that the cost of the first
       access to freshly allocated memory is not charged to whichever
       routine happens to be measured first. */
    memset(buff, 0, BUF_LEN);

    gettimeofday(&start, NULL);
    zt_memset(buff, 244, BUF_LEN);
    gettimeofday(&end, NULL);
    print_elapsed(__func__, __LINE__, &start, &end);

    gettimeofday(&start, NULL);
    lib_memset(buff, 244, BUF_LEN);
    gettimeofday(&end, NULL);
    print_elapsed(__func__, __LINE__, &start, &end);

    free(buff);
    return 0;
}
实验结果为:
从结果得知:glibc的实现效率要比自己的实现更高。原因是:自己写的zt_memset是一个字节一个字节写入的;而lib_memset在目标地址对齐之后,按机器字长(OPSIZ)成批写入,即64位机器上一次可以写入8个字节,而zt_memset一次只写入1个字节。