openssl base64编码实现分析-CSDN博客

本文链接：https://blog.csdn.net/ughome/article/details/127420988

本文详细解读了Base64编码的本质，介绍了字符替换机制和数据结构，重点分析了EVP_EncodeUpdate函数的工作流程，以及如何根据flag选择不同的替换表。编码过程中的内存管理和块处理策略也得到了深入解析。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

base64本质

本质就是字符替换：一个字节有8位，而base64的每个字符只表示6位，2的6次方是64，所以一共需要64个字符。
然后以这6位的值作为索引，在下面的数组中查表替换。这里有两个数组，默认使用上面一组；如果要使用下面一组，需要给ctx设置EVP_ENCODE_CTX_USE_SRP_ALPHABET标记。

/*
 * Default base64 alphabet: a 6-bit value (0..63) indexes the character that
 * represents it.  The 65th byte is the string literal's terminating NUL.
 */
static const unsigned char data_bin2ascii[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /* values  0..25 */
    "abcdefghijklmnopqrstuvwxyz"    /* values 26..51 */
    "0123456789"                    /* values 52..61 */
    "+/";                           /* values 62..63 */

/*
 * Alternative alphabet used for SRP: digits come first and the two trailing
 * symbols are '.' and '/'.  Indexed by a 6-bit value exactly like the default
 * table; the 65th byte is the string literal's terminating NUL.
 */
static const unsigned char srpdata_bin2ascii[65] =
    "0123456789"                    /* values  0..9  */
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"    /* values 10..35 */
    "abcdefghijklmnopqrstuvwxyz"    /* values 36..61 */
    "./";                           /* values 62..63 */

数据结构

    /* State carried between successive base64 encode/decode calls. */
    struct evp_Encode_Ctx_st {
    /* number saved in a partial encode/decode */// i.e. bytes held over between calls
    int num;  // Count of pending bytes in enc_data. While fewer than `length` bytes are buffered, EVP_EncodeUpdate only stores input here without encoding;
     // once a full block is available it encodes block by block, `length` bytes at a time (48 per the article -- init code not shown here, confirm).
    /*
     * The length is either the output line length (in input bytes) or the
     * shortest input line length that is ok.  Once decoding begins, the
     * length is adjusted up each time a longer line is decoded
     */
    int length; // NOTE(review): the article states this is initialised to 48 for encoding and 0 for decoding and not changed afterwards -- the init code is not visible here, verify.
    /* data to encode */
    unsigned char enc_data[80];// buffered input waiting to be encoded
    /* number read on current line */
    int line_num; // NOTE(review): the article's author observed this stays 0 and found no place that modifies it -- confirm against the full source.
    unsigned int flags; // Option bits: EVP_ENCODE_CTX_USE_SRP_ALPHABET selects the SRP substitution table; EVP_ENCODE_CTX_NO_NEWLINES suppresses the '\n' after each encoded block.
};

编码过程分析

/*
 * Feed `inl` bytes from `in` into the streaming base64 encoder.
 *
 * Input is consumed in blocks of ctx->length bytes.  Every complete block is
 * encoded into `out`, followed by '\n' unless EVP_ENCODE_CTX_NO_NEWLINES is
 * set, and the output is kept NUL-terminated.  Bytes that do not fill a block
 * are parked in ctx->enc_data for a later call.
 *
 * On return *outl holds the number of bytes written to `out` (terminating
 * NUL not counted).  Returns 1 on success; returns 0 with *outl == 0 when
 * inl <= 0 or when the amount of output would exceed INT_MAX.
 */
int EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
                      const unsigned char *in, int inl)
{
    size_t produced = 0;
    int fill, encoded;

    *outl = 0;
    if (inl <= 0)
        return 0;

    OPENSSL_assert(ctx->length <= (int)sizeof(ctx->enc_data));

    /* The new data still leaves the block incomplete: stash it and leave. */
    if (inl < ctx->length - ctx->num) {
        memcpy(ctx->enc_data + ctx->num, in, inl);
        ctx->num += inl;
        return 1;
    }

    /* Leftovers from an earlier call: top them up to one block and flush. */
    if (ctx->num != 0) {
        fill = ctx->length - ctx->num;
        memcpy(ctx->enc_data + ctx->num, in, fill);
        in += fill;
        inl -= fill;

        encoded = evp_encodeblock_int(ctx, out, ctx->enc_data, ctx->length);
        ctx->num = 0;           /* the buffered block has been consumed */
        out += encoded;
        produced = encoded;
        if (!(ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES)) {
            /* newlines are emitted unless explicitly disabled */
            *out++ = '\n';
            produced++;
        }
        *out = '\0';
    }

    /* Encode every remaining complete block straight from the input. */
    for (;;) {
        if (inl < ctx->length || produced > INT_MAX)
            break;

        encoded = evp_encodeblock_int(ctx, out, in, ctx->length);
        in += ctx->length;
        inl -= ctx->length;
        out += encoded;
        produced += encoded;
        if (!(ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES)) {
            *out++ = '\n';
            produced++;
        }
        *out = '\0';
    }

    /* The byte count must be reportable through an int. */
    if (produced > INT_MAX) {
        /* Too much output data! */
        *outl = 0;
        return 0;
    }

    /* Keep the incomplete tail; it is encoded by a later update or by final. */
    if (inl != 0)
        memcpy(ctx->enc_data, in, inl);
    ctx->num = inl;
    *outl = (int)produced;

    return 1;
}

evp_encodeblock_int 分析

/*
 * Base64-encode `dlen` bytes from `f` into `t`.
 *
 * Each group of up to three input bytes becomes four output characters; a
 * final group of one or two bytes is padded with '='.  The alphabet is the
 * SRP table when `ctx` is non-NULL and has EVP_ENCODE_CTX_USE_SRP_ALPHABET
 * set, otherwise the default table.  A terminating NUL is written after the
 * last character.  Returns the number of characters written, NUL excluded.
 */
static int evp_encodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
                               const unsigned char *f, int dlen)
{
    const unsigned char *alphabet = data_bin2ascii;
    int remaining;
    int written = 0;

    if (ctx != NULL && (ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
        alphabet = srpdata_bin2ascii;

    for (remaining = dlen; remaining > 0; remaining -= 3) {
        /* Pack the available bytes (1, 2 or 3) into the top of 24 bits. */
        unsigned long group = ((unsigned long)f[0]) << 16L;

        if (remaining >= 2)
            group |= ((unsigned long)f[1]) << 8L;
        if (remaining >= 3)
            group |= f[2];

        /* The first two characters always carry data. */
        t[0] = conv_bin2ascii(group >> 18L, alphabet);
        t[1] = conv_bin2ascii(group >> 12L, alphabet);

        /* The last two turn into '=' padding when the group is short. */
        if (remaining == 1)
            t[2] = '=';
        else
            t[2] = conv_bin2ascii(group >> 6L, alphabet);

        if (remaining >= 3)
            t[3] = conv_bin2ascii(group, alphabet);
        else
            t[3] = '=';

        t += 4;
        written += 4;
        f += 3;
    }

    *t = '\0';
    return written;
}

接口使用

/*
 * Streaming base64 encode step.
 *   ctx  - encoder state; holds input that does not yet fill a block
 *   out  - receives the encoded text for every completed block, NUL-terminated
 *   outl - set to the number of bytes written to out (NUL not counted)
 *   in   - input bytes
 *   inl  - number of input bytes
 * Returns 1 on success; returns 0 (with *outl == 0) when inl <= 0 or when the
 * output size would exceed INT_MAX.
 */
int EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl, const unsigned char *in, int inl);
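通过上面的代码可以知道：在不换行（设置了EVP_ENCODE_CTX_NO_NEWLINES）的情况下，每3个输入字节输出4个字符，不足3字节的尾部也会用'='补齐成4个字符，所以输出缓冲区至少需要 (原数据长度+2)/3*4+1 字节（最后的1字节用于结尾的'\0'）。注意"原数据长度*8/6+3"在长度不是3的倍数时可能偏小，例如1字节输入实际需要5字节，而该式只得到4。默认带换行时，每个分组（48字节输入）之后还会多输出一个'\n'。另外，EVP_EncodeUpdate只输出凑满一个分组的数据，不足一组的尾部数据留在ctx中，要到调用final时才输出。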