创建知识库逻辑
1. 表单验证系统
文件位置:frontend/packages/data/knowledge/knowledge-modal-base/src/create-knowledge-modal-v2/features/add-type-content/coze-knowledge/index.tsx
知识库创建表单的验证规则:
/** Accepts only names made entirely of characters other than `"`, `'`, a backtick, or a backslash. */
const NAME_ALLOWED_PATTERN = /^[^"'`\\]+$/;

/**
 * Validation rules for the knowledge base name field.
 *
 * - The first rule marks the field as required and reports the "name empty" message.
 *   (`whitespace: true` presumably rejects whitespace-only input — confirm against
 *   the form library's rule semantics.)
 * - The second rule rejects names containing quotes, backticks, or backslashes.
 */
const nameValidationRules = [
  {
    required: true,
    whitespace: true,
    message: I18n.t('dataset-name-empty-tooltip'),
  },
  {
    pattern: NAME_ALLOWED_PATTERN,
    message: I18n.t('dataset-name-has-wrong-word-tooltip'),
  },
];
/** Maximum length configured for the knowledge base description. */
const DESCRIPTION_MAX_LENGTH = 2000;

/** Validation rules for the knowledge base description field: a single max-length rule. */
const descriptionValidationRules = [
  {
    maxLength: DESCRIPTION_MAX_LENGTH,
    message: I18n.t('dataset-description-too-long'),
  },
];
/** Content format of a knowledge base. */
enum FormatType {
  /** Text content */
  Text = 0,
  /** Table content */
  Table = 1,
  /** Image content */
  Image = 2,
}
/** Kind of unit that gets imported into a knowledge base. */
enum UnitType {
  /** Text document */
  TEXT_DOC = 'text_doc',
  /** Table document */
  TABLE_DOC = 'table_doc',
  /** Image file */
  IMAGE_FILE = 'image_file',
}
/** One entry of the `icon_uri` form value. */
interface KnowledgeIconItem {
  url: string;
  uri: string;
  uid: string;
  /** True when the entry is the automatically fetched default icon. */
  isDefault?: boolean;
}

/** Shape of the data held by the knowledge base creation form. */
export interface CozeKnowledgeAddTypeContentFormData {
  /** Knowledge base name */
  name: string;
  /** Knowledge base description */
  description: string;
  /** Selected format type */
  format_type: FormatType;
  /** Icon information */
  icon_uri?: KnowledgeIconItem[];
}
/**
 * Fetches the icon for the given format type, stores it as the cover icon,
 * and writes it into the form's `icon_uri` field unless the form already
 * holds a non-default cover.
 *
 * @param formatType Format type whose icon is requested.
 */
const fetchIcon = async (formatType: FormatType) => {
  const response = await KnowledgeApi.GetIcon({ format_type: formatType });
  const defaultIcon = response.icon;
  setCoverIcon(defaultIcon);

  // Leave the form untouched when its first cover entry is not flagged as default.
  const existingCover = formApi.getValue('icon_uri');
  if (existingCover && !existingCover[0]?.isDefault) {
    return;
  }

  // The icon's uri is used for both `uri` and `uid`.
  const iconUri = defaultIcon?.uri ?? '';
  formApi.setValue('icon_uri', [
    {
      url: defaultIcon?.url ?? '',
      uri: iconUri,
      uid: iconUri,
      isDefault: true,
    },
  ]);
};
设计亮点:
- 字符限制验证:知识库名称不允许包含特殊字符如引号、反斜杠等
- 长度限制:描述最大2000字符(由上述表单规则校验);名称上限为100字符(见IDL中CreateDatasetRequest的注释,上面展示的校验规则片段未包含该限制)
- 格式类型支持:支持文本、表格、图片三种主要格式
- 智能图标获取:根据格式类型自动获取对应的默认图标
- 数据导入类型:根据格式类型自动匹配对应的导入类型
- 国际化错误信息:所有验证错误信息都支持多语言
2. 核心逻辑
知识库创建的核心逻辑主要在useCreateKnowledgeModalV2 Hook中:
/**
 * Validates the form and creates the knowledge base through the API.
 *
 * Rejects when form validation or the API call rejects.
 *
 * @returns The `dataset_id` of the newly created knowledge base.
 */
const createDataset = async () => {
  await formRef.current?.formApi.validate();

  const formApi = formRef.current?.formApi;
  const { dataset_id: datasetId } = await KnowledgeApi.CreateDataset({
    // Empty ids are sent as `undefined` rather than as empty strings.
    project_id: projectID || undefined,
    name: formApi?.getValue('name'),
    format_type: currentFormatType,
    description: formApi?.getValue('description'),
    // `?.[0]?.uid`: an empty icon list must yield `undefined` instead of throwing
    // a TypeError on `undefined.uid`.
    icon_uri: formApi?.getValue('icon_uri')?.[0]?.uid,
    space_id: spaceId || undefined,
  });
  return datasetId;
};
// Modal configuration: title, cancel handling, and the three footer buttons.
const { open, close, modal } = useDataModalWithCoze({
title: (
<div data-testid={KnowledgeE2e.CreateKnowledgeModalTitle}>
{I18n.t('datasets_model_create_title')}
</div>
),
centered: true,
// Cancelling the modal simply closes it.
onCancel: () => {
close();
},
// Footer: "cancel", "create", and "create and import" buttons.
footer: (
<div className="flex w-full justify-end">
<Button
color="primary"
onClick={() => {
close();
}}
>
{I18n.t('cancel')}
</Button>
<LoadingButton
color="primary"
onClick={async () => {
// Create-only path: beforeCreate is called with `false` before the dataset is created.
beforeCreate?.(false);
const datasetId = await createDataset();
// Prefer the caller's onFinish callback; otherwise navigate to the new knowledge resource.
if (onFinish) {
onFinish(datasetId || '', unitType, false);
} else {
resourceNavigate.toResource?.('knowledge', datasetId);
}
}}
>
{I18n.t('kl_write_108')} {/* Create */}
</LoadingButton>
<LoadingButton
data-testid={KnowledgeE2e.CreateKnowledgeModalSubmitAndImportButton}
color="primary"
onClick={async () => {
// Create-and-import path: beforeCreate is called with `true` before the dataset is created.
beforeCreate?.(true);
const datasetId = await createDataset();
// NOTE(review): the fallback passes only the unit type to upload, not datasetId —
// presumably the target is resolved elsewhere; confirm.
if (onFinish) {
onFinish(datasetId || '', unitType, true);
} else {
resourceNavigate.upload?.({ type: unitType });
}
}}
>
{I18n.t('kl_write_109')} {/* Create and import */}
</LoadingButton>
</div>
),
});
// Values returned by the hook: the modal element plus open/close controls.
return {
// The modal wraps the typed form; formRef gives createDataset access to the form API.
modal: modal(
<Form<CozeKnowledgeAddTypeContentFormData>
ref={formRef}
showValidateIcon={false}
>
<CozeKnowledgeAddTypeContent
onImportKnowledgeTypeChange={setUnitType}
onSelectFormatTypeChange={setCurrentFormatType}
/>
</Form>,
),
// Reset the selected format type to Text every time the modal is opened.
open: () => {
setCurrentFormatType(FormatType.Text);
open();
},
close,
};
设计亮点:
- 双模式支持:支持"仅创建"和"创建并导入"两种模式
- 表单验证:创建前进行完整的表单验证
- 状态管理:通过Hook统一管理弹窗状态和数据流
- 错误处理:完善的错误处理和用户反馈机制
- 导航控制:创建成功后智能导航到对应页面
API层设计与实现
IDL基础类型定义(knowledge.thrift)
文件位置:idl/data/knowledge/knowledge.thrift
核心代码:
include "../../base.thrift"
include "common.thrift"
namespace go data.knowledge
// Request payload for creating a knowledge base (dataset).
struct CreateDatasetRequest {
1: string name // Knowledge base name, at most 100 characters
2: string description // Knowledge base description
3: i64 space_id (agw.js_conv="str", api.js_conv="true") // Space ID
4: string icon_uri // Knowledge base avatar URI
5: common.FormatType format_type // Format type
6: i64 biz_id (agw.js_conv="str", api.js_conv="true") // Business identifier exposed to third parties; coze passes 0 or omits it
7: i64 project_id (agw.js_conv="str", api.js_conv="true") // Project ID
255: optional base.Base Base
}
// Response of the create call; dataset_id carries the ID of the created knowledge base.
struct CreateDatasetResponse {
1: i64 code
2: string msg
3: i64 dataset_id (agw.js_conv="str", api.js_conv="true")
255: optional base.BaseResp BaseResp
}
// Status values of a knowledge base. Note the gap: DatasetFailed is 9, not 4.
enum DatasetStatus {
DatasetProcessing = 0 // Processing
DatasetReady = 1 // Available
DatasetDeleted = 2 // Soft-deleted
DatasetForbid = 3 // Disabled
DatasetFailed = 9 // Failed
}
// Knowledge base entity. ID fields are i64 annotated with js_conv="str".
struct Dataset {
1: i64 dataset_id (agw.js_conv="str", api.js_conv="true")
2: string name
3: string description
4: string icon_uri
5: common.FormatType format_type
6: DatasetStatus status
7: i64 create_time
8: i64 update_time
9: i64 space_id (agw.js_conv="str", api.js_conv="true")
10: i64 project_id (agw.js_conv="str", api.js_conv="true")
}
文件作用:
定义了知识库相关的数据结构,包括创建请求、响应和知识库实体定义。
创建知识库-IDL接口定义(knowledge_svc.thrift)
文件路径:idl/data/knowledge/knowledge_svc.thrift
核心代码:
include "slice.thrift"
include "knowledge.thrift"
include "document.thrift"
include "common.thrift"
include "review.thrift"
namespace go data.knowledge
// HTTP-facing service for knowledge bases and their documents.
// Every method is a POST under /api/knowledge/ except GetModeConfig, which is a GET under /api/memory/.
service DatasetService {
// Knowledge base endpoints
knowledge.GetIconResponse GetIconForDataset(1:knowledge.GetIconRequest req) (api.post='/api/knowledge/icon/get', api.category="knowledge",agw.preserve_base="true")
knowledge.CreateDatasetResponse CreateDataset(1:knowledge.CreateDatasetRequest req) (api.post='/api/knowledge/create', api.category="knowledge",agw.preserve_base="true")
knowledge.DatasetDetailResponse DatasetDetail(1:knowledge.DatasetDetailRequest req) (api.post='/api/knowledge/detail', api.category="knowledge",agw.preserve_base="true")
knowledge.ListDatasetResponse ListDataset(1:knowledge.ListDatasetRequest req) (api.post='/api/knowledge/list', api.category="knowledge",agw.preserve_base="true")
knowledge.DeleteDatasetResponse DeleteDataset(1:knowledge.DeleteDatasetRequest req) (api.post='/api/knowledge/delete', api.category="knowledge",agw.preserve_base="true")
knowledge.UpdateDatasetResponse UpdateDataset(1:knowledge.UpdateDatasetRequest req) (api.post='/api/knowledge/update', api.category="knowledge",agw.preserve_base="true")
knowledge.GetModeConfigResponse GetModeConfig(1:knowledge.GetModeConfigRequest req)(api.get='/api/memory/table_mode_config', api.category="memory", agw.preserve_base="true")
// Document endpoints
document.CreateDocumentResponse CreateDocument(1:document.CreateDocumentRequest req) (api.post='/api/knowledge/document/create', api.category="knowledge",agw.preserve_base="true")
document.ListDocumentResponse ListDocument(1:document.ListDocumentRequest req) (api.post='/api/knowledge/document/list', api.category="knowledge",agw.preserve_base="true")
document.DeleteDocumentResponse DeleteDocument(1:document.DeleteDocumentRequest req) (api.post='/api/knowledge/document/delete', api.category="knowledge",agw.preserve_base="true")
document.UpdateDocumentResponse UpdateDocument(1:document.UpdateDocumentRequest req) (api.post='/api/knowledge/document/update', api.category="knowledge",agw.preserve_base="true")
}
源码作用:定义知识库创建、更新、删除和获取相关的接口
知识库格式类型定义(common.thrift)
文件路径:idl/data/knowledge/common.thrift
核心代码:
// Content format of a knowledge base.
enum FormatType {
Text = 0 // Text format
Table = 1 // Table format
Image = 2 // Image format
Database = 3 // Database format
}
// Icon reference returned by the icon lookup.
struct Icon {
1: string uri // Icon URI
2: string url // Icon URL
}
// Request for the icon associated with a format type.
struct GetIconRequest {
1: FormatType format_type // Format type
255: optional base.Base Base
}
// Response carrying the icon for the requested format type.
struct GetIconResponse {
1: i64 code
2: string msg
3: Icon icon
255: optional base.BaseResp BaseResp
}
源码作用:定义知识库相关的枚举类型和通用结构体
创建知识库-API接口实现(knowledge-api.ts)
文件位置:frontend/packages/arch/bot-api/src/knowledge-api.ts
核心代码:
import KnowledgeService from './idl/knowledge';
import { axiosInstance, type BotAPIRequestConfig } from './axios';
/**
 * Shared knowledge-service client backed by the project's axios instance.
 *
 * Every request carries the `Agw-Js-Conv: str` header, merged over any
 * caller-supplied headers.
 * NOTE(review): the header presumably pairs with the `agw.js_conv="str"` IDL
 * annotations so that i64 fields arrive as strings — confirm.
 */
// eslint-disable-next-line @typescript-eslint/naming-convention
export const KnowledgeApi = new KnowledgeService<BotAPIRequestConfig>({
  request: (params, config = {}) =>
    axiosInstance.request({
      ...params,
      ...config,
      headers: { ...config.headers, 'Agw-Js-Conv': 'str' },
    }),
});
知识库服务类实现
文件位置:frontend/packages/arch/idl/src/auto-generated/knowledge/index.ts
核心代码:
/**
 * Client for the knowledge HTTP API.
 *
 * Each method assembles `{ url, method, data }` and delegates to the `request`
 * function injected through the constructor. `T` is the type of the per-call
 * options forwarded to that function.
 *
 * NOTE(review): `genBaseURL` is called below but is not part of this excerpt.
 */
export default class KnowledgeService<T> {
// Default transport: throws until a real `request` implementation is injected.
private request: any = () => {
throw new Error('KnowledgeService.request is undefined');
};
private baseURL: string | ((path: string) => string) = '';
/**
 * @param options Optional `baseURL` (string or path-mapping function) and the
 * `request` transport. Without `request`, the throwing default stays in place.
 */
constructor(options?: {
baseURL?: string | ((path: string) => string);
request?<R>(
params: {
url: string;
method: 'GET' | 'DELETE' | 'POST' | 'PUT' | 'PATCH';
data?: any;
params?: any;
headers?: any;
},
options?: T,
): Promise<R>;
}) {
this.request = options?.request || this.request;
this.baseURL = options?.baseURL || '';
}
/**
 * POST /api/knowledge/create
 *
 * Creates a knowledge base. Only the fields listed in `data` are sent.
 */
CreateDataset(
req?: dataset.CreateDatasetRequest,
options?: T,
): Promise<dataset.CreateDatasetResponse> {
const _req = req || {};
const url = this.genBaseURL('/api/knowledge/create');
const method = 'POST';
const data = {
name: _req['name'],
description: _req['description'],
space_id: _req['space_id'],
icon_uri: _req['icon_uri'],
format_type: _req['format_type'],
biz_id: _req['biz_id'],
project_id: _req['project_id'],
storage_location: _req['storage_location'],
Base: _req['Base'],
};
return this.request({ url, method, data }, options);
}
/**
 * POST /api/knowledge/icon/get
 *
 * Fetches the icon for a format type; only `format_type` is sent.
 */
GetIcon(
req?: dataset.GetIconRequest,
options?: T,
): Promise<dataset.GetIconResponse> {
const _req = req || {};
const url = this.genBaseURL('/api/knowledge/icon/get');
const method = 'POST';
const data = { format_type: _req['format_type'] };
return this.request({ url, method, data }, options);
}
/**
 * POST /api/knowledge/delete
 *
 * Deletes the knowledge base identified by `dataset_id`.
 */
DeleteDataset(
req?: dataset.DeleteDatasetRequest,
options?: T,
): Promise<dataset.DeleteDatasetResponse> {
const _req = req || {};
const url = this.genBaseURL('/api/knowledge/delete');
const method = 'POST';
const data = { dataset_id: _req['dataset_id'], Base: _req['Base'] };
return this.request({ url, method, data }, options);
}
// ... other API methods
}
代码作用:KnowledgeService类包含知识库相关的API方法:
- CreateDataset:用于创建知识库,调用/api/knowledge/create接口
- GetIcon:用于获取知识库图标,调用/api/knowledge/icon/get接口
- DeleteDataset:用于删除知识库,调用/api/knowledge/delete接口
此文件是基于knowledge.thrift自动生成的,开发者无需手动修改。
创建知识库–结构体实现(knowledge.ts)
文件路径:frontend/packages/arch/idl/src/auto-generated/knowledge/namespaces/knowledge.ts
/** Request body for creating a knowledge base. */
export interface CreateDatasetRequest {
  /** Knowledge base name */
  name: string;
  /** Knowledge base description */
  description: string;
  /** Space ID */
  space_id: string;
  /** Knowledge base avatar URI */
  icon_uri?: string;
  /** Format type */
  format_type: FormatType;
  /** Business ID */
  biz_id?: string;
  /** Project ID */
  project_id?: string;
}
/** Response of the create-knowledge-base call. */
export interface CreateDatasetResponse {
  code: Int64;
  msg: string;
  /** Knowledge base ID */
  dataset_id: string;
}
/** Request for the icon associated with a format type. */
export interface GetIconRequest {
  /** Format type */
  format_type: FormatType;
}
/** Response carrying the icon for the requested format type. */
export interface GetIconResponse {
  code: Int64;
  msg: string;
  /** Icon information */
  icon: Icon;
}
/** Request body for deleting a knowledge base. */
export interface DeleteDatasetRequest {
  /** Knowledge base ID */
  dataset_id: string;
}
/** Response of the delete-knowledge-base call. */
export interface DeleteDatasetResponse {
  code: Int64;
  msg: string;
}
/** Content format of a knowledge base. */
export enum FormatType {
  /** Text format */
  Text = 0,
  /** Table format */
  Table = 1,
  /** Image format */
  Image = 2,
  /** Database format */
  Database = 3,
}
/** Status values of a knowledge base. Note the gap: `DatasetFailed` is 9. */
export enum DatasetStatus {
  /** Processing */
  DatasetProcessing = 0,
  /** Available */
  DatasetReady = 1,
  /** Soft-deleted */
  DatasetDeleted = 2,
  /** Disabled */
  DatasetForbid = 3,
  /** Failed */
  DatasetFailed = 9,
}
/** Icon reference returned by the icon lookup. */
export interface Icon {
  /** Icon URI */
  uri: string;
  /** Icon URL */
  url: string;
}
idl2ts-cli 工具
工具名称
@coze-arch/idl2ts-cli
详细地址
项目路径:frontend/infra/idl/idl2ts-cli/
工具详细信息
版本:0.1.7
描述:IDL(Interface Definition Language)到TypeScript的转换工具
主要功能:
- gen命令:从Thrift或Protocol Buffer文件生成API代码
- filter命令:生成过滤后的API类型定义
可执行文件:idl2ts(位于 ./src/cli.js)
最终调用的是frontend/infra/idl/idl2ts-cli/src/cli.ts这个文件
核心依赖:
- @coze-arch/idl2ts-generator:代码生成器
- @coze-arch/idl2ts-helper:辅助工具
- @coze-arch/idl2ts-plugin:插件系统
- commander:命令行界面
- prettier:代码格式化
使用方式:
# Generate API code
idl2ts gen <projectRoot> [-f --format-config <formatConfig>]
# Generate filtered types
idl2ts filter <projectRoot> [-f --format-config <formatConfig>]
许可证:Apache-2.0
作者:fanwenjie.fe@bytedance.com
这个工具是Coze Studio项目中统一处理所有IDL文件(包括knowledge.thrift和相关知识库IDL文件)的核心工具,确保了整个项目中知识库API代码生成的一致性。
知识库相关IDL文件处理
该工具主要处理以下知识库相关的IDL文件:
- knowledge.thrift - 知识库核心数据结构定义
- knowledge_svc.thrift - 知识库服务接口定义
- common.thrift - 知识库通用类型定义
- document.thrift - 知识库文档相关定义
生成的知识库相关代码
在知识库开发过程中,该工具能够:
- 自动生成知识库API类型:包括CreateDatasetRequest、CreateDatasetResponse等接口类型
- 生成知识库服务类:KnowledgeService类及其所有方法
- 确保类型安全:所有知识库API调用都有完整的TypeScript类型支持
- 统一代码风格:通过Prettier确保生成代码的格式一致性
- 支持多种格式:处理文本、表格、图片等不同格式的知识库类型定义
知识库开发工作流
# 1. Edit the knowledge base IDL file
vi idl/data/knowledge/knowledge.thrift
# 2. Generate the TypeScript code
idl2ts gen frontend/
# 3. The frontend API code is updated automatically
# Output path: frontend/packages/arch/idl/src/auto-generated/knowledge/
这确保了知识库功能的前后端接口定义始终保持同步,大大提高了开发效率和代码质量。