Milvus版本 2.3.5
<dependency> <groupId>io.milvus</groupId> <artifactId>milvus-sdk-java</artifactId> <version>2.3.4</version> <!-- 截至撰写时还没有2.3.5版本的依赖,故使用2.3.4 --> <!-- <scope>provided</scope>--> </dependency>
一、创建操作
1、python版本
from pymilvus import Collection, FieldSchema, DataType, CollectionSchema, connections
from pymilvus.orm import utility, db
from knowledge_brain.milvus_sink import milvus_sink
from study.connect import Connect
class MilvusOperatC:
    """Create-side helper around a single Milvus collection.

    Constructing an instance connects to the server, makes sure the database
    and the collection exist (creating whichever is missing) and stores the
    resulting ``Collection`` handle in ``self.collection``. The remaining
    methods delegate to that handle.
    """

    def __init__(self, host, port, user, password, db_name, alias, collection_name, schema, num_shards):
        """Store the settings and open (or create) the collection.

        Args:
            host: Milvus server host / IP.
            port: Milvus server port.
            user: User name used for authentication.
            password: Password used for authentication.
            db_name: Database to work in; created when it does not exist.
            alias: pymilvus connection alias under which the connection is
                registered and which every later call uses.
            collection_name: Collection to open; created from ``schema``
                when it does not exist.
            schema: ``CollectionSchema`` used if the collection is created.
            num_shards: Shard count passed to ``Collection`` on creation.
        """
        print("加载milvus依赖")
        self.host = host
        self.port = port
        self.user = user
        self.password = password
        self.db_name = db_name
        self.alias = alias
        self.collection_name = collection_name
        self.schema = schema
        self.num_shards = num_shards
        self.collection = self.con()

    def con(self):
        """Connect, ensure database and collection exist, return the collection.

        Every pymilvus call is given ``using=self.alias`` so that it runs on
        the connection opened here; without it pymilvus falls back to the
        alias ``"default"``, which only works when ``alias == "default"``.

        Returns:
            The ``Collection`` object bound to ``self.collection_name``.
        """
        connections.connect(
            host=self.host,
            user=self.user,
            password=self.password,
            port=self.port,
            alias=self.alias,
        )
        print("建立数据库连接~~~~")

        # Create the database on first use.
        if self.db_name not in db.list_database(using=self.alias):
            print("没有%s数据库,进行新建~~~~" % self.db_name)
            db.create_database(self.db_name, using=self.alias)
            print("新建%s数据库完成!" % self.db_name)

        # Switch this connection to the target database.
        db.using_database(self.db_name, using=self.alias)

        # Create the collection on first use.
        if utility.has_collection(self.collection_name, using=self.alias):
            print("集合已存在")
        else:
            print("没有%s集合,进行新建~~~~" % self.collection_name)
            self.col_create()
            print("新建%s集合完成!" % self.collection_name)

        collection = Collection(self.collection_name, using=self.alias)
        print("数据库连接完成!")
        return collection

    def col_create(self):
        """Create the collection from ``self.schema`` with ``self.num_shards`` shards.

        The schema is supplied by the caller, for example::

            fields = [
                FieldSchema(name='vec', dtype=DataType.FLOAT_VECTOR,
                            description='embedding vectors', dim=1024),
                FieldSchema(name='doc_slicing_id', dtype=DataType.VARCHAR,
                            description='doc_slicing_id', max_length=100,
                            is_primary=True),
                FieldSchema(name='doc_id', dtype=DataType.VARCHAR,
                            description='doc_id', max_length=100),
            ]
            schema = CollectionSchema(fields=fields, description='...')
        """
        collection = Collection(
            name=self.collection_name,
            schema=self.schema,
            num_shards=self.num_shards,
            using=self.alias,
        )
        print(collection)

    def index_create(self, index_params: dict, vec_field: str):
        """Create an index on ``vec_field``.

        Args:
            index_params: Index definition passed straight to
                ``Collection.create_index``, for example
                ``{'metric_type': 'L2', 'index_type': 'IVF_FLAT',
                'params': {'nlist': 150}}``.
            vec_field: Name of the field to index.
        """
        self.collection.create_index(field_name=vec_field, index_params=index_params)

    def partition_create(self, partition_name: str):
        """Create a partition named ``partition_name`` in the collection."""
        self.collection.create_partition(partition_name)

    def load(self):
        """Load the collection and report success on stdout."""
        self.collection.load()
        print("数据load成功")

    def unload(self):
        """Release the collection (the counterpart of ``load``)."""
        self.collection.release()
if __name__ == '__main__':
    # Manual smoke run. Only db_name is consumed by the call at the bottom;
    # the other settings and the schema are what MilvusOperatC would need, e.g.
    # MilvusOperatC(host, port, user, password, db_name, alias, 'test', schema, 2).
    host = 'XX.17.38'
    port = '31639'
    user = 'Milvus'
    password = 'Milvus'
    db_name = 'knowledge_test'
    alias = 'default'

    # The keyword is `description`; the misspelt `descrition` is swallowed by
    # FieldSchema's **kwargs and the text never reaches the schema.
    fields = [
        FieldSchema(name='pk', dtype=DataType.INT64, description='主键', max_length=200, is_primary=True,
                    auto_id=True),
        FieldSchema(name='car_model', dtype=DataType.VARCHAR, description='car_model', max_length=65535,
                    is_primary=False, auto_id=False),
        FieldSchema(name='text', dtype=DataType.VARCHAR, description='page_content', max_length=65535,
                    is_primary=False, auto_id=False),
        FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, description='embedding vectors', dim=1024),
    ]
    schema = CollectionSchema(fields=fields, description='集合描述')

    # milvus_sink is a project helper imported from knowledge_brain.milvus_sink;
    # the arguments are passed positionally exactly as before.
    milvus_sink(db_name, 'test', 1, 1, 1)
python调用
from pymilvus import FieldSchema, DataType, CollectionSchema
from study.MilvusOperatC import MilvusOperatC
if __name__ == '__main__':
    # Usage example: create/open the collection, index it, add a partition,
    # load/release it, then run read-side calls through MilvusOperatR.
    host = 'XXX.17.38'
    port = '31639'  # was '31639sss', which is not a legal port value
    user = 'Milvus'
    password = 'Milvus'
    db_name = 'knowledge_test'
    alias = 'default'

    # The keyword is `description`; the misspelt `descrition` is swallowed by
    # FieldSchema's **kwargs and the text never reaches the schema.
    fields = [
        FieldSchema(name='pk', dtype=DataType.INT64, description='主键', max_length=200, is_primary=True,
                    auto_id=True),
        FieldSchema(name='car_model', dtype=DataType.VARCHAR, description='car_model', max_length=65535,
                    is_primary=False, auto_id=False),
        FieldSchema(name='text', dtype=DataType.VARCHAR, description='page_content', max_length=65535,
                    is_primary=False, auto_id=False),
        FieldSchema(name='vector', dtype=DataType.FLOAT_VECTOR, description='embedding vectors', dim=1024),
    ]
    schema = CollectionSchema(fields=fields, description='集合描述')

    index_params = {
        'metric_type': 'L2',  # alternatives: COSINE, IP
        'index_type': "IVF_FLAT",
        'params': {"nlist": 150},
    }
    # Name of the embedding field to index.
    vec_field = "vector"

    mc = MilvusOperatC(host, port, user, password, db_name, alias, 'test', schema, 2)
    mc.index_create(index_params, vec_field)
    mc.partition_create('2024032103')
    mc.load()
    mc.unload()

    search_params = {
        "metric_type": "L2",
        "offset": 0,
        "ignore_growing": False,
        "params": {"nprobe": 10},
    }
    # NOTE(review): MilvusOperatR is not imported in this snippet; import the
    # read-side helper before running.
    # NOTE(review): the collection was released by mc.unload() just above;
    # confirm MilvusOperatR loads it again before searching.
    mc1 = MilvusOperatR(host, port, user, password, db_name, alias, 'test')
    # NOTE(review): the schema above declares 'vector' with dim=1024 and a
    # primary key named 'pk'; this sample passes a 2-value vector and asks for
    # a field named 'id' - verify both against the actual collection.
    res = mc1.search(
        search_params=search_params,
        embeddings=[[0.5441558957099915, 0.9319176077842712]],
        anns_field='vector',
        topK=10,
        expr=None,
        output_fields=['id', 'vector'],
    )
    print(res)

    output_fields = ["id"]
    res1 = mc1.query(expr="", offset=0, topK=10, output_fields=output_fields)
    print(res1)

    res2 = mc1.count(expr="", output_fields=["count(*)"])
    print(res2)
2、java版本
package com.XXXX.milvus;
import com.google.gson.internal.$Gson$Preconditions;
import io.milvus.client.MilvusServiceClient;
import io.milvus.grpc.DataType;
import io.milvus.grpc.GetLoadStateResponse;
import io.milvus.grpc.GetLoadingProgressResponse;
import io.milvus.param.*;
import io.milvus.param.collection.*;
import io.milvus.param.index.CreateIndexParam;
import io.milvus.param.partition.CreatePartitionParam;
/**
* @author yangyingchun
* @version 1.0
* @date 2024/3/20 16:02
*/
public class MilvusOperateC {
/**
* 获取连接
* @param host
* @param port
* @param username
* @param password
* @param database
* @return
*/
public static MilvusServiceClient getConn(String host,Integer port,String username,String password,String database){
MilvusServiceClient milvusServiceClient= new MilvusServiceClient(ConnectParam
.newBuilder()
.withHost(host)
.withPort(port)
.withAuthorization(userna