https://github.com/chroma-core/chroma
pip install chromadb
轻量级向量数据库,目前只支持 CPU 计算
```python
import chromadb
chroma_client = chromadb.Client()
# 数据持久化
chroma_client = chromadb.PersistentClient(path="./chromadb_save")
chroma_client.heartbeat() # 返回纳秒级时间戳心跳,用于测试连接是否保持
```
chroma run --host 0.0.0.0 --port 8000 --path /db_path --log-path /var/log/chroma.log
```shell
# 生成密码哈希
htpasswd -Bbn admin password > server.htpasswd
# 设置环境变量
export CHROMA_SERVER_AUTH_CREDENTIALS_FILE="server.htpasswd"
export CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER='chromadb.auth.providers.HtpasswdFileServerAuthCredentialsProvider'
export CHROMA_SERVER_AUTH_PROVIDER='chromadb.auth.basic.BasicAuthServerProvider'
```
```python
client = chromadb.HttpClient(
    settings=Settings(
        chroma_client_auth_provider="chromadb.auth.basic.BasicAuthClientProvider",
        chroma_client_auth_credentials="admin:password"
    )
)
```
TOKENS:必须是字母数字的 ASCII 字符串。TOKENS 区分大小写
```shell
# 设置环境变量
export CHROMA_SERVER_AUTH_CREDENTIALS="test-token"
export CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER="chromadb.auth.token.TokenConfigServerAuthCredentialsProvider"
export CHROMA_SERVER_AUTH_PROVIDER="chromadb.auth.token.TokenAuthServerProvider"
# 使用 X-Chroma-Token: test-token 类型的 authentication header,设置额外的环境变量
export CHROMA_SERVER_AUTH_TOKEN_TRANSPORT_HEADER="X_CHROMA_TOKEN"
```
```python
client = chromadb.HttpClient(
    settings=Settings(
        chroma_client_auth_provider="chromadb.auth.token.TokenAuthClientProvider",
        chroma_client_auth_credentials="test-token"
    )
)
```
docker run -d --name chromadb-container -p 8899:8000 chromadb/chroma
```yml
version: '3.9'
networks:
  net:
    driver: bridge
services:
  server:
    image: ghcr.io/chroma-core/chroma:latest
    environment:
      - IS_PERSISTENT=TRUE
    volumes:
      - /chroma_data:/chroma/chroma/
    ports:
      - 11111:8000
```
pip install chromadb-client
```python
chroma_client = chromadb.HttpClient(host='localhost', port=8000)
```
```python
collection = chroma_client.create_collection(
    name="my_collection",
    embedding_function=emb_fn, # 修改向量化模型(https://docs.trychroma.com/guides/embeddings),默认向量模型:all-MiniLM-L6-v2
    metadata={"hnsw:space": "cosine"} # 自定义计算向量距离的方法,{'cosine': '余弦相似度', 'ip': '内积', 'l2': '欧式距离'},默认值为 'l2'
)
```
```python
chroma_client.delete_collection(
    name="my_collection"
)
```
```python
chroma_client.count_collections()
```
```python
chroma_client.list_collections()
```
```python
collection = chroma_client.get_collection(name="my_collection")
```
如果存在就获取,不存在就创建,参数与 create_collection 相同
```python
collection = chroma_client.get_or_create_collection(name="my_collection")
```
```python
collection.add(
    documents=["lorem ipsum...", "doc2", "doc3", ...],
    embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...],
    metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...],
    ids=["id1", "id2", "id3", ...]
)
```
使用 `.add` 添加相同的 ID 两次,将导致仅存储初始值(后一次添加不会覆盖已有数据)。

```python
collection.query(
    # 注意:query_texts 与 query_embeddings 二选一,实际调用时只能提供其中一个
    query_texts=['xxx', 'xxx'],
    query_embeddings=[[11.1, 12.1, 13.1],[1.1, 2.3, 3.2], ...],
    n_results=10,
    where={"metadata_field": "is_equal_to_this"},
    where_document={"$contains":"search_string"},
    include=["embeddings", "metadatas", "documents", "distances"]
)
```
`where_document` 支持的操作符:包含:`$contains`,不包含:`$not_contains`
```python
collection.get(
    ids=["id1", "id2", "id3", ...],
    where={"style": "style1"},
    include=["embeddings", "metadatas", "documents"]
)
```
```python
collection.update(
    ids=["id1", "id2", "id3", ...],
    embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...],
    metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...],
    documents=["doc1", "doc2", "doc3", ...],
)
```
如果存在就更新,不存在就添加
```python
collection.upsert(
    ids=["id1", "id2", "id3", ...],
    embeddings=[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [1.1, 2.3, 3.2], ...],
    metadatas=[{"chapter": "3", "verse": "16"}, {"chapter": "3", "verse": "5"}, {"chapter": "29", "verse": "11"}, ...],
    documents=["doc1", "doc2", "doc3", ...],
)
```
```python
collection.delete(
    ids=["id1", "id2", "id3",...],
    where={"chapter": "20"},
    where_document={"$contains":"search_string"}
)
```