Commit: Update Milvus docker-compose.yaml (#459)
Signed-off-by: letonghan <[email protected]>
letonghan authored Aug 13, 2024
1 parent d5b8cdf commit d3eefea
Showing 4 changed files with 118 additions and 53 deletions.
comps/vectorstores/langchain/milvus/README.md (1 addition, 1 deletion)

@@ -6,7 +6,7 @@ Configure your Milvus instance to suit your application scenarios by adjusting c
Customized the path to store data, default is /volumes

```bash
-export DOCKER_VOLUME_DIRECTORY=./your_path
+export DOCKER_VOLUME_DIRECTORY=${your_path}
```

## 2. Run Milvus service
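
For reference, the docker-compose.yml updated in this commit consumes the variable with a `.` fallback, so leaving DOCKER_VOLUME_DIRECTORY unset keeps data under the compose directory (quoted from the standalone service below):

```yaml
volumes:
  - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml
  - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
```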
comps/vectorstores/langchain/milvus/docker-compose.yml (4 additions, 18 deletions)

@@ -7,10 +7,6 @@ services:
  etcd:
    container_name: milvus-etcd
    image: quay.io/coreos/etcd:v3.5.5
-    deploy:
-      resources:
-        limits:
-          cpus: "0.5"
    environment:
      - ETCD_AUTO_COMPACTION_MODE=revision
      - ETCD_AUTO_COMPACTION_RETENTION=1000

@@ -28,10 +24,6 @@ services:
  minio:
    container_name: milvus-minio
    image: minio/minio:RELEASE.2023-03-20T20-16-18Z
-    deploy:
-      resources:
-        limits:
-          cpus: "0.5"
    environment:
      MINIO_ACCESS_KEY: minioadmin
      MINIO_SECRET_KEY: minioadmin

@@ -49,31 +41,25 @@

  standalone:
    container_name: milvus-standalone
-    image: milvusdb/milvus:latest
-    deploy:
-      resources:
-        limits:
-          cpus: "8"
-          memory: 32G
+    image: milvusdb/milvus:v2.4.6
    command: ["milvus", "run", "standalone"]
    security_opt:
      - seccomp:unconfined
    environment:
      ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
-      DNNL_ENABLE: 0
    volumes:
-      - ./milvus.yaml:/milvus/configs/milvus.yaml
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml
      - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
    healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:9092/healthz"]
+      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
      interval: 30s
      start_period: 90s
      timeout: 20s
      retries: 3
    ports:
      - "19530:19530"
-      - "9092:9092"
+      - "9091:9091"
    depends_on:
      - "etcd"
      - "minio"
comps/vectorstores/langchain/milvus/milvus.yaml (36 additions, 34 deletions)

@@ -105,7 +105,9 @@ minio:
  region: # Specify minio storage system location region
  useVirtualHost: false # Whether use virtual host mode for bucket
  requestTimeoutMs: 10000 # minio timeout for request time in milliseconds
-  listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using oss client by default, decrease these configuration if ListObjects timeout
+  # The maximum number of objects requested per batch in minio ListObjects rpc,
+  # 0 means using oss client by default, decrease these configuration if ListObjects timeout
+  listObjectsMaxKeys: 0

# Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka.
# You can change your mq by setting mq.type field.
@@ -120,6 +122,10 @@ mq:
  pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds
  pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes
  mqBufSize: 16 # MQ client consumer buffer length
+  dispatcher:
+    mergeCheckInterval: 1 # the interval time(in seconds) for dispatcher to check whether to merge
+    targetBufSize: 16 # the length of channel buffer for targe
+    maxTolerantLag: 3 # Default value: "3", the timeout(in seconds) that target sends msgPack

# Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.
pulsar:
@@ -182,7 +188,7 @@ natsmq:
# Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests
rootCoord:
  dmlChannelNum: 16 # The number of dml channels created at system startup
-  maxPartitionNum: 4096 # Maximum number of partitions in a collection
+  maxPartitionNum: 1024 # Maximum number of partitions in a collection
  minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed
  enableActiveStandby: false
  maxDatabaseNum: 64 # Maximum number of database
@@ -200,7 +206,6 @@ rootCoord:
proxy:
  timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick
  healthCheckTimeout: 3000 # ms, the interval that to do component healthy check
-  healthCheckTimetout: 3000 # ms, the interval that to do component healthy check
  msgStream:
    timeTick:
      bufSize: 512
@@ -217,6 +222,7 @@ proxy:
  ginLogging: true
  ginLogSkipPaths: / # skip url path for gin log
  maxTaskNum: 1024 # max task number of proxy task queue
+  mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection
  accessLog:
    enable: false # if use access log
    minioEnable: false # if upload sealed access log file to minio
@@ -244,7 +250,7 @@ proxy:
    port: # high-level restful api
    acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64
    enablePprof: true # Whether to enable pprof middleware on the metrics port
-  ip: 0.0.0.0 # if not specified, use the first unicastable address
+  ip: # if not specified, use the first unicastable address
  port: 19530
  internalPort: 19529
  grpc:
@@ -282,6 +288,8 @@ queryCoord:
  channelTaskTimeout: 60000 # 1 minute
  segmentTaskTimeout: 120000 # 2 minute
  distPullInterval: 500
+  collectionObserverInterval: 200
+  checkExecutedFlagInterval: 100
  heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available
  loadTimeoutSeconds: 600
  distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds
@@ -298,6 +306,7 @@
  checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session
  gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
  enableStoppingBalance: true # whether enable stopping balance
+  channelExclusiveNodeFactor: 4 # the least node number for enable channel's exclusive mode
  cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds
  ip: # if not specified, use the first unicastable address
  port: 19531
@@ -320,24 +329,30 @@
      nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist
      memExpansionRate: 1.15 # extra memory needed by building interim index
      buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
+    knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
  loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
  enableDisk: false # enable querynode load disk index, and search on disk index
  maxDiskUsagePercentage: 95
  cache:
    enabled: true
    memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024
    readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
-    # options: async, sync, off.
+    # options: async, sync, disable.
    # Specifies the necessity for warming up the chunk cache.
-    # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the
+    # 1. If set to "sync" or "async" the original vector data will be synchronously/asynchronously loaded into the
    # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency
    # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage;
-    # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query.
-    warmup: async
+    # 2. If set to "disable" original vector data will only be loaded into the chunk cache during search/query.
+    warmup: disable
  mmap:
    mmapEnabled: false # Enable mmap for loading data
+    lazyloadEnabled: false # Enable lazyload for loading data
+  lazyload:
+    enabled: false # Enable lazyload for loading data
+    waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve
+    requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default
+    requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default
+    maxRetryTimes: 1 # max retry times for lazy load, 1 by default
+    maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default
  grouping:
    enabled: true
    maxNQ: 1000
@@ -403,9 +418,11 @@ indexNode:
dataCoord:
  channel:
    watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
+    balanceWithRpc: true # Whether to enable balance with RPC, default to use etcd watch
+    legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which doesn't have rpc based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels
    balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing
    balanceInterval: 360 # The interval with which the channel manager check dml channel balance status
-    checkInterval: 10 # The interval in seconds with which the channel manager advances channel states
+    checkInterval: 1 # The interval in seconds with which the channel manager advances channel states
    notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds).
  segment:
    maxSize: 1024 # Maximum size of a segment in MB
@@ -485,7 +502,7 @@ dataNode:
      coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds
  segment:
    insertBufSize: 16777216 # Max buffer size to flush for a single segment.
-    deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB
+    deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB
    syncPeriod: 600 # The period to sync segments if buffer is not empty.
  memory:
    forceSyncEnable: true # Set true to force sync if memory usage is too high
@@ -536,8 +553,6 @@ log:
grpc:
  log:
    level: WARNING
-  serverMaxSendSize: 536870912
-  serverMaxRecvSize: 268435456
  gracefulStopTimeout: 10 # second, time to wait graceful stop finish
  client:
    compressionEnabled: false
@@ -550,8 +565,6 @@
    minResetInterval: 1000
    maxCancelError: 32
    minSessionCheckInterval: 200
-  clientMaxSendSize: 268435456
-  clientMaxRecvSize: 536870912

# Configure the proxy tls enable.
tls:
@@ -560,18 +573,6 @@
  caPemPath: configs/cert/ca.pem

common:
-  chanNamePrefix:
-    cluster: by-dev
-    rootCoordTimeTick: rootcoord-timetick
-    rootCoordStatistics: rootcoord-statistics
-    rootCoordDml: rootcoord-dml
-    replicateMsg: replicate-msg
-    queryTimeTick: queryTimeTick
-    dataCoordTimeTick: datacoord-timetick-channel
-    dataCoordSegmentInfo: segment-info-channel
-  subNamePrefix:
-    dataCoordSubNamePrefix: dataCoord
-    dataNodeSubNamePrefix: dataNode
  defaultPartitionName: _default # default partition name for a collection
  defaultIndexName: _default_idx # default index name
  entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
@@ -617,7 +618,7 @@ common:
  ttMsgEnabled: true # Whether the instance disable sending ts messages
  traceLogMode: 0 # trace request info
  bloomFilterSize: 100000 # bloom filter initial size
-  maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter
+  maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter

# QuotaConfig, configurations of Milvus quota and limits.
# By default, we enable:
@@ -631,7 +632,7 @@
# 4. DQL result rate protection;
# If necessary, you can also manually force to deny RW requests.
quotaAndLimits:
-  enabled: false # `true` to enable quota and limits, `false` to disable.
+  enabled: true # `true` to enable quota and limits, `false` to disable.
  # quotaCenterCollectInterval is the time interval that quotaCenter
  # collects metrics from Proxies, Query cluster and Data cluster.
  # seconds, (0 ~ 65536)
@@ -649,10 +650,10 @@
    db:
      max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex
  flushRate:
-    enabled: false
+    enabled: true
    max: -1 # qps, default no limit, rate for flush
    collection:
-      max: -1 # qps, default no limit, rate for flush at collection level.
+      max: 0.1 # qps, default no limit, rate for flush at collection level.
    db:
      max: -1 # qps of db level, default no limit, rate for flush
  compactionRate:
@@ -719,6 +720,7 @@
  limits:
    maxCollectionNum: 65536
    maxCollectionNumPerDB: 65536
+    maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit
    maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes
  limitWriting:
    # forceDeny false means dml requests are allowed (except for some
@@ -786,8 +788,8 @@ quotaAndLimits:

trace:
  # trace exporter type, default is stdout,
-  # optional values: ['stdout', 'jaeger', 'otlp']
-  exporter: stdout
+  # optional values: ['noop','stdout', 'jaeger', 'otlp']
+  exporter: noop
  # fraction of traceID based sampler,
  # optional values: [0, 1]
  # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
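
Beyond version housekeeping, two of the milvus.yaml changes alter runtime behavior: quotaAndLimits is now enabled with a collection-level flushRate of 0.1 qps (i.e., at most one flush per collection every 10 seconds), and the trace exporter is switched to noop, so no spans are emitted. A quick sanity check that a running container picked up the mounted file, assuming the container name and mount path from the compose file above:

```bash
# The compose file mounts the customized milvus.yaml at this path
docker exec milvus-standalone grep -A 4 "flushRate" /milvus/configs/milvus.yaml
```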
tests/test_vectorstores_langchain_milvus.sh (new file, 77 additions)

@@ -0,0 +1,77 @@
#!/bin/bash
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

set -xe

WORKPATH=$(dirname "$PWD")
LOG_PATH="$WORKPATH/tests"
ip_address=$(hostname -I | awk '{print $1}')


function start_service() {
cd $WORKPATH/comps/vectorstores/langchain/milvus
rm -rf volumes/

docker compose up -d

sleep 60s
}

function validate_vectorstore() {
PORT="19530"
COLLECTION_NAME="test_col"

# test create collection
echo "[ test create ] creating collection.."
create_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/collections/create" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"dbName\": \"default\", \"dimension\": 2, \"metricType\": \"L2\", \"primaryField\": \"id\", \"vectorField\": \"vector\"}")
echo $create_response >> ${LOG_PATH}/milvus_create_col.log
if [[ $(echo $create_response | grep '{"code":200') ]]; then
echo "[ test create ] create collection succeed"
else
echo "[ test create ] create collection failed"
exit 1
fi

# test insert data
echo "[ test insert ] inserting data.."
insert_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/insert" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }")
echo $insert_response >> ${LOG_PATH}/milvus_insert_data.log
if [[ $(echo $insert_response | grep '{"code":200,"data":{"insertCount":1') ]]; then
echo "[ test insert ] insert data succeed"
else
echo "[ test insert ] insert data failed"
exit 1
fi

# test search data
echo "[ test search ] searching data.."
search_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/search" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"vector\":[1,2] }")
echo $search_response>> ${LOG_PATH}/milvus_search_data.log
if [[ $(echo $search_response | grep '{"code":200,"data":') ]]; then
echo "[ test search ] search data succeed"
else
echo "[ test search ] search data failed"
exit 1
fi
}

function stop_docker() {
cid=$(docker ps -aq --filter "name=milvus-*")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
}

function main() {

stop_docker

start_service

validate_vectorstore

stop_docker
echo y | docker system prune

}

main
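
A note on running this test locally: WORKPATH is computed as the parent of the current directory, so the script expects to be launched from the tests/ directory of a checkout, with Docker (compose plugin) and curl available:

```bash
cd tests
bash test_vectorstores_langchain_milvus.sh
```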
