diff --git a/comps/vectorstores/langchain/milvus/README.md b/comps/vectorstores/langchain/milvus/README.md
index d02508351..b0f19caf4 100644
--- a/comps/vectorstores/langchain/milvus/README.md
+++ b/comps/vectorstores/langchain/milvus/README.md
@@ -6,7 +6,7 @@ Configure your Milvus instance to suit your application scenarios by adjusting c
 Customized the path to store data, default is /volumes
 
 ```bash
-export DOCKER_VOLUME_DIRECTORY=./your_path
+export DOCKER_VOLUME_DIRECTORY=${your_path}
 ```
 
 ## 2. Run Milvus service
diff --git a/comps/vectorstores/langchain/milvus/docker-compose.yml b/comps/vectorstores/langchain/milvus/docker-compose.yml
index 125463752..d6c39d0f0 100644
--- a/comps/vectorstores/langchain/milvus/docker-compose.yml
+++ b/comps/vectorstores/langchain/milvus/docker-compose.yml
@@ -7,10 +7,6 @@ services:
   etcd:
     container_name: milvus-etcd
     image: quay.io/coreos/etcd:v3.5.5
-    deploy:
-      resources:
-        limits:
-          cpus: "0.5"
     environment:
       - ETCD_AUTO_COMPACTION_MODE=revision
       - ETCD_AUTO_COMPACTION_RETENTION=1000
@@ -28,10 +24,6 @@ services:
   minio:
     container_name: milvus-minio
     image: minio/minio:RELEASE.2023-03-20T20-16-18Z
-    deploy:
-      resources:
-        limits:
-          cpus: "0.5"
     environment:
       MINIO_ACCESS_KEY: minioadmin
       MINIO_SECRET_KEY: minioadmin
@@ -49,31 +41,25 @@ services:
 
   standalone:
     container_name: milvus-standalone
-    image: milvusdb/milvus:latest
-    deploy:
-      resources:
-        limits:
-          cpus: "8"
-          memory: 32G
+    image: milvusdb/milvus:v2.4.6
     command: ["milvus", "run", "standalone"]
     security_opt:
       - seccomp:unconfined
     environment:
       ETCD_ENDPOINTS: etcd:2379
      MINIO_ADDRESS: minio:9000
-      DNNL_ENABLE: 0
     volumes:
-      - ./milvus.yaml:/milvus/configs/milvus.yaml
+      - ${DOCKER_VOLUME_DIRECTORY:-.}/milvus.yaml:/milvus/configs/milvus.yaml
       - ${DOCKER_VOLUME_DIRECTORY:-.}/volumes/milvus:/var/lib/milvus
     healthcheck:
-      test: ["CMD", "curl", "-f", "http://localhost:9092/healthz"]
+      test: ["CMD", "curl", "-f", "http://localhost:9091/healthz"]
       interval: 30s
       start_period: 90s
       timeout: 20s
       retries: 3
     ports:
       - "19530:19530"
-      - "9092:9092"
+      - "9091:9091"
    depends_on:
       - "etcd"
       - "minio"
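Reviewer note: the standalone healthcheck and port mapping above move from 9092 to 9091, the port Milvus standalone actually serves its health API on. A quick local sanity check, assuming the compose file above with default ports, might look like this:

```bash
# Bring the stack up, wait out the healthcheck's 90s start_period, then
# probe the corrected endpoint; curl -f exits non-zero on an HTTP error.
docker compose up -d
sleep 90
curl -f http://localhost:9091/healthz && echo "milvus standalone is healthy"
```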
diff --git a/comps/vectorstores/langchain/milvus/milvus.yaml b/comps/vectorstores/langchain/milvus/milvus.yaml
index de29dfe3d..b9f22cb3d 100644
--- a/comps/vectorstores/langchain/milvus/milvus.yaml
+++ b/comps/vectorstores/langchain/milvus/milvus.yaml
@@ -105,7 +105,9 @@ minio:
   region: # Specify minio storage system location region
   useVirtualHost: false # Whether use virtual host mode for bucket
   requestTimeoutMs: 10000 # minio timeout for request time in milliseconds
-  listObjectsMaxKeys: 0 # The maximum number of objects requested per batch in minio ListObjects rpc, 0 means using oss client by default, decrease these configuration if ListObjects timeout
+  # The maximum number of objects requested per batch in minio ListObjects rpc,
+  # 0 means using oss client by default, decrease these configuration if ListObjects timeout
+  listObjectsMaxKeys: 0
 
 # Milvus supports four MQ: rocksmq(based on RockDB), natsmq(embedded nats-server), Pulsar and Kafka.
 # You can change your mq by setting mq.type field.
@@ -120,6 +122,10 @@ mq:
   pursuitLag: 10 # time tick lag threshold to enter pursuit mode, in seconds
   pursuitBufferSize: 8388608 # pursuit mode buffer size in bytes
   mqBufSize: 16 # MQ client consumer buffer length
+  dispatcher:
+    mergeCheckInterval: 1 # the interval (in seconds) at which the dispatcher checks whether to merge
+    targetBufSize: 16 # the length of channel buffer for target
+    maxTolerantLag: 3 # the timeout (in seconds) for the target to send a msgPack, default 3
 
 # Related configuration of pulsar, used to manage Milvus logs of recent mutation operations, output streaming log, and provide log publish-subscribe services.
 pulsar:
@@ -182,7 +188,7 @@ natsmq:
 # Related configuration of rootCoord, used to handle data definition language (DDL) and data control language (DCL) requests
 rootCoord:
   dmlChannelNum: 16 # The number of dml channels created at system startup
-  maxPartitionNum: 4096 # Maximum number of partitions in a collection
+  maxPartitionNum: 1024 # Maximum number of partitions in a collection
   minSegmentSizeToEnableIndex: 1024 # It's a threshold. When the segment size is less than this value, the segment will not be indexed
   enableActiveStandby: false
   maxDatabaseNum: 64 # Maximum number of database
@@ -200,7 +206,6 @@ rootCoord:
 proxy:
   timeTickInterval: 200 # ms, the interval that proxy synchronize the time tick
   healthCheckTimeout: 3000 # ms, the interval that to do component healthy check
-  healthCheckTimetout: 3000 # ms, the interval that to do component healthy check
   msgStream:
     timeTick:
       bufSize: 512
@@ -217,6 +222,7 @@ proxy:
   ginLogging: true
   ginLogSkipPaths: / # skip url path for gin log
   maxTaskNum: 1024 # max task number of proxy task queue
+  mustUsePartitionKey: false # switch for whether proxy must use partition key for the collection
   accessLog:
     enable: false # if use access log
     minioEnable: false # if upload sealed access log file to minio
@@ -244,7 +250,7 @@ proxy:
     port: # high-level restful api
     acceptTypeAllowInt64: true # high-level restful api, whether http client can deal with int64
     enablePprof: true # Whether to enable pprof middleware on the metrics port
-  ip: 0.0.0.0 # if not specified, use the first unicastable address
+  ip: # if not specified, use the first unicastable address
   port: 19530
   internalPort: 19529
 grpc:
@@ -282,6 +288,8 @@ queryCoord:
   channelTaskTimeout: 60000 # 1 minute
   segmentTaskTimeout: 120000 # 2 minute
   distPullInterval: 500
+  collectionObserverInterval: 200
+  checkExecutedFlagInterval: 100
   heartbeatAvailableInterval: 10000 # 10s, Only QueryNodes which fetched heartbeats within the duration are available
   loadTimeoutSeconds: 600
   distRequestTimeout: 5000 # the request timeout for querycoord fetching data distribution from querynodes, in milliseconds
@@ -298,6 +306,7 @@ queryCoord:
   checkNodeSessionInterval: 60 # the interval(in seconds) of check querynode cluster session
   gracefulStopTimeout: 5 # seconds. force stop node without graceful stop
   enableStoppingBalance: true # whether enable stopping balance
+  channelExclusiveNodeFactor: 4 # the minimum number of nodes required to enable channel's exclusive mode
   cleanExcludeSegmentInterval: 60 # the time duration of clean pipeline exclude segment which used for filter invalid data, in seconds
   ip: # if not specified, use the first unicastable address
   port: 19531
@@ -320,6 +329,7 @@ queryNode:
       nprobe: 16 # nprobe to search small index, based on your accuracy requirement, must smaller than nlist
       memExpansionRate: 1.15 # extra memory needed by building interim index
       buildParallelRate: 0.5 # the ratio of building interim index parallel matched with cpu num
+    knowhereScoreConsistency: false # Enable knowhere strong consistency score computation logic
   loadMemoryUsageFactor: 1 # The multiply factor of calculating the memory usage while loading segments
   enableDisk: false # enable querynode load disk index, and search on disk index
   maxDiskUsagePercentage: 95
@@ -327,17 +337,22 @@ queryNode:
     enabled: true
     memoryLimit: 2147483648 # 2 GB, 2 * 1024 *1024 *1024
     readAheadPolicy: willneed # The read ahead policy of chunk cache, options: `normal, random, sequential, willneed, dontneed`
-    # options: async, sync, off.
+    # options: async, sync, disable.
     # Specifies the necessity for warming up the chunk cache.
-    # 1. If set to "sync" or "async," the original vector data will be synchronously/asynchronously loaded into the
+    # 1. If set to "sync" or "async", the original vector data will be synchronously/asynchronously loaded into the
     # chunk cache during the load process. This approach has the potential to substantially reduce query/search latency
     # for a specific duration post-load, albeit accompanied by a concurrent increase in disk usage;
-    # 2. If set to "off," original vector data will only be loaded into the chunk cache during search/query.
-    warmup: async
+    # 2. If set to "disable", original vector data will only be loaded into the chunk cache during search/query.
+    warmup: disable
   mmap:
     mmapEnabled: false # Enable mmap for loading data
-    mmapEnabled: false # Enable mmap for loading data
-    lazyloadEnabled: false # Enable lazyload for loading data
+  lazyload:
+    enabled: false # Enable lazyload for loading data
+    waitTimeout: 30000 # max wait timeout duration in milliseconds before start to do lazyload search and retrieve
+    requestResourceTimeout: 5000 # max timeout in milliseconds for waiting request resource for lazy load, 5s by default
+    requestResourceRetryInterval: 2000 # retry interval in milliseconds for waiting request resource for lazy load, 2s by default
+    maxRetryTimes: 1 # max retry times for lazy load, 1 by default
+    maxEvictPerRetry: 1 # max evict count for lazy load, 1 by default
   grouping:
     enabled: true
     maxNQ: 1000
@@ -403,9 +418,11 @@ indexNode:
 dataCoord:
   channel:
     watchTimeoutInterval: 300 # Timeout on watching channels (in seconds). Datanode tickler update watch progress will reset timeout timer.
+    balanceWithRpc: true # Whether to enable balancing over RPC; by default etcd watch is used
+    legacyVersionWithoutRPCWatch: 2.4.1 # Datanodes <= this version are considered as legacy nodes, which don't have rpc-based watch(). This is only used during rolling upgrade where legacy nodes won't get new channels
     balanceSilentDuration: 300 # The duration after which the channel manager start background channel balancing
     balanceInterval: 360 # The interval with which the channel manager check dml channel balance status
-    checkInterval: 10 # The interval in seconds with which the channel manager advances channel states
+    checkInterval: 1 # The interval in seconds with which the channel manager advances channel states
     notifyChannelOperationTimeout: 5 # Timeout notifing channel operations (in seconds).
   segment:
     maxSize: 1024 # Maximum size of a segment in MB
@@ -485,7 +502,7 @@ dataNode:
     coldTime: 60 # Turn on skip mode after there are only timetick msg for x seconds
   segment:
     insertBufSize: 16777216 # Max buffer size to flush for a single segment.
-    deleteBufBytes: 67108864 # Max buffer size in bytes to flush del for a single channel, default as 16MB
+    deleteBufBytes: 16777216 # Max buffer size in bytes to flush del for a single channel, default as 16MB
     syncPeriod: 600 # The period to sync segments if buffer is not empty.
   memory:
     forceSyncEnable: true # Set true to force sync if memory usage is too high
@@ -536,8 +553,6 @@ log:
 grpc:
   log:
     level: WARNING
-  serverMaxSendSize: 536870912
-  serverMaxRecvSize: 268435456
   gracefulStopTimeout: 10 # second, time to wait graceful stop finish
   client:
     compressionEnabled: false
@@ -550,8 +565,6 @@ grpc:
     minResetInterval: 1000
     maxCancelError: 32
     minSessionCheckInterval: 200
-    clientMaxSendSize: 268435456
-    clientMaxRecvSize: 536870912
 
 # Configure the proxy tls enable.
 tls:
@@ -560,18 +573,6 @@ tls:
   caPemPath: configs/cert/ca.pem
 
 common:
-  chanNamePrefix:
-    cluster: by-dev
-    rootCoordTimeTick: rootcoord-timetick
-    rootCoordStatistics: rootcoord-statistics
-    rootCoordDml: rootcoord-dml
-    replicateMsg: replicate-msg
-    queryTimeTick: queryTimeTick
-    dataCoordTimeTick: datacoord-timetick-channel
-    dataCoordSegmentInfo: segment-info-channel
-  subNamePrefix:
-    dataCoordSubNamePrefix: dataCoord
-    dataNodeSubNamePrefix: dataNode
   defaultPartitionName: _default # default partition name for a collection
   defaultIndexName: _default_idx # default index name
   entityExpiration: -1 # Entity expiration in seconds, CAUTION -1 means never expire
@@ -617,7 +618,7 @@ common:
   ttMsgEnabled: true # Whether the instance disable sending ts messages
   traceLogMode: 0 # trace request info
   bloomFilterSize: 100000 # bloom filter initial size
-  maxBloomFalsePositive: 0.05 # max false positive rate for bloom filter
+  maxBloomFalsePositive: 0.001 # max false positive rate for bloom filter
 
 # QuotaConfig, configurations of Milvus quota and limits.
 # By default, we enable:
@@ -631,7 +632,7 @@ common:
 # 4. DQL result rate protection;
 # If necessary, you can also manually force to deny RW requests.
 quotaAndLimits:
-  enabled: false # `true` to enable quota and limits, `false` to disable.
+  enabled: true # `true` to enable quota and limits, `false` to disable.
   # quotaCenterCollectInterval is the time interval that quotaCenter
   # collects metrics from Proxies, Query cluster and Data cluster.
   # seconds, (0 ~ 65536)
@@ -649,10 +650,10 @@ quotaAndLimits:
     db:
       max: -1 # qps of db level, default no limit, rate for CreateIndex, DropIndex
   flushRate:
-    enabled: false
+    enabled: true
     max: -1 # qps, default no limit, rate for flush
     collection:
-      max: -1 # qps, default no limit, rate for flush at collection level.
+      max: 0.1 # qps, rate limit for flush at collection level (at most one flush per 10 seconds)
     db:
       max: -1 # qps of db level, default no limit, rate for flush
   compactionRate:
@@ -719,6 +720,7 @@ quotaAndLimits:
   limits:
     maxCollectionNum: 65536
     maxCollectionNumPerDB: 65536
+    maxInsertSize: -1 # maximum size of a single insert request, in bytes, -1 means no limit
     maxResourceGroupNumOfQueryNode: 1024 # maximum number of resource groups of query nodes
   limitWriting:
     # forceDeny false means dml requests are allowed (except for some
@@ -786,8 +788,8 @@ quotaAndLimits:
 
 trace:
   # trace exporter type, default is stdout,
-  # optional values: ['stdout', 'jaeger', 'otlp']
-  exporter: stdout
+  # optional values: ['noop', 'stdout', 'jaeger', 'otlp']
+  exporter: noop
   # fraction of traceID based sampler,
   # optional values: [0, 1]
   # Fractions >= 1 will always sample. Fractions < 0 are treated as zero.
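The quota changes above are worth a callout: with quotaAndLimits.enabled set to true and flushRate.collection.max at 0.1 qps, Milvus throttles flushes to at most one per collection every 10 seconds (0.1 requests/second = 1 request per 10 seconds). A quick way to confirm a running container picked up the mounted config rather than the image default (container name and mount path taken from the docker-compose.yml above):

```bash
# Print the flush quota section of the milvus.yaml the container actually
# loaded; expect enabled: true and collection.max: 0.1 if the mount worked.
docker exec milvus-standalone grep -A 6 "flushRate" /milvus/configs/milvus.yaml
```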
diff --git a/tests/test_vectorstores_langchain_milvus.sh b/tests/test_vectorstores_langchain_milvus.sh
new file mode 100644
index 000000000..60303017d
--- /dev/null
+++ b/tests/test_vectorstores_langchain_milvus.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+set -xe
+
+WORKPATH=$(dirname "$PWD")
+LOG_PATH="$WORKPATH/tests"
+ip_address=$(hostname -I | awk '{print $1}')
+
+
+function start_service() {
+    cd $WORKPATH/comps/vectorstores/langchain/milvus
+    rm -rf volumes/
+
+    docker compose up -d
+
+    sleep 60s
+}
+
+function validate_vectorstore() {
+    PORT="19530"
+    COLLECTION_NAME="test_col"
+
+    # test create collection
+    echo "[ test create ] creating collection.."
+    create_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/collections/create" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"dbName\": \"default\", \"dimension\": 2, \"metricType\": \"L2\", \"primaryField\": \"id\", \"vectorField\": \"vector\"}")
+    echo $create_response >> ${LOG_PATH}/milvus_create_col.log
+    if [[ $(echo $create_response | grep '{"code":200') ]]; then
+        echo "[ test create ] create collection succeed"
+    else
+        echo "[ test create ] create collection failed"
+        exit 1
+    fi
+
+    # test insert data
+    echo "[ test insert ] inserting data.."
+    insert_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/insert" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"data\": [{\"vector\":[1,2]}] }")
+    echo $insert_response >> ${LOG_PATH}/milvus_insert_data.log
+    if [[ $(echo $insert_response | grep '{"code":200,"data":{"insertCount":1') ]]; then
+        echo "[ test insert ] insert data succeed"
+    else
+        echo "[ test insert ] insert data failed"
+        exit 1
+    fi
+
+    # test search data
+    echo "[ test search ] searching data.."
+    search_response=$(curl -X POST "http://$ip_address:$PORT/v1/vector/search" -H "accept: application/json" -H "Content-Type: application/json" -d "{ \"collectionName\": \"$COLLECTION_NAME\", \"vector\":[1,2] }")
+    echo $search_response >> ${LOG_PATH}/milvus_search_data.log
+    if [[ $(echo $search_response | grep '{"code":200,"data":') ]]; then
+        echo "[ test search ] search data succeed"
+    else
+        echo "[ test search ] search data failed"
+        exit 1
+    fi
+}
+
+function stop_docker() {
+    cid=$(docker ps -aq --filter "name=milvus-*")
+    if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
+}
+
+function main() {
+
+    stop_docker
+
+    start_service
+
+    validate_vectorstore
+
+    stop_docker
+    echo y | docker system prune
+
+}
+
+main
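The new test script is self-contained; a minimal invocation, assuming Docker, curl, and that the first address reported by `hostname -I` is reachable from the host:

```bash
# Run from the repository's tests/ directory so WORKPATH resolves to the
# repo root; raw API responses are appended to tests/milvus_*.log files.
cd tests
bash test_vectorstores_langchain_milvus.sh
```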