# GenAIChat.yml
# The version of the Xpanse description language.
version: 1.0
# The category of the service.
category: compute
# The service provided by the ISV; the name will be shown on the console as a service.
name: terraform-ecs-genai
# The version of the service; the end user can select the version they want to deploy.
serviceVersion: 1.0.0
# Free-text description of the service.
description: This is an enhanced compute services by ISV-A.
# Because a user may have more than one service, the namespace can be used to separate the clusters.
namespace: ISV-A
# Icon for the service, given as a base64-encoded PNG data URI.
# The body of a literal block scalar must be indented deeper than its key.
icon: |
  data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAGQAAABkCAYAAABw4pVUAAAACXBIWXMAAAsTAAALEwEAmpwYAAACg0lEQVR4nO2dP24TURCHf65IxwFcADVHMSUHAKfCHTeIFIocgXAEXITUVrJNjsABYg6AJaBf9MRYeiAWBTn7dmbyfdLI1vqPZvx7M7OzK+tJAAAAAAAAAAAwATNJx5IuJW2S2KWkpcUWipk53ye1T9FEOa6cv3Wwsjf3ZLdVXCVTwrDPjhLAI+XhSNK2ypIwdOZ0ecxGFzG2kE5nji2k05ljC+l05thCOp05trs43Tu3IRBECNI0Q24knTmxGzJEOpEfThAEQdyVrCkyZCXpu6Q3fxwnQyYSZG2L4SOC+MiQuaS39lhDhtBD2uC9hwxBhjiYyHsm9V9M/YP3CPI7TOoNoYc4A0Gc4V2QFZO6L0HWTOq+BJkzqfsSZAgGQwRpA/fUnYEgzuCeujO8zyFD0NQRpA3eM2TFpO5LkDWTui9B5kzqvgQZgqaOIG1gMHQGggQU5My5Pbj/h0SlixhbSKczxxbS6cyxhXQ6c2whnc4c212cfizpueLRZRXks63npWLRIYgv0gpCyWpIyFWUOTYyxBk09QcqyAtJV5J+2D3y8nyhcaFkDfDuH39JO9V4pBVEB2ZGb/ZV0rnZrjo+VqakFeSQ097rSown1fGnlSilfI1BWkEO6SHf7LPv//Laub1W3jMGCPKfgnxAkPYl68q+f2dlas8zStY0ab2omvfOylTJDJr6hNeyTjnt9TcYLqx8laGQwdCBIFOQ9iyLy+8NCbmKMsdGhjiDHuIMBHEGJcsZIRtf5thCOp05tpBOZ45tv23e1raay8KRpC8Rt81bVldjtw42hNzck+3FKPZagZjZCuqT2kW0rVdlDi9NmE0Su5D0KqIYAAAAAAAAAACKz09haty1w+ee7QAAAABJRU5ErkJggg==
# Reserved for CSP, HuaweiCloud. ap-southeast-3 refers to Singapore region.
# Each region entry is a (name, site, area) triple.
cloudServiceProvider:
  name: HuaweiCloud
  regions:
    # NOTE(review): ap-southeast-3 appears under both the "Chinese Mainland" and
    # the "International" site; confirm the first entry is intentional.
    - name: ap-southeast-3
      site: Chinese Mainland
      area: Asia China
    - name: ap-southeast-3
      site: International
      area: Asia China
    - name: eu-west-0
      site: International
      area: Europe Paris
    - name: eu-west-101
      site: Europe
      area: Europe Dublin
billing:
  # The supported modes of billing (`Fixed`, `Pay per Use`).
  billingModes:
    - Fixed
    - Pay per Use
  # The billing mode used when the end user does not pick one.
  defaultBillingMode: Fixed
serviceHostingType: self
# The flavor of the service, the @category/@name/@version/@flavor can locate the specific service to be deployed.
# NOTE(review): nesting below is reconstructed from key order and the inline
# comments; verify it against the Xpanse OCL schema in use.
flavors:
  serviceFlavors:
    - name: 16vCPUs-64GB-normal
      priority: 1
      # The pricing of the flavor.
      pricing:
        # Used to calculate charges when users select 'pay_per_use' as the billing mode.
        resourceUsage:
          resources:
            - deployResourceKind: vm
              count: 1
              properties:
                cloud_service_type: hws.service.type.ec2
                resource_type: hws.resource.type.vm
                resource_spec: c7t.4xlarge.4.linux
            - deployResourceKind: volume
              count: 1
              properties:
                cloud_service_type: hws.service.type.ebs
                resource_type: hws.resource.type.volume
                resource_spec: SSD
                resource_size: 1000
                size_measure_id: 17
            - deployResourceKind: publicIP
              count: 1
              properties:
                cloud_service_type: hws.service.type.vpc
                resource_type: hws.resource.type.bandwidth
                resource_spec: 12_bgp
                resource_size: 300
                size_measure_id: 15
          licensePrices:
            - regionName: any
              siteName: Chinese Mainland
              price:
                cost: 1.50
                currency: CNY
                period: hourly
            - regionName: any
              siteName: International
              price:
                cost: 0.0045
                currency: USD
                period: hourly
            - regionName: eu-west-101
              siteName: Europe
              price:
                cost: 0.0045
                currency: USD
                period: hourly
          markUpPrices:
            - regionName: any
              siteName: Chinese Mainland
              price:
                cost: 1.50
                currency: CNY
                period: hourly
            - regionName: any
              siteName: International
              price:
                cost: 0.0045
                currency: USD
                period: hourly
            - regionName: any
              siteName: Europe
              price:
                cost: 0.0045
                currency: USD
                period: hourly
        # Used to calculate charges when users do not select 'pay_per_use' as the billing mode.
        fixedPrices:
          - regionName: any
            siteName: Chinese Mainland
            price:
              cost: 360
              currency: CNY
              period: monthly
          - regionName: any
            siteName: International
            price:
              cost: 35.00
              currency: USD
              period: monthly
          - regionName: any
            siteName: Europe
            price:
              cost: 35.00
              currency: USD
              period: monthly
        isPriceOnlyForManagementLayer: false
      # Properties for the service, which can be used by the deployment.
      # flavor_id matches the name of the `flavor_id` variable in the deployer script.
      properties:
        flavor_id: c7t.4xlarge.4
      features:
        - High Availability
        - Maximum performance
  modificationImpact:
    isDataLost: false
    isServiceInterrupted: true
  isDowngradeAllowed: true
# The contact details of the service.
# NOTE(review): the e-mail entries look like redaction placeholders rather than
# valid addresses; replace them with the real contact addresses.
serviceProviderContactDetails:
  emails: ["[email protected]", "[email protected]"]
  phones: ["011-13422222222", "022-13344444444"]
  chats: ["test1234", "test1235"]
  websites: ["https://hw.com", "https://hwcloud.com"]
# End user license agreement content of the service.
# Literal block scalar: line breaks are preserved exactly as written.
eula: |
  This Acceptable Use Policy ("Policy") lists prohibited conduct and content when using the services provided by or on behalf of HUAWEI CLOUD and its affiliates. This Policy is an integral part of the HUAWEI CLOUD User Agreement ("User Agreement"). The examples and restrictions listed below are not exhaustive. We may update this Policy from time to time, and the updated Policy will be posted on the Website. By continuing to use the Services, you agree to abide by the latest version of this Policy. You acknowledge and agree that we may suspend or terminate the Services if you or your users violate this Policy. Terms used in the User Agreement have the same meanings in this Policy.
  Prohibited Conduct
  When accessing or using the Services, or allowing others to access or use the Services, you may not:
  1. Violate any local, national or international laws, regulations and rules;
  2. Infringe or violate the rights of others, including but not limited to privacy rights or intellectual property rights;
  3. Engage in, encourage, assist or allow others to engage in any illegal, unlawful, infringing, harmful or fraudulent behavior, including but not limited to any of the following activities: harming or attempting to harm minors in any way, pornography, illegal gambling, illegal VPN construction, Ponzi schemes, cyber attacks, phishing or damage, privately intercepting any system, program or data, monitoring service data or traffic without permission, engaging in virtual currency "mining" or virtual currency transactions;
  4. Transmit, provide, upload, download, use or reuse, disseminate or distribute any illegal, infringing, offensive, or harmful content or materials, including but not limited to those listed in the "Prohibited Content" below;
  5. Transmit any data, send or upload any material that contains viruses, worms, Trojan horses, time bombs, keyboard loggers, spyware, adware or any other harmful programs or similar computer code designed to adversely affect the operation or security of any computer hardware or software;
  6. Attack, interfere with, disrupt or adversely affect any service, hardware, software, system, website or network, including but not limited to accessing or attacking any service, hardware, software, system, website or network using large amounts of automated means (including robots, crawlers, scripts or similar data gathering or extraction methods);
  7. Access any part of the Service, account or system without authorization, or attempt to do so;
  8. Violate or adversely affect the security or integrity of the Services, hardware, software, systems, websites or networks;
  9. Distribute, disseminate or send unsolicited email, bulk email or other messages, promotions, advertising or solicitations (such as "spam");
  10. Fraudulent offers of goods or services, or any advertising, promotional or other materials containing false, deceptive or misleading statements.
# Deployment definition: the deployer tool, the variables exposed to the end
# user, and the Terraform script itself.
# NOTE(review): nesting is reconstructed from key order; verify it against the
# Xpanse OCL schema in use.
deployment:
  deployerTool:
    # kind: supported values are terraform, opentofu.
    kind: terraform
    # version: the required version of the deployer tool for the deployer scripts.
    version: "=1.6.0"
  serviceAvailabilityConfig:
    - displayName: Availability Zone
      varName: availability_zone
      mandatory: false
      description: The availability zone to deploy the service instance. If the value is empty, the service instance will be deployed in a random availability zone.
  # Context for deployment: the context includes several kinds of parameters for the deployment, such as fix_env, fix_variable, env, variable, env_env, env_variable.
  # - fix_env: Values for variables of this type are defined by the managed service provider in the OCL template. The runtime injects them into the deployer as environment variables. These variables are not visible to the end user.
  # - fix_variable: Values for variables of this type are defined by the managed service provider in the OCL template. The runtime injects them into the deployer as regular variables. These variables are not visible to the end user.
  # - env: The value of a variable of this type can be provided by the end user. If marked as mandatory, the end user must provide a value. If marked as optional and the end user does not provide one, the fallback value is read by the runtime (it can read from other sources, e.g., OS env variables). The variable is injected into the deployer as an environment variable.
  # - variable: The value of a variable of this type can be provided by the end user. If marked as mandatory, the end user must provide a value. If marked as optional and the end user does not provide one, the fallback value is read by the runtime (it can read from other sources, e.g., OS env variables). The variable is injected into the deployer as a regular variable.
  # - env_env: The value of this variable is read by the runtime (it can read from other sources, e.g., OS env variables) and injected into the deployer as an environment variable. The end user cannot see or change this variable.
  # - env_variable: The value of this variable is read by the runtime (it can read from other sources, e.g., OS env variables) and injected into the deployer as a regular variable. The end user cannot see or change this variable.
  # The parameters will be used to generate the API of the managed service.
  variables:
    - name: admin_passwd
      description: The admin password of the compute instance. If the value is empty, will create a random password.
      kind: variable
      dataType: string
      mandatory: false
      valueSchema:
        minLength: 8
        maxLength: 16
        # Single-quoted so that YAML never interprets the regex metacharacters.
        pattern: '^(?=.*?[A-Z])(?=.*?[a-z])(?=.*?[0-9])(?=.*?[#?!@$%^&*-]).{8,16}$'
      modificationImpact:
        isDataLost: false
        isServiceInterrupted: true
    - name: image_name
      description: The image name of the compute instance. If the value is empty, will use the default value to create compute instance.
      kind: variable
      dataType: string
      example: "Ubuntu 22.04 server 64bit"
      mandatory: false
      value: "Ubuntu 22.04 server 64bit"
      modificationImpact:
        isDataLost: false
        isServiceInterrupted: true
    - name: vpc_name
      description: The vpc name of the compute instance. If the value is empty, will use the default value to find or create VPC.
      kind: variable
      dataType: string
      example: "ecs-vpc-default"
      mandatory: false
      value: "ecs-vpc-default"
      modificationImpact:
        isDataLost: false
        isServiceInterrupted: true
    - name: subnet_name
      description: The sub network name of the compute instance. If the value is empty, will use the default value to find or create subnet.
      kind: variable
      dataType: string
      example: "ecs-subnet-default"
      mandatory: false
      value: "ecs-subnet-default"
      modificationImpact:
        isDataLost: false
        isServiceInterrupted: true
    - name: secgroup_name
      description: The security group name of the compute instance. If the value is empty, will use the default value to find or create security group.
      kind: variable
      dataType: string
      example: "ecs-secgroup-default"
      mandatory: false
      value: "ecs-secgroup-default"
      modificationImpact:
        isDataLost: false
        isServiceInterrupted: true
  # Terraform script executed by the deployer tool.
  deployer: |
    variable "region" {
      type        = string
      description = "The region to deploy the compute instance."
    }
    variable "availability_zone" {
      type        = string
      default     = ""
      description = "The availability zone to deploy the compute instance."
    }
    variable "flavor_id" {
      type        = string
      default     = "c7t.4xlarge.4"
      description = "The flavor_id of the compute instance."
    }
    variable "image_name" {
      type        = string
      default     = "Ubuntu 22.04 server 64bit"
      description = "The image name of the compute instance."
    }
    variable "admin_passwd" {
      type        = string
      default     = ""
      description = "The root password of the compute instance."
    }
    variable "vpc_name" {
      type        = string
      default     = "ecs-vpc-default"
      description = "The vpc name of the compute instance."
    }
    variable "subnet_name" {
      type        = string
      default     = "ecs-subnet-default"
      description = "The subnet name of the compute instance."
    }
    variable "secgroup_name" {
      type        = string
      default     = "ecs-secgroup-default"
      description = "The security group name of the compute instance."
    }

    terraform {
      required_providers {
        huaweicloud = {
          source  = "huaweicloud/huaweicloud"
          version = "~> 1.61.0"
        }
      }
    }

    provider "huaweicloud" {
      region = var.region
    }

    # Look up existing network resources by name; the "new" resources below
    # are only created when the matching lookup returns nothing.
    data "huaweicloud_availability_zones" "osc-az" {}
    data "huaweicloud_vpcs" "existing" {
      name = var.vpc_name
    }
    data "huaweicloud_vpc_subnets" "existing" {
      name = var.subnet_name
    }
    data "huaweicloud_networking_secgroups" "existing" {
      name = var.secgroup_name
    }

    locals {
      # Empty input falls back to the first availability zone / a generated password.
      availability_zone = var.availability_zone == "" ? data.huaweicloud_availability_zones.osc-az.names[0] : var.availability_zone
      admin_passwd      = var.admin_passwd == "" ? random_password.password.result : var.admin_passwd
      # Prefer the existing resource; otherwise use the one created below.
      vpc_id      = length(data.huaweicloud_vpcs.existing.vpcs) > 0 ? data.huaweicloud_vpcs.existing.vpcs[0].id : huaweicloud_vpc.new[0].id
      subnet_id   = length(data.huaweicloud_vpc_subnets.existing.subnets) > 0 ? data.huaweicloud_vpc_subnets.existing.subnets[0].id : huaweicloud_vpc_subnet.new[0].id
      secgroup_id = length(data.huaweicloud_networking_secgroups.existing.security_groups) > 0 ? data.huaweicloud_networking_secgroups.existing.security_groups[0].id : huaweicloud_networking_secgroup.new[0].id
    }

    resource "huaweicloud_vpc" "new" {
      count = length(data.huaweicloud_vpcs.existing.vpcs) == 0 ? 1 : 0
      name  = var.vpc_name
      cidr  = "192.168.0.0/16"
    }

    resource "huaweicloud_vpc_subnet" "new" {
      # Keyed on the subnet lookup (not the VPC lookup) so that it always agrees
      # with local.subnet_id, which indexes new[0] whenever no subnet was found.
      count      = length(data.huaweicloud_vpc_subnets.existing.subnets) == 0 ? 1 : 0
      vpc_id     = local.vpc_id
      name       = var.subnet_name
      cidr       = "192.168.10.0/24"
      gateway_ip = "192.168.10.1"
    }

    resource "huaweicloud_networking_secgroup" "new" {
      count = length(data.huaweicloud_networking_secgroups.existing.security_groups) == 0 ? 1 : 0
      name  = var.secgroup_name
      # NOTE(review): the description mentions Kafka and looks copied from
      # another template; confirm and reword for this service.
      description = "Kafka cluster security group"
    }

    # Ingress rules are only added to a security group created by this script.
    # SSH.
    resource "huaweicloud_networking_secgroup_rule" "secgroup_rule_0" {
      count             = length(data.huaweicloud_networking_secgroups.existing.security_groups) == 0 ? 1 : 0
      direction         = "ingress"
      ethertype         = "IPv4"
      protocol          = "tcp"
      port_range_min    = 22
      port_range_max    = 22
      remote_ip_prefix  = "121.37.117.211/32"
      security_group_id = local.secgroup_id
    }
    resource "huaweicloud_networking_secgroup_rule" "secgroup_rule_1" {
      count             = length(data.huaweicloud_networking_secgroups.existing.security_groups) == 0 ? 1 : 0
      direction         = "ingress"
      ethertype         = "IPv4"
      protocol          = "tcp"
      port_range_min    = 8080
      port_range_max    = 8088
      remote_ip_prefix  = "121.37.117.211/32"
      security_group_id = local.secgroup_id
    }
    resource "huaweicloud_networking_secgroup_rule" "secgroup_rule_2" {
      count             = length(data.huaweicloud_networking_secgroups.existing.security_groups) == 0 ? 1 : 0
      direction         = "ingress"
      ethertype         = "IPv4"
      protocol          = "tcp"
      port_range_min    = 9090
      port_range_max    = 9099
      remote_ip_prefix  = "121.37.117.211/32"
      security_group_id = local.secgroup_id
    }

    # Random suffix shared by the resource names below.
    resource "random_id" "new" {
      byte_length = 4
    }

    # Generated password, used only when admin_passwd is left empty.
    resource "random_password" "password" {
      length           = 12
      upper            = true
      lower            = true
      numeric          = true
      special          = true
      min_special      = 1
      override_special = "#%@"
    }

    data "huaweicloud_images_image" "image" {
      name                  = var.image_name
      most_recent           = true
      enterprise_project_id = "0"
    }

    resource "huaweicloud_evs_volume" "volume" {
      name              = "volume-tf-${random_id.new.hex}"
      description       = "my volume"
      volume_type       = "SSD"
      size              = 1000
      availability_zone = local.availability_zone
      tags = {
        foo = "bar"
        key = "value"
      }
    }

    resource "huaweicloud_compute_volume_attach" "attached" {
      instance_id = huaweicloud_compute_instance.ecs-tf.id
      volume_id   = huaweicloud_evs_volume.volume.id
    }

    resource "huaweicloud_vpc_eip" "eip-tf" {
      publicip {
        type = "5_bgp"
      }
      bandwidth {
        name        = "eip-tf-${random_id.new.hex}"
        size        = 300
        share_type  = "PER"
        charge_mode = "traffic"
      }
    }

    resource "huaweicloud_compute_eip_associate" "associated" {
      public_ip   = huaweicloud_vpc_eip.eip-tf.address
      instance_id = huaweicloud_compute_instance.ecs-tf.id
    }

    resource "huaweicloud_compute_keypair" "keypair" {
      name = "keypair-xpanse-6"
    }

    locals {
      # Public address handed to the bootstrap script as host_ip.
      instance_public_ip = huaweicloud_vpc_eip.eip-tf.address
    }

    resource "huaweicloud_compute_instance" "ecs-tf" {
      availability_zone  = local.availability_zone
      name               = "ecs-tf-${random_id.new.hex}"
      flavor_id          = var.flavor_id
      system_disk_type   = "SSD"
      system_disk_size   = 1000
      security_group_ids = [local.secgroup_id]
      image_id           = data.huaweicloud_images_image.image.id
      admin_pass         = local.admin_passwd
      key_pair           = huaweicloud_compute_keypair.keypair.name
      network {
        uuid = local.subnet_id
      }
      # Bootstrap script: installs Docker, fetches the OPEA GenAIExamples
      # repository and starts the ChatQnA compose stack. "<<-" strips the
      # common leading indentation, so the shebang ends up at column 0.
      user_data = <<-EOF
      #!/bin/bash
      sudo apt-get update
      sudo apt-get install --yes \
        apt-transport-https \
        ca-certificates \
        curl \
        software-properties-common
      curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
      sudo add-apt-repository --yes "deb [arch=amd64] https://download.docker.com/linux/ubuntu $(lsb_release -cs) stable"
      sudo apt-get update
      sudo apt-get --yes install docker-ce
      sudo systemctl start docker
      sudo systemctl enable docker
      curl -SL https://github.com/docker/compose/releases/download/v2.30.3/docker-compose-linux-x86_64 -o /usr/local/bin/docker-compose
      chmod +x /usr/local/bin/docker-compose
      git clone https://github.com/opea-project/GenAIExamples.git /GenAIExamples
      export host_ip="${local.instance_public_ip}"
      # Placeholder value only. Never commit a real Hugging Face token to this file.
      export HUGGINGFACEHUB_API_TOKEN="hf"
      export EMBEDDING_MODEL_ID="BAAI/bge-base-en-v1.5"
      export RERANK_MODEL_ID="BAAI/bge-reranker-base"
      export LLM_MODEL_ID="Intel/neural-chat-7b-v3-3"
      export INDEX_NAME="rag-redis"
      cd /GenAIExamples/ChatQnA/docker_compose/intel/cpu/xeon/
      source ./set_env.sh
      docker compose up -d
      EOF
    }

    # Reads the private key file; depends_on defers the read until the key
    # pair resource exists.
    # NOTE(review): assumes the provider writes ./keypair-xpanse-6.pem next to
    # the script when no key_file is set; confirm against the provider docs.
    data "local_file" "example" {
      filename   = pathexpand("./keypair-xpanse-6.pem")
      depends_on = [huaweicloud_compute_keypair.keypair]
    }

    output "pem_content" {
      value = data.local_file.example.content
    }
    output "ecs-host" {
      value = huaweicloud_compute_instance.ecs-tf.access_ip_v4
    }
    output "ecs-public-ip" {
      value = huaweicloud_vpc_eip.eip-tf.address
    }
    output "admin_passwd" {
      # The generated password is marked sensitive; unwrap it so it can be output.
      value = var.admin_passwd == "" ? nonsensitive(local.admin_passwd) : local.admin_passwd
    }