diff --git a/design/MetadataProfile.md b/design/MetadataProfile.md
new file mode 100644
index 000000000..265ef28b0
--- /dev/null
+++ b/design/MetadataProfile.md
@@ -0,0 +1,81 @@
+# Metadata Profile
+
+The metadata profile contains a list of queries used to retrieve datasource metadata, such as the list of namespaces,
+workloads and containers. Users can create metadata profiles based on their cluster or datasource provider, such as
+Prometheus or Thanos. These profiles can be tagged to the import metadata API, which then fetches metadata according to
+the metadata profile; that metadata is in turn used to create experiments and generate recommendations.
+
+This document describes the fields of a Metadata Profile and the different sets of queries supported by Kruize.
+Documentation is still in progress; stay tuned.
+
+## Attributes
+
+- **apiVersion** \
+ A string representing the version of the Kubernetes API used to create the metadata profile
+- **kind** \
+ A string representing the type of Kubernetes object
+- **metadata** \
+ A JSON object containing data that helps to uniquely identify the metadata profile, including a name string
+ - **name** \
+ A unique string name for identifying each metadata profile.
+- **profile_version** \
+ A double value specifying the current version of the profile.
+- **datasource** \
+ A string representing the datasource to import metadata from
+- **query_variables** \
+ Define the query variables to be used
+ - **name** \
+ name of the variable
+ - **datasource** \
+ datasource of the query
+ - **value_type** \
+ can be double or integer
+ - **query** \
+ either _query_ or _aggregation_functions_ is mandatory; both can be present.
+ - **kubernetes_object** \
+ k8s object that this query is tied to: "_deployment_", "_pod_", "_namespace_" or "_container_"
+ - **aggregation_functions** \
+ aggregate functions associated with this variable
+ - **function** \
+ can be '_avg_', '_sum_', '_min_', '_max_'
+ - **query** \
+ corresponding query
+ - **version** \
+ Any specific version that this query is tied to
+
+### Different set of metadata queries
+
+#### Queries to import metadata across the cluster
+
+This set of queries fetches the list of all the namespaces, workloads and containers present across the cluster.
+
+| Name | Query |
+|-------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| namespacesAcrossCluster | sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+| workloadsAcrossCluster | sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+| containersAcrossCluster | sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=""}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+
+
+
+
+#### Queries to import metadata for specific org_id and cluster_id
+
+This set of queries fetches the list of namespaces, workloads and containers for a specific `org_id` and `cluster_id`.
+
+| Name | Query |
+|------------------------------|------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| namespacesForOrgAndClusterId | sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!="", org_id="$ORG_ID$", cluster_id="$CLUSTER_ID$"}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+| workloadsForOrgAndClusterId | sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!="", org_id="$ORG_ID$", cluster_id="$CLUSTER_ID$"}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+| containersForOrgAndClusterId | sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!="", org_id="$ORG_ID$", cluster_id="$CLUSTER_ID$"}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!="", org_id="$ORG_ID$", cluster_id="$CLUSTER_ID$"}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+
+
+
+#### Queries to import metadata for custom label - ADDITIONAL_LABEL
+
+This set of queries fetches the list of namespaces, workloads and containers for a specific `ADDITIONAL_LABEL`; it is currently used by the bulk and Thanos demos.
+
+| Name | Query |
+|------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| namespacesForAdditionalLabel | sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+| workloadsForAdditionalLabel | sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m])) |
+| containersForAdditionalLabel | sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m])) |
diff --git a/manifests/autotune/metadata-profiles/bulk_cluster_metadata_local_monitoring.json b/manifests/autotune/metadata-profiles/bulk_cluster_metadata_local_monitoring.json
new file mode 100644
index 000000000..b1248012a
--- /dev/null
+++ b/manifests/autotune/metadata-profiles/bulk_cluster_metadata_local_monitoring.json
@@ -0,0 +1,48 @@
+{
+ "apiVersion": "recommender.com/v1",
+ "kind": "KruizeMetadataProfile",
+ "metadata": {
+ "name": "cluster-metadata-local-monitoring"
+ },
+ "profile_version": 1,
+ "k8s_type": "openshift",
+ "datasource": "prometheus",
+ "query_variables": [
+ {
+ "name": "namespacesForAdditionalLabel",
+ "datasource": "prometheus",
+ "value_type": "double",
+ "kubernetes_object": "container",
+ "aggregation_functions": [
+ {
+ "function": "sum",
+ "query": "sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=\"\" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]))"
+ }
+ ]
+ },
+ {
+ "name": "workloadsForAdditionalLabel",
+ "datasource": "prometheus",
+ "value_type": "double",
+ "kubernetes_object": "container",
+ "aggregation_functions": [
+ {
+ "function": "sum",
+ "query": "sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=\"\" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]))"
+ }
+ ]
+ },
+ {
+ "name": "containersForAdditionalLabel",
+ "datasource": "prometheus",
+ "value_type": "double",
+ "kubernetes_object": "container",
+ "aggregation_functions": [
+ {
+ "function": "sum",
+ "query": "sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=\"\" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=\"\" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]))"
+ }
+ ]
+ }
+ ]
+}
diff --git a/manifests/autotune/metadata-profiles/bulk_cluster_metadata_local_monitoring.yaml b/manifests/autotune/metadata-profiles/bulk_cluster_metadata_local_monitoring.yaml
new file mode 100644
index 000000000..bacf66948
--- /dev/null
+++ b/manifests/autotune/metadata-profiles/bulk_cluster_metadata_local_monitoring.yaml
@@ -0,0 +1,32 @@
+apiVersion: "recommender.com/v1"
+kind: "KruizeMetadataProfile"
+metadata:
+ name: "cluster-metadata-local-monitoring"
+profile_version: 1.0
+k8s_type: openshift
+datasource: prometheus
+query_variables:
+
+- name: namespacesForAdditionalLabel
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "namespace"
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]))'
+
+- name: workloadsForAdditionalLabel
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "container"
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]))'
+
+- name: containersForAdditionalLabel
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "container"
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!="" ADDITIONAL_LABEL}[$MEASUREMENT_DURATION_IN_MIN$m]))'
diff --git a/manifests/autotune/metadata-profiles/cluster_metadata_local_monitoring.json b/manifests/autotune/metadata-profiles/cluster_metadata_local_monitoring.json
new file mode 100644
index 000000000..d24e06805
--- /dev/null
+++ b/manifests/autotune/metadata-profiles/cluster_metadata_local_monitoring.json
@@ -0,0 +1,48 @@
+{
+ "apiVersion": "recommender.com/v1",
+ "kind": "KruizeMetadataProfile",
+ "metadata": {
+ "name": "cluster-metadata-local-monitoring"
+ },
+ "profile_version": 1,
+ "k8s_type": "openshift",
+ "datasource": "prometheus",
+ "query_variables": [
+ {
+ "name": "namespacesAcrossCluster",
+ "datasource": "prometheus",
+ "value_type": "double",
+ "kubernetes_object": "container",
+ "aggregation_functions": [
+ {
+ "function": "sum",
+ "query": "sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=\"\"}[$MEASUREMENT_DURATION_IN_MIN$m]))"
+ }
+ ]
+ },
+ {
+ "name": "workloadsAcrossCluster",
+ "datasource": "prometheus",
+ "value_type": "double",
+ "kubernetes_object": "container",
+ "aggregation_functions": [
+ {
+ "function": "sum",
+ "query": "sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=\"\"}[$MEASUREMENT_DURATION_IN_MIN$m]))"
+ }
+ ]
+ },
+ {
+ "name": "containersAcrossCluster",
+ "datasource": "prometheus",
+ "value_type": "double",
+ "kubernetes_object": "container",
+ "aggregation_functions": [
+ {
+ "function": "sum",
+ "query": "sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=\"\"}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=\"\"}[$MEASUREMENT_DURATION_IN_MIN$m]))"
+ }
+ ]
+ }
+ ]
+}
diff --git a/manifests/autotune/metadata-profiles/cluster_metadata_local_monitoring.yaml b/manifests/autotune/metadata-profiles/cluster_metadata_local_monitoring.yaml
new file mode 100644
index 000000000..6d232c9e3
--- /dev/null
+++ b/manifests/autotune/metadata-profiles/cluster_metadata_local_monitoring.yaml
@@ -0,0 +1,32 @@
+apiVersion: "recommender.com/v1"
+kind: "KruizeMetadataProfile"
+metadata:
+ name: "cluster-metadata-local-monitoring"
+profile_version: 1.0
+k8s_type: openshift
+datasource: prometheus
+query_variables:
+
+- name: namespacesAcrossCluster
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "namespace"
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[$MEASUREMENT_DURATION_IN_MIN$m]))'
+
+- name: workloadsAcrossCluster
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "container"
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[$MEASUREMENT_DURATION_IN_MIN$m]))'
+
+- name: containersAcrossCluster
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "container"
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (container, image, workload, workload_type, namespace) (avg_over_time(kube_pod_container_info{container!=""}[$MEASUREMENT_DURATION_IN_MIN$m]) * on (pod, namespace) group_left(workload, workload_type) avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[$MEASUREMENT_DURATION_IN_MIN$m]))'
diff --git a/manifests/autotune/metadata-profiles/kruize-metadata-profile-crd.yaml b/manifests/autotune/metadata-profiles/kruize-metadata-profile-crd.yaml
new file mode 100644
index 000000000..94be682f6
--- /dev/null
+++ b/manifests/autotune/metadata-profiles/kruize-metadata-profile-crd.yaml
@@ -0,0 +1,88 @@
+apiVersion: apiextensions.k8s.io/v1
+kind: CustomResourceDefinition
+metadata:
+ # name must match the spec fields below, and be in the form: <plural>.<group>
+ name: kruizemetadataprofiles.recommender.com
+spec:
+ # group name to use for REST API: /apis/<group>/<version>
+ group: "recommender.com"
+ names:
+ plural: kruizemetadataprofiles
+ singular: kruizemetadataprofile
+ #types can be identified with this tag
+ kind: KruizeMetadataProfile
+ scope: Namespaced
+ versions:
+ - name: v1
+ served: true
+ storage: true
+ schema:
+ openAPIV3Schema:
+ type: object
+ properties:
+ apiVersion:
+ description: 'APIVersion defines the versioned schema of this representation
+ of an object. Servers should convert recognized schemas to the latest
+ internal value, and may reject unrecognized values. More info:
+ https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#resources'
+ type: string
+ kind:
+ description: 'Kind is a string value representing the REST resource this
+ object represents. Servers may infer this from the endpoint the client
+ submits requests to. Cannot be updated. In CamelCase. More info:
+ https://git.k8s.io/community/contributors/devel/sig-architecture/api-conventions.md#types-kinds'
+ type: string
+ profile_version:
+ description: 'Version of the profile'
+ type: number
+ k8s_type:
+ description: 'minikube or openshift'
+ type: string
+ datasource:
+ description: 'datasource to import metadata from for eg. Prometheus, Thanos, Datadog etc'
+ type: string
+ query_variables:
+ description: 'Query variables to be used'
+ type: array
+ items:
+ type: object
+ properties:
+ name:
+ description: 'name of the variable'
+ type: string
+ datasource:
+ description: 'datasource of the query'
+ type: string
+ value_type:
+ description: 'can be double or integer'
+ type: string
+ kubernetes_object:
+ description: 'k8s object that this query is tied to: "deployment", "pod", "namespace" or "container"'
+ type: string
+ query:
+ description: 'one of the query or aggregation_functions is mandatory'
+ type: string
+ aggregation_functions:
+ description: 'one of the query or aggregation_functions is mandatory'
+ type: array
+ items:
+ type: object
+ properties:
+ function:
+ description: 'aggregate functions associated with this variable'
+ type: string
+ query:
+ description: 'query'
+ type: string
+ version:
+ description: 'Any specific version that this query is tied to'
+ type: string
+ required:
+ - function
+ - query
+ required:
+ - name
+ - datasource
+ - value_type
+ required:
+ - query_variables
diff --git a/manifests/autotune/metadata-profiles/metadata-profile-template.yaml b/manifests/autotune/metadata-profiles/metadata-profile-template.yaml
new file mode 100644
index 000000000..37836316f
--- /dev/null
+++ b/manifests/autotune/metadata-profiles/metadata-profile-template.yaml
@@ -0,0 +1,66 @@
+apiVersion: "recommender.com/v1"
+kind: "KruizeMetadataProfile"
+metadata:
+ name: "add_name_here"
+
+# Version of the profile.
+# This helps when queries change often
+profile_version: 1.0
+
+# Is this for a specific kubernetes type?
+# OPTIONAL.
+# If not present, assumed to be used universally.
+# If specified, it should have a corresponding config in
+# manifests/configmaps dir (i.e. supported target by Kruize)
+# Eg. manifests/configmaps/openshift-config.yaml
+k8s_type: openshift
+
+# Name of the datasource to import the metadata from
+# By default Prometheus datasource is connected to Kruize during initialization
+# MANDATORY
+datasource: prometheus
+
+# Describe the query variables to be used
+# MANDATORY
+query_variables:
+
+# name of the variable
+# MANDATORY
+- name: namespacesAcrossCluster
+ # datasource of the query
+ datasource: prometheus
+ # value_type. Supported are "double" or "integer".
+ # MANDATORY
+ value_type: "double"
+
+ # Any k8s object that this query is tied to
+ # eg. "deployment", "pod", "namespace", "container"
+ # OPTIONAL
+ kubernetes_object: "namespace"
+
+ # One of query or aggregation_functions mandatory
+ # Both can be present together
+ query: 'sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[$MEASUREMENT_DURATION_IN_MIN$m]))'
+
+ # aggregate functions associated with this variable
+ # Eg. "avg", "sum", "max", "min"
+ aggregation_functions:
+ # MANDATORY
+ - function: sum
+ # query
+ # MANDATORY
+ query: 'sum by (namespace) (avg_over_time(kube_namespace_status_phase{namespace!=""}[$MEASUREMENT_DURATION_IN_MIN$m]))'
+
+ # Any specific versions that this query is tied to
+ # OPTIONAL
+ version: ">4.9"
+
+# list of all the workloads present in the cluster
+- name: workloadsAcrossCluster
+ datasource: prometheus
+ value_type: "double"
+ kubernetes_object: "container"
+ # sum of the pod ownership metrics, grouped by namespace, workload, and workload_type, filtering out empty workloads.
+ aggregation_functions:
+ - function: sum
+ query: 'sum by (namespace, workload, workload_type) (avg_over_time(namespace_workload_pod:kube_pod_owner:relabel{workload!=""}[$MEASUREMENT_DURATION_IN_MIN$m]))'