Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(ingestion): [WIP] Adding SSAS as a New Source for Data Ingestion #10286

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions metadata-ingestion/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,14 @@

slack = {"slack-sdk==3.18.1"}

# Exact-version requirements installed with the "ssas" plugin extra.
# Entries are kept in alphabetical order; being a set, order carries no meaning.
ssas = {
    "beautifulsoup4==4.11.1",
    "lxml==4.9.1",
    "requests-kerberos==0.14.0",
    "requests==2.28.1",
    "xmltodict==0.13.0",
}

databricks = {
# 0.1.11 appears to have authentication issues with azure databricks
"databricks-sdk>=0.9.0",
Expand Down Expand Up @@ -393,6 +401,7 @@
"sqlalchemy": sql_common,
"sql-queries": usage_common | sqlglot_lib,
"slack": slack,
"ssas": ssas,
"superset": {
"requests",
"sqlalchemy",
Expand Down Expand Up @@ -538,6 +547,7 @@
"s3",
"snowflake",
"slack",
"ssas",
"tableau",
"teradata",
"trino",
Expand Down Expand Up @@ -580,6 +590,7 @@
"ldap",
"mongodb",
"slack",
#"ssas",
"mssql",
"mysql",
"mariadb",
Expand Down Expand Up @@ -637,6 +648,8 @@
"redash = datahub.ingestion.source.redash:RedashSource",
"redshift = datahub.ingestion.source.redshift.redshift:RedshiftSource",
"slack = datahub.ingestion.source.slack.slack:SlackSource",
"ssas_multidimension = datahub.ingestion.source.ssas.ssas_multidimension.ssas_multidimension:SsasMultidimensionSource",
"ssas_tabular = datahub.ingestion.source.ssas.ssas_tabular.ssas_tabular:SsasTabularSource",
"snowflake = datahub.ingestion.source.snowflake.snowflake_v2:SnowflakeV2Source",
"superset = datahub.ingestion.source.superset:SupersetSource",
"tableau = datahub.ingestion.source.tableau:TableauSource",
Expand Down
Empty file.
54 changes: 54 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/ssas/api.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""
Classes for communicating with an SSAS server over XMLA.
"""

from abc import ABC, abstractmethod
from typing import Any, Dict, Union

import xmltodict
from requests.auth import HTTPBasicAuth
from requests_kerberos import HTTPKerberosAuth

from .config import SsasServerHTTPConfig
from .tools import MsXmlaTemplates
from .xmlaclient import XmlaClient


class ISsasAPI(ABC):
    """Abstract base declaring the members an SSAS API class has to provide.

    Subclasses must override both ``get_server`` and the ``auth_credentials``
    property before they can be instantiated.
    """

    @abstractmethod
    def get_server(self):
        """Abstract hook; the base implementation does nothing and returns None."""

    @property
    @abstractmethod
    def auth_credentials(self):
        """Abstract read-only property; the base implementation yields None."""


class SsasXmlaAPI:
"""
Parses responses from an SSAS XMLA server.
"""

def __init__(self, config: SsasServerHTTPConfig, auth: Union[HTTPKerberosAuth, HTTPBasicAuth]):
    """
    Keep the connection settings and auth handler, and build the XMLA client.

    :param config: connection settings, forwarded to ``XmlaClient``.
    :param auth: Kerberos or basic-auth handler, forwarded to ``XmlaClient``.
    """
    self.__config, self.__auth = config, auth
    # The client receives the very same config and auth objects stored above.
    self.__client = XmlaClient(config=self.__config, auth=auth)
Comment on lines +33 to +36
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add error handling for network requests in SsasXmlaAPI.

The constructor initializes a client for making network requests. Consider adding error handling for potential network-related issues, such as connection timeouts or authentication failures.

def __init__(self, config: SsasServerHTTPConfig, auth: Union[HTTPKerberosAuth, HTTPBasicAuth]):
    self.__config = config
    self.__auth = auth
    try:
        self.__client = XmlaClient(config=config, auth=self.__auth)
    except SomeNetworkException as e:
        raise XMLAServerResponseError("Failed to initialize XMLA client") from e


def get_server_info(self) -> Dict[str, Any]:
    """
    Return the ``Server`` node of the parsed server-metadata response.

    The raw XML comes from ``get_server_metadata`` and is converted with
    ``xmltodict.parse``; the result is then descended one key at a time.
    No errors are caught here: a failed parse or a failed lookup along the
    path propagates to the caller unchanged.
    """
    node = xmltodict.parse(self.get_server_metadata())

    # Path from the SOAP envelope down to the server description.
    key_path = (
        "soap:Envelope",
        "soap:Body",
        "DiscoverResponse",
        "return",
        "root",
        "row",
        "xars:METADATA",
        "Server",
    )
    for key in key_path:
        node = node[key]
    return node
Comment on lines +38 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add error handling for XML parsing in get_server_info.

The get_server_info method parses XML data without handling potential parsing errors. Consider adding try-except blocks to handle these errors gracefully.

def get_server_info(self) -> Dict[str, Any]:
    """
    Extract server metadata info from response
    """
    try:
        server_data_xml = xmltodict.parse(self.get_server_metadata())
        return server_data_xml["soap:Envelope"]["soap:Body"]["DiscoverResponse"][
            "return"
        ]["root"]["row"]["xars:METADATA"]["Server"]
    except (xmltodict.expat.ExpatError, KeyError) as e:
        raise XMLAServerResponseError("Failed to parse server metadata") from e


def get_server_metadata(self) -> str:
    """
    Run the ``QUERY_METADATA`` discover request and return the reply as text.

    Whatever the client's ``discover`` call returns is converted with ``str``.
    """
    response = self.__client.discover(query=MsXmlaTemplates.QUERY_METADATA)
    return str(response)
Comment on lines +49 to +54
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add error handling for network requests in get_server_metadata.

The get_server_metadata method makes a network request without handling potential network-related issues. Consider adding error handling for these cases.

def get_server_metadata(self) -> str:
    """
    Get ssas server metadata
    """
    try:
        return str(self.__client.discover(query=MsXmlaTemplates.QUERY_METADATA))
    except SomeNetworkException as e:
        raise XMLAServerResponseError("Failed to retrieve server metadata") from e

68 changes: 68 additions & 0 deletions metadata-ingestion/src/datahub/ingestion/source/ssas/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from typing import List, Dict

import datahub.emitter.mce_builder as builder
import pydantic
from datahub.configuration.common import AllowDenyPattern
from datahub.configuration.source_common import EnvConfigMixin


class SsasServerHTTPConfig(EnvConfigMixin):
"""
Configuration object.
Contains the parameters for connecting to SSAS over HTTP (HTTP Access to Analysis Services)
https://learn.microsoft.com/en-us/analysis-services/instances/configure-http-access-to-analysis-services-on-iis-8-0?view=asallproducts-allversions

username - Active Directory user login
password - Active Directory user password
host_port - XMLA gateway address (format - host:port)
server_alias - XMLA gateway server alias
virtual_directory_name - name of the virtual directory used in the XMLA gateway URL
instance - appears to be unused (TODO: confirm and remove if so)
use_https - set to true to access the XMLA gateway over HTTPS
dns_suffixes - list of DNS zones, for SSAS servers located in different domains.
Used to find the main domain of an SSAS server when it is not specified in the cube properties

"""
# Windows account credentials; both are required (no default).
username: str = pydantic.Field(description="Windows account username")
password: str = pydantic.Field(description="Windows account password")
# Required (no default).
# NOTE(review): no use of this field is visible in this file - confirm it is still needed.
instance: str
# Address of the XMLA gateway; inserted verbatim after the scheme in base_api_url.
host_port: str = pydantic.Field(
default="localhost:81", description="XMLA gateway url"
)
# Returned by the `host` property whenever it is non-empty.
server_alias: str = pydantic.Field(default="localhost")

# First path segment of base_api_url.
virtual_directory_name: str = pydantic.Field(
default="ssas", description="Report Virtual Directory URL name"
)
# Required (no default); second path segment of base_api_url.
ssas_instance: str
# Chooses the "https" scheme in base_api_url when true, "http" otherwise.
use_https: bool = pydantic.Field(default=True)
# Restricted by the validator below to "HTTPBasicAuth" or "HTTPKerberosAuth".
ssas_instance_auth_type: str = pydantic.Field(default="HTTPKerberosAuth", description="SSAS instance auth type")

# A non-empty list makes use_dns_resolver return True.
dns_suffixes: List = pydantic.Field(default_factory=list)
# NOTE(review): presumably maps a server name to its default SSAS instance - confirm against callers.
default_ssas_instances_by_server: Dict = pydantic.Field(default_factory=dict)

@pydantic.validator('ssas_instance_auth_type')
def check_ssas_instance_auth_type(cls, v):
    """
    Accept only the two supported auth type names.

    Returns the value unchanged when it is "HTTPBasicAuth" or
    "HTTPKerberosAuth"; raises ValueError for anything else.
    """
    if v in ("HTTPBasicAuth", "HTTPKerberosAuth"):
        return v
    raise ValueError("Support only HTTPBasicAuth or HTTPKerberosAuth auth type")

@property
def use_dns_resolver(self) -> bool:
    """True when at least one DNS suffix is configured, False otherwise."""
    if self.dns_suffixes:
        return True
    return False

@property
def base_api_url(self) -> str:
    """
    Build the URL of the ``msmdpump.dll`` endpoint.

    Layout: ``<scheme>://<host_port>/<virtual_directory_name>/<ssas_instance>/msmdpump.dll``,
    where the scheme is "https" if ``use_https`` is set and "http" otherwise.
    """
    scheme = "http"
    if self.use_https:
        scheme = "https"
    return f"{scheme}://{self.host_port}/{self.virtual_directory_name}/{self.ssas_instance}/msmdpump.dll"
Comment on lines +55 to +57
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding validation for URL components.

Ensure that host_port and virtual_directory_name are not empty to avoid constructing an invalid URL.

if not self.host_port or not self.virtual_directory_name:
    raise ValueError("host_port and virtual_directory_name must not be empty.")


@property
def host(self) -> str:
    """
    Host name of the server.

    The server alias wins when it is non-empty; otherwise the part of
    ``host_port`` before the first ":" is used.
    """
    alias = self.server_alias
    if alias:
        return alias
    return self.host_port.partition(":")[0]


# Source-level configuration: the SSAS HTTP connection settings extended with
# the platform identity and two allow/deny filter patterns.
class SsasServerHTTPSourceConfig(SsasServerHTTPConfig):
    # Platform name; also the input for the default platform_urn below.
    platform_name: str = "ssas"
    # Default is computed once, while the class body is evaluated, from the
    # default platform_name above - overriding platform_name on an instance
    # does not recompute it.
    platform_urn: str = builder.make_data_platform_urn(platform=platform_name)
    # Filter patterns; each defaults to AllowDenyPattern.allow_all().
    report_pattern: AllowDenyPattern = AllowDenyPattern.allow_all()
    chart_pattern: AllowDenyPattern = AllowDenyPattern.allow_all()
Loading
Loading