diff --git a/xbrl/instance.py b/xbrl/instance.py index d88d67c..45ff150 100644 --- a/xbrl/instance.py +++ b/xbrl/instance.py @@ -347,17 +347,21 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas + + # initialise a set that will store cached taxonomy schema uris to avoid recursive loops + imported_schema_uris = set() + if is_url(schema_uri): # fetch the taxonomy extension schema from remote - taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) + taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache, imported_schema_uris) elif instance_url: # fetch the taxonomy extension schema from remote by reconstructing the url schema_url = resolve_uri(instance_url, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache) + taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache, imported_schema_uris) else: # try to find the taxonomy extension schema file locally because no full url can be constructed schema_path = resolve_uri(instance_path, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache) + taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) # parse contexts and units context_dir = _parse_context_elements(root.findall('xbrli:context', NAME_SPACES), root.attrib['ns_map'], taxonomy, diff --git a/xbrl/taxonomy.py b/xbrl/taxonomy.py index 4150139..79dc750 100644 --- a/xbrl/taxonomy.py +++ b/xbrl/taxonomy.py @@ -588,26 +588,28 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or @lru_cache(maxsize=60) -def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema: +def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris : set) -> TaxonomySchema: """ Parses a 
taxonomy schema file from the internet :param schema_url: full link to the taxonomy schema :param cache: :class:`xbrl.cache.HttpCache` instance + :param imported_schema_uris: set of already imported schema uris :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object """ if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. ' 'Please use parse_taxonomy to parse local taxonomy schemas') schema_path: str = cache.cache_file(schema_url) - return parse_taxonomy(schema_path, cache, schema_url) + return parse_taxonomy(schema_path, cache, imported_schema_uris, schema_url) -def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema: +def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : set, schema_url: str or None = None) -> TaxonomySchema: """ Parses a taxonomy schema file. :param schema_path: url to the schema (on the internet) :param cache: :class:`xbrl.cache.HttpCache` instance + :param imported_schema_uris: set of already imported schema uris :param schema_url: if this url is set, the script will try to fetch additionally imported files such as linkbases or imported schemas from the remote location. If this url is None, the script will try to find those resources locally. :return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object @@ -633,18 +635,25 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = if import_uri == "": continue + # Skip already imported URIs + if import_uri in imported_schema_uris: + continue + # sometimes the import schema location is relative. 
i.e schemaLocation="xbrl-linkbase-2003-12-31.xsd" if is_url(import_uri): # fetch the schema file from remote taxonomy.imports.append(parse_taxonomy_url(import_uri, cache)) + imported_schema_uris.add(import_uri) elif schema_url: # fetch the schema file from remote by reconstructing the full url import_url = resolve_uri(schema_url, import_uri) taxonomy.imports.append(parse_taxonomy_url(import_url, cache)) + imported_schema_uris.add(import_uri) else: # We have to try to fetch the linkbase locally because no full url can be constructed import_path = resolve_uri(schema_path, import_uri) taxonomy.imports.append(parse_taxonomy(import_path, cache)) + imported_schema_uris.add(import_uri) role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES) # parse ELR's