Skip to content

Commit

Permalink
fixed recursion loops with imported_schema_uris set that is passed in…
Browse files Browse the repository at this point in the history
…to the recursive function parse_taxonomy
  • Loading branch information
Sam-el0 committed Apr 21, 2024
1 parent cbc28fc commit a38b9db
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 6 deletions.
10 changes: 7 additions & 3 deletions xbrl/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,17 +347,21 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas

# initialise a set that will store the uris of already imported taxonomy schemas to avoid recursive loops
imported_schema_uris = set()

if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache, imported_schema_uris)
elif instance_url:
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache, imported_schema_uris)
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# parse contexts and units
context_dir = _parse_context_elements(root.findall('xbrli:context', NAME_SPACES), root.attrib['ns_map'], taxonomy,
Expand Down
15 changes: 12 additions & 3 deletions xbrl/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,26 +588,28 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or


@lru_cache(maxsize=60)
def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema:
def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris : set) -> TaxonomySchema:
"""
Parses a taxonomy schema file from the internet
:param schema_url: full link to the taxonomy schema
:param cache: :class:`xbrl.cache.HttpCache` instance
:param imported_schema_uris: set of already imported schema uris
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
"""
if not is_url(schema_url): raise XbrlParseException('This function only parses remotely saved taxonomies. '
'Please use parse_taxonomy to parse local taxonomy schemas')
schema_path: str = cache.cache_file(schema_url)
return parse_taxonomy(schema_path, cache, schema_url)
return parse_taxonomy(schema_path, cache, imported_schema_uris, schema_url)


def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema:
def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : set, schema_url: str or None = None) -> TaxonomySchema:
"""
Parses a taxonomy schema file.
:param schema_path: path to the schema file (for remote schemas, the path returned by cache.cache_file)
:param cache: :class:`xbrl.cache.HttpCache` instance
:param imported_schema_uris: set of already imported schema uris
:param schema_url: if this url is set, the script will try to fetch additionally imported files such as linkbases or
imported schemas from the remote location. If this url is None, the script will try to find those resources locally.
:return: parsed :class:`xbrl.taxonomy.TaxonomySchema` object
Expand All @@ -633,18 +635,25 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
if import_uri == "":
continue

# Skip already imported URIs
if import_uri in imported_schema_uris:
continue

# sometimes the import schema location is relative, e.g. schemaLocation="xbrl-linkbase-2003-12-31.xsd"
if is_url(import_uri):
# fetch the schema file from remote
taxonomy.imports.append(parse_taxonomy_url(import_uri, cache))
imported_schema_uris.add(import_uri)
elif schema_url:
# fetch the schema file from remote by reconstructing the full url
import_url = resolve_uri(schema_url, import_uri)
taxonomy.imports.append(parse_taxonomy_url(import_url, cache))
imported_schema_uris.add(import_uri)
else:
# We have to try to fetch the imported schema locally because no full url can be constructed
import_path = resolve_uri(schema_path, import_uri)
taxonomy.imports.append(parse_taxonomy(import_path, cache))
imported_schema_uris.add(import_uri)

role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES)
# parse ELR's
Expand Down

0 comments on commit a38b9db

Please sign in to comment.