diff --git a/tests/test_local_taxonomy.py b/tests/test_local_taxonomy.py index 64d5b6e..81bd203 100644 --- a/tests/test_local_taxonomy.py +++ b/tests/test_local_taxonomy.py @@ -18,10 +18,10 @@ def test_parse_taxonomy(self): cache_dir: str = './cache/' cache: HttpCache = HttpCache(cache_dir) print(f"Saving to {cache_dir}") - + imported_schema_uris = set() extension_schema_path: str = './tests/data/example.xsd' # extension_schema_path: str = './data/example.xsd' - tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache) + tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache, imported_schema_uris = set()) print(tax) srt_tax: TaxonomySchema = tax.get_taxonomy('http://fasb.org/srt/2020-01-31') self.assertTrue(srt_tax) diff --git a/xbrl/instance.py b/xbrl/instance.py index 45ff150..75a5254 100644 --- a/xbrl/instance.py +++ b/xbrl/instance.py @@ -349,18 +349,19 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas # initalise a set that will store cached taxonomy schemas uris to avoid recursive loops - imported_schema_uris = set() + if is_url(schema_uri): # fetch the taxonomy extension schema from remote - taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache, imported_schema_uris) + taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) elif instance_url: # fetch the taxonomy extension schema from remote by reconstructing the url schema_url = resolve_uri(instance_url, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache, imported_schema_uris) + taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache) else: # try to find the taxonomy extension schema file locally because no full url can be constructed schema_path = resolve_uri(instance_path, schema_uri) + imported_schema_uris = set() taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) # parse contexts and units @@ -457,21 +458,26 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None schema_uri: str = schema_ref.attrib[XLINK_NS + 'href'] # check if the schema uri is relative or absolute # submissions from SEC normally have their own schema files, whereas submissions from the uk have absolute schemas + + # initalise a set that will store cached taxonomy schemas uris to avoid recursive loops + imported_schema_uris = set() + + if is_url(schema_uri): # fetch the taxonomy extension schema from remote - taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache) + taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache, imported_schema_uris) elif schema_root: # take the given schema_root path as directory for searching for the taxonomy schema schema_path = str(next(Path(schema_root).glob(f'**/{schema_uri}'))) - taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache) + taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) elif instance_url: # fetch the taxonomy extension schema from remote by reconstructing the url schema_url = resolve_uri(instance_url, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache) + taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache, imported_schema_uris) else: # try to find the taxonomy extension schema file locally because no full url can be constructed schema_path = resolve_uri(instance_path, schema_uri) - taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache) + taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris) # get all contexts and units xbrl_resources: ET.Element = root.find('.//ix:resources', ns_map) diff --git a/xbrl/taxonomy.py b/xbrl/taxonomy.py index 79dc750..f9b078c 100644 --- a/xbrl/taxonomy.py +++ b/xbrl/taxonomy.py @@ -588,7 +588,7 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or @lru_cache(maxsize=60) -def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris : set) -> TaxonomySchema: +def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris: set = set()) -> TaxonomySchema: """ Parses a taxonomy schema file from the internet @@ -643,17 +643,16 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : se if is_url(import_uri): # fetch the schema file from remote taxonomy.imports.append(parse_taxonomy_url(import_uri, cache)) - imported_schema_uris.add(import_uri) elif schema_url: # fetch the schema file from remote by reconstructing the full url import_url = resolve_uri(schema_url, import_uri) - taxonomy.imports.append(parse_taxonomy_url(import_url, cache)) imported_schema_uris.add(import_uri) + taxonomy.imports.append(parse_taxonomy_url(import_url, cache)) else: # We have to try to fetch the linkbase locally because no full url can be constructed import_path = resolve_uri(schema_path, import_uri) - taxonomy.imports.append(parse_taxonomy(import_path, cache)) - imported_schema_uris.add(import_uri) + taxonomy.imports.append(parse_taxonomy(import_path, cache, imported_schema_uris)) + role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES) # parse ELR's