Skip to content

Commit

Permalink
Simplified with default value for parse via parse_taxonomy_url & fixed…
Browse files Browse the repository at this point in the history
… the set hashing bug.
  • Loading branch information
Sam-el0 committed May 12, 2024
1 parent a38b9db commit 658e730
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 14 deletions.
4 changes: 2 additions & 2 deletions tests/test_local_taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def test_parse_taxonomy(self):
cache_dir: str = './cache/'
cache: HttpCache = HttpCache(cache_dir)
print(f"Saving to {cache_dir}")

imported_schema_uris = set()
extension_schema_path: str = './tests/data/example.xsd'
# extension_schema_path: str = './data/example.xsd'
tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache)
tax: TaxonomySchema = parse_taxonomy(extension_schema_path, cache, imported_schema_uris = set())
print(tax)
srt_tax: TaxonomySchema = tax.get_taxonomy('http://fasb.org/srt/2020-01-31')
self.assertTrue(srt_tax)
Expand Down
20 changes: 13 additions & 7 deletions xbrl/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -349,18 +349,19 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
# submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas

# initialise a set that will store cached taxonomy schema URIs to avoid recursive loops
imported_schema_uris = set()


if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache, imported_schema_uris)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
elif instance_url:
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache, imported_schema_uris)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
imported_schema_uris = set()
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# parse contexts and units
Expand Down Expand Up @@ -457,21 +458,26 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
# check if the schema uri is relative or absolute
# submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas

# initialise a set that will store cached taxonomy schema URIs to avoid recursive loops
imported_schema_uris = set()


if is_url(schema_uri):
# fetch the taxonomy extension schema from remote
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_uri, cache, imported_schema_uris)
elif schema_root:
# take the given schema_root path as directory for searching for the taxonomy schema
schema_path = str(next(Path(schema_root).glob(f'**/{schema_uri}')))
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)
elif instance_url:
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache, imported_schema_uris)
else:
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache, imported_schema_uris)

# get all contexts and units
xbrl_resources: ET.Element = root.find('.//ix:resources', ns_map)
Expand Down
9 changes: 4 additions & 5 deletions xbrl/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,7 @@ def parse_common_taxonomy(cache: HttpCache, namespace: str) -> TaxonomySchema or


@lru_cache(maxsize=60)
def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris : set) -> TaxonomySchema:
def parse_taxonomy_url(schema_url: str, cache: HttpCache, imported_schema_uris: set = set()) -> TaxonomySchema:
"""
Parses a taxonomy schema file from the internet
Expand Down Expand Up @@ -643,17 +643,16 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, imported_schema_uris : se
if is_url(import_uri):
# fetch the schema file from remote
taxonomy.imports.append(parse_taxonomy_url(import_uri, cache))
imported_schema_uris.add(import_uri)
elif schema_url:
# fetch the schema file from remote by reconstructing the full url
import_url = resolve_uri(schema_url, import_uri)
taxonomy.imports.append(parse_taxonomy_url(import_url, cache))
imported_schema_uris.add(import_uri)
taxonomy.imports.append(parse_taxonomy_url(import_url, cache))
else:
# We have to try to fetch the linkbase locally because no full url can be constructed
import_path = resolve_uri(schema_path, import_uri)
taxonomy.imports.append(parse_taxonomy(import_path, cache))
imported_schema_uris.add(import_uri)
taxonomy.imports.append(parse_taxonomy(import_path, cache, imported_schema_uris))


role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES)
# parse ELR's
Expand Down

0 comments on commit 658e730

Please sign in to comment.