fixed documentation, modified cache.rst, linkbase.rst

pvmagacho-nde · Jul 9, 2022 · cf03a0b · cf03a0b
1 parent 2edd900
commit cf03a0b
Show file tree

Hide file tree

Showing 7 changed files with 125 additions and 65 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -12,7 +12,7 @@
 #
 import os
 import sys
-import sphinx_rtd_theme
+
 # sys.path.insert(0, os.path.abspath('.'))
 sys.path.insert(0, os.path.abspath('..'))
 
@@ -33,6 +33,7 @@
 extensions = [
     'sphinx_rtd_theme',
     'sphinx.ext.autodoc',
+    'sphinx_autodoc_typehints',
     'sphinx.ext.autosummary'
 ]
 

diff --git a/docs/index.rst b/docs/index.rst
@@ -28,8 +28,8 @@ Contents
    :caption: Modules
 
    modules/instance
-   modules/linkbase
    modules/taxonomy
+   modules/linkbase
    modules/cache
 
 

diff --git a/docs/modules/cache.rst b/docs/modules/cache.rst
@@ -1,9 +1,33 @@
 cache
 =====
 
-.. warning::
+This class handles a simple disk cache. It will download requested files and store them in a folder specified by
+the user. If the file is requested a second time, this class will serve the file directly from the file system.
+The path for caching is created out of the url of the file.
+For example, the file with the URL
+"https://www.sec.gov/Archives/edgar/data/320193/000032019318000100/aapl-20180630.xml"
+will be stored in the disk cache in
+"D:/cache/www.sec.gov/Archives/edgar/data/320193/000032019318000100/aapl-20180630.xml"
+where "D:/cache" is the caching directory specified by the user.
+
+The http cache can also delay requests. This is highly recommended if you download xbrl submissions in batch!
+For downloading a complete submission as a zip enclosure, this class also provides the method :meth:`xbrl.cache.HttpCache.cache_edgar_enclosure`.
+
+The SEC also emphasizes that you should try to keep the required server load on the EDGAR system as small as possible!
+https://www.sec.gov/privacy.htm#security
+
+
+Short note on enclosures:
+-------------------------
+The SEC provides zip folders that contain all xbrl related files for a given submission.
+These files include, for example, the Instance Document, the Extension Taxonomy and the Linkbases.
+Because zip compression is very effective on xbrl submissions, which naturally contain
+repeating text, it is far more efficient to download the zip folder and extract it.
+So if you want to do the SEC servers and your downloading time a favour, use this method for downloading
+the submission :).
+One way to get the zip enclosure url is through the Structured Disclosure RSS Feeds provided by the SEC:
+https://www.sec.gov/structureddata/rss-feeds-submitted-filings
 
-   !!! This documentation is currently under development !!!
 
 Parameters
 ----------

diff --git a/docs/modules/linkbase.rst b/docs/modules/linkbase.rst
@@ -1,13 +1,62 @@
 linkbase
 ========
 
-.. warning::
+Linkbases are individual XML files that bring structure to concepts
+and link them to additional information. This information can be,
+for example, user-friendly labels or references to authoritative
+literature. The linkbases are imported in the taxonomy schema.
+Linkbases can be divided into two main groups: Relation Linkbases
+and Reference Linkbases. Relation Linkbases create hierarchical
+relationships between multiple concepts. The interpretation of these
+hierarchical relationships is defined by the type of linkbase.
+Reference linkbases, on the other hand, add resources to concepts.
 
-   !!! This documentation is currently under development !!!
+**Relation Linkbases:**
 
+    **Calculation Linkbase:** The Calculation Linkbase defines simple
+    arithmetic relationships between individual concepts. For example,
+    a calculation linkbase could define the following equation:
+    us-gaap_Assets = us-gaap_AssetsCurrent + us-gaap_AssetsNonCurrent.
 
-Linkbase
---------
+    **Presentation Linkbase:** The presentation linkbase describes the order
+    in which the concepts of the taxonomy should be arranged. For example,
+    it could subordinate the us-gaap_AssetsCurrent and
+    us-gaap_AssetsNonCurrent concepts to the us-gaap_Assets concept.
+
+    **Definition Linkbase:** The definition linkbase allows the creation of various
+    other logical connections between concepts. For example, a link
+    with the arcrole “essence-alias” can be used to emphasize that
+    two concepts cover the same or very similar subject matter.
+
+**Reference Linkbases:**
+
+    **Label Linkbase:** The Label Linkbase links concepts with one or more
+    reader-friendly labels. It is also possible to link labels in
+    different languages.
+
+    **Reference Linkbase:** The reference linkbase can be used to create
+    links between concepts and documents outside of XBRL/XML.
+    Most often, these external documents are laws or policies
+    that govern the calculation, disclosure, or presentation
+    of these concepts.
+
+
+
+
+Read more at: https://manusimidt.dev/2021-07/xbrl-explained
+
+Parse functions
+---------------
+
+.. automodule:: xbrl.linkbase
+
+
+    .. automethod:: xbrl.linkbase.parse_linkbase_url
+    .. automethod:: xbrl.linkbase.parse_linkbase
+
+
+Class
+----------
 
 .. autoclass:: xbrl.linkbase::Linkbase
     :members:

diff --git a/xbrl/cache.py b/xbrl/cache.py
@@ -1,8 +1,8 @@
 """
 Downloads files and stores them locally.
 """
-import re
 import os
+import re
 import zipfile
 from pathlib import Path
 
@@ -11,26 +11,15 @@
 
 class HttpCache:
     """
-    This class handles a simple disk cache. It will download requested files and store them in folder specified by
-    the user. If the file is requested a second time this class will serve the file directly from the file system.
-    The path for caching is created out of the url of the file.
-    For example, the file with the URL
-    "https://www.sec.gov/Archives/edgar/data/320193/000032019318000100/aapl-20180630.xml"
-    will be stored in the disk cache in
-    „D:/cache/www.sec.gov/Archives/edgar/data/320193/000032019318000100/aapl-20180630.xml“
-    where "D:/cache" is the caching directory specified by the user.
-
-    The http cache can also delay requests. This is highly recommended if you download xbrl submissions in batch!
-
-    The SEC also emphasizes that you should try to keep the required server load on the EDGAR system as small as possible!
-    https://www.sec.gov/privacy.htm#security
-
+    Simple persistent HTTP cache. Requests files over http and stores them into the cache. Just returns
+    the file path if the same file is requested twice. Also automatically handles retries when a request fails.
     """
 
     def __init__(self, cache_dir: str, delay: int = 500, verify_https: bool = True):
         """
         :param cache_dir: Root directory of the disk cache (all requested files will be cached in this directory)
-        :param delay: How many milliseconds should the cache wait, before requesting another file from the same server
+        :param delay: Minimum time in milliseconds between two requests
+        :param verify_https: Disable SSL certificate validation for speed up (see https://github.com/manusimidt/py-xbrl/pull/57)
         """
         # check if the cache_dir ends with a /
         if not cache_dir.endswith('/'): cache_dir += '/'
@@ -43,8 +32,14 @@ def set_headers(self, headers: dict) -> None:
         Sets the header for all following request
 
         :param headers: python dictionary with string key and value
-            i.e.: {"From": "[email protected]", "User-Agent" : "ExampleBot/1.0 (https.example.com/exampleBot)"}
-        :return:
+
+        Example header:
+
+        .. code-block:: json
+
+            {
+                "From": "[email protected]",
+                "User-Agent" : "ExampleBot/1.0 (https.example.com/exampleBot)"
+            }
         """
         self.headers = headers
         self.connection_manager._headers = headers
@@ -54,12 +49,11 @@ def set_connection_params(self, delay: int = 500, retries: int = 5, backoff_fact
         """
         Sets the connection params for all following request
 
-        :param delay: int specifying milliseconds to wait between each successful request
+        :param delay: Minimum time in milliseconds between two requests
         :param retries: int specifying how many times a request will be tried before assuming its failure.
         :param backoff_factor: Used to measure time to sleep between failed requests. The formula used is:
             {backoff factor} * (2 ** ({number of total retries} - 1))
         :param logs: enables or disables download logs
-        :return:
         """
         self.connection_manager._delay_ms = delay
         self.connection_manager._retries = retries
@@ -70,8 +64,7 @@ def cache_file(self, file_url: str) -> str:
         """
         Caches a file in the http cache.
 
-        :param file_url: absolute url to the file to be cached.
-            i.e: http://xbrl.fasb.org/us-gaap/2017/elts/us-gaap-2017-01-31.xsd
+        :param file_url: url (https link) to the file to be cached.
         :return: returns the absolute path to the cached file
         """
         file_path: str = self.url_to_path(file_url)
@@ -88,7 +81,8 @@ def cache_file(self, file_url: str) -> str:
 
         if not query_response.status_code == 200:
             if query_response.status_code == 404:
-                raise Exception("Could not find file on {}. Error code: {}".format(file_url, query_response.status_code))
+                raise Exception(
+                    "Could not find file on {}. Error code: {}".format(file_url, query_response.status_code))
             else:
                 raise Exception(
                     "Could not download file from {}. Error code: {}".format(file_url, query_response.status_code))
@@ -103,8 +97,7 @@ def purge_file(self, file_url: str) -> bool:
         """
         Removes a file from the cache
 
-        :param file_url: url to the file
-            i.e: https://www.sec.gov/Archives/edgar/data/320193/000032019318000100/aapl-20180630.xml
+        :param file_url: url (https link) to the file to be deleted.
         :return: true if the file was deleted, false if it could not be found
         """
         try:
@@ -115,27 +108,21 @@ def purge_file(self, file_url: str) -> bool:
 
     def url_to_path(self, url: str) -> str:
         """
-        Takes a url and converts it to the ABSOLUTE local cache path
+        Takes a url and converts it to the absolute local cache path
 
-        i.e https://xbrl.sec.gov/dei/2018/dei-2018-01-31.xsd -> /xbrl.sec.gov/dei/2018/dei-2018-01-31.xsd
-        @param url:
-        @return:
+        :param url: url of the file whose cache path should be determined
+        :return: absolute local cache path
         """
         return self.cache_dir + re.sub("https?://", "", url)
 
     def cache_edgar_enclosure(self, enclosure_url: str) -> str:
         """
-        The SEC provides zip folders that contain all xbrl related files for a given submission.
-        These files are i.e: Instance Document, Extension Taxonomy, Linkbases.
-        Due to the fact that the zip compression is very effective on xbrl submissions that naturally contain
-        repeating test, it is way more efficient to download the zip folder and extract it.
-        So if you want to do the SEC servers and your downloading time a favour, use this method for downloading
-        the submission :).
-        One way to get the zip enclosure url is through the Structured Disclosure RSS Feeds provided by the SEC:
-        https://www.sec.gov/structureddata/rss-feeds-submitted-filings
+        Downloads the ZIP folder, extracts it and stores the files in the cache.
+
         :param enclosure_url: url to the zip folder.
         :return: relative path to extracted zip's content
         """
+        # todo: why is it called "cache_edgar_enclosure" you could theoretically cache any zip enclosure.
         if not enclosure_url.endswith('.zip'):
             raise Exception("This is not a valid zip folder")
         # download the zip folder and store it into the default http cache
@@ -147,6 +134,7 @@ def cache_edgar_enclosure(self, enclosure_url: str) -> str:
             zip_ref.close()
         return submission_dir_path
 
+    @DeprecationWarning
     def find_entry_file(self, dir_path: str) -> str or None:
         """
         NOTE: This function only works for enclosed SEC submissions that where already downloaded!

diff --git a/xbrl/helper/connection_manager.py b/xbrl/helper/connection_manager.py
@@ -23,7 +23,7 @@ def __init__(self, delay: int = 500, retries: int = 5, backoff_factor: float = 0
                  verify_https: bool = True):
         """
 
-        @param from_locator: Specifies sleeping time after the request is successfull.
+        @param delay: Specifies sleeping time after the request is successful.
         @param retries: How many times a request will be tried before assuming its failure.
         @param backoff_factor: Used to measure time to sleep between failed requests.
             The formula used is {backoff factor} * (2 ** ({number of total retries} - 1))

diff --git a/xbrl/linkbase.py b/xbrl/linkbase.py
@@ -1,17 +1,9 @@
-"""
-Module for parsing Linkbases
-
-There are three types of Linkbase:
-relation linkbases: calculation, definition and presentation
-label linkbase: lab
-reference linkbase: ref
-"""
 import abc
 import os
-from typing import List
 import xml.etree.ElementTree as ET
 from abc import ABC
 from enum import Enum
+from typing import List
 
 from xbrl import XbrlParseException, LinkbaseNotFoundException
 from xbrl.cache import HttpCache
@@ -388,21 +380,21 @@ def __str__(self) -> str:
 
 class Linkbase:
     """
-    Represents the complete Linkbase
+    Represents a complete Linkbase (non-generic).
     """
 
-    def __init__(self, extended_links, linkbase_type: LinkbaseType) -> None:
+    def __init__(self, extended_links: List[ExtendedLink], linkbase_type: LinkbaseType) -> None:
         """
-        @param extended_links: All standard extended links that are defined in the linkbase
-        @type extended_links: [ExtendedDefinitionLink] or [ExtendedCalculationLink] or [ExtendedPresentationLink] or [ExtendedLabelArc]
+        :param extended_links: All standard extended links that are defined in the linkbase
+        :type extended_links: [ExtendedDefinitionLink] or [ExtendedCalculationLink] or [ExtendedPresentationLink] or [ExtendedLabelArc]
+        :param linkbase_type: Type of the linkbase
         """
         self.extended_links: List[ExtendedLink] = extended_links
         self.type = linkbase_type
 
     def to_dict(self) -> dict:
         """
         Converts the Linkbase object with in a dictionary representing the Hierarchy of the locators
-        @return:
         """
         return {"standardExtendedLinkElements": [el.to_dict() for el in self.extended_links]}
 
@@ -411,14 +403,18 @@ def to_simple_dict(self) -> dict:
         Does the same as to_dict() but ignores the ArcElements.
         So it basically returns the hierarchy, without the information in which type of relationship
         parent and children are
-        @return:
         """
         return {"standardExtendedLinkElements": [el.to_simple_dict() for el in self.extended_links]}
 
 
 def parse_linkbase_url(linkbase_url: str, linkbase_type: LinkbaseType, cache: HttpCache) -> Linkbase:
     """
     Parses a linkbase given given a url
+
+    :param linkbase_url: full link to the linkbase
+    :param linkbase_type: type of the linkbase (calculation-, label-, presentation-, ...)
+    :param cache: http cache instance
+    :return: parsed Linkbase object
     """
     if not linkbase_url.startswith('http'): raise XbrlParseException(
         'This function only parses remotely saved linkbases. Please use parse_linkbase to parse local linkbases')
@@ -432,12 +428,14 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
     Parses a linkbase and returns a Linkbase object containing all
     locators, arcs and links of the linkbase in a hierarchical order (a Tree)
     A Linkbase usually does not import any additional files.
-    Thus we do not need a cache instance
+    Thus no cache instance is needed
+
     :param linkbase_path: path to the linkbase
     :param linkbase_type: Type of the linkbase
-    :param linkbase_url: if the locator of the linkbase contain relative references to concepts (i.e.: './../schema.xsd#Assets'
-    the url has to be set so that the parser can connect the locator with concept from the taxonomy
-    :return:
+    :param linkbase_url: if the locators of the linkbase contain relative references to concepts
+        (i.e.: './../schema.xsd#Assets') the url has to be set so that the parser can connect
+        the locators with the concepts from the taxonomy
+    :return: parsed Linkbase object
     """
     if linkbase_path.startswith('http'): raise XbrlParseException(
         'This function only parses locally saved linkbases. Please use parse_linkbase_url to parse remote linkbases')