diff --git a/.buildinfo b/.buildinfo new file mode 100644 index 00000000..908d60b2 --- /dev/null +++ b/.buildinfo @@ -0,0 +1,4 @@ +# Sphinx build info version 1 +# This file hashes the configuration used when building these files. When it is not found, a full rebuild will be done. +config: 626844e0d1e6e83ff7d8a178a91d759a +tags: 645f666f9bcd5a90fca523b33c5a78b7 diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/_autosummary/elm.base.ApiBase.html b/_autosummary/elm.base.ApiBase.html new file mode 100644 index 00000000..82570432 --- /dev/null +++ b/_autosummary/elm.base.ApiBase.html @@ -0,0 +1,647 @@ + + + + + + + elm.base.ApiBase — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.base.ApiBase

+
+
+class ApiBase(model=None)[source]
+

Bases: ABC

+

Class to parse text from a PDF document.

+
+
Parameters:
+

model (None | str) – Optional specification of OpenAI model to use. Default is +cls.DEFAULT_MODEL

+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

call_api(url, headers, request_json)

Make an asyncronous OpenAI API call.

call_api_async(url, headers, all_request_jsons)

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

chat(query[, temperature])

Have a continuous chat with the LLM including context from previous chat() calls stored as attributes in this class.

clear()

Clear chat history and reduce messages to just the initial model role message.

count_tokens(text, model)

Return the number of tokens in a string.

generic_async_query(queries[, model_role, ...])

Run a number of generic single queries asynchronously (not conversational)

generic_query(query[, model_role, temperature])

Ask a generic single query without conversation

get_embedding(text)

Get the 1D array (list) embedding of a text string.

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + +

DEFAULT_MODEL

Default model to do pdf text cleaning.

EMBEDDING_MODEL

Default model to do text embeddings.

EMBEDDING_URL

OpenAI embedding API URL

HEADERS

OpenAI API Headers

MODEL_ROLE

High level model role

URL

OpenAI API URL to be used with environment variable OPENAI_API_KEY.

all_messages_txt

Get a string printout of the full conversation with the LLM

+
+
+DEFAULT_MODEL = 'gpt-3.5-turbo'
+

Default model to do pdf text cleaning.

+
+ +
+
+EMBEDDING_MODEL = 'text-embedding-ada-002'
+

Default model to do text embeddings.

+
+ +
+
+EMBEDDING_URL = 'https://api.openai.com/v1/embeddings'
+

OpenAI embedding API URL

+
+ +
+
+URL = 'https://api.openai.com/v1/chat/completions'
+

OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use +an Azure API endpoint to trigger Azure usage along with environment +variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and +AZURE_OPENAI_ENDPOINT

+
+ +
+
+HEADERS = {'Authorization': 'Bearer None', 'Content-Type': 'application/json', 'api-key': 'None'}
+

OpenAI API Headers

+
+ +
+
+MODEL_ROLE = 'You are a research assistant that answers questions.'
+

High level model role

+
+ +
+
+property all_messages_txt
+

Get a string printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+clear()[source]
+

Clear chat history and reduce messages to just the initial model +role message.

+
+ +
+
+async static call_api(url, headers, request_json)[source]
+

Make an asyncronous OpenAI API call.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • request_json (dict) –

    +
    +
    API data input, typically looks like this for chat completion:
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
    +
  • +
+
+
Returns:
+

out (dict) – API response in json format

+
+
+
+ +
+
+async call_api_async(url, headers, all_request_jsons, ignore_error=None, rate_limit=40000.0)[source]
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

out (list) – List of API outputs where each list entry is a GPT answer from the +corresponding message in the all_request_jsons input.

+
+
+
+ +
+
+chat(query, temperature=0)[source]
+

Have a continuous chat with the LLM including context from previous +chat() calls stored as attributes in this class.

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+generic_query(query, model_role=None, temperature=0)[source]
+

Ask a generic single query without conversation

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+async generic_async_query(queries, model_role=None, temperature=0, ignore_error=None, rate_limit=40000.0)[source]
+

Run a number of generic single queries asynchronously +(not conversational)

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • query (list) – Questions to ask ChatGPT (list of strings)

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

response (list) – Model responses with same length as query input.

+
+
+
+ +
+
+classmethod get_embedding(text)[source]
+

Get the 1D array (list) embedding of a text string.

+
+
Parameters:
+

text (str) – Text to embed

+
+
Returns:
+

embedding (list) – List of float that represents the numerical embedding of the text

+
+
+
+ +
+
+static count_tokens(text, model)[source]
+

Return the number of tokens in a string.

+
+
Parameters:
+
    +
  • text (str) – Text string to get number of tokens for

  • +
  • model (str) – specification of OpenAI model to use (e.g., “gpt-3.5-turbo”)

  • +
+
+
Returns:
+

n (int) – Number of tokens in text

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.base.ApiQueue.html b/_autosummary/elm.base.ApiQueue.html new file mode 100644 index 00000000..48d08153 --- /dev/null +++ b/_autosummary/elm.base.ApiQueue.html @@ -0,0 +1,426 @@ + + + + + + + elm.base.ApiQueue — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.base.ApiQueue

+
+
+class ApiQueue(url, headers, request_jsons, ignore_error=None, rate_limit=40000.0, max_retries=10)[source]
+

Bases: object

+

Class to manage the parallel API queue and submission

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
  • max_retries (int) – Number of times to retry an API call with an error response before +raising an error.

  • +
+
+
+

Methods

+ + + + + + + + + + + + +

collect_jobs()

Collect asyncronous API calls and API outputs.

run()

Run all asyncronous API calls.

submit_jobs()

Submit a subset jobs asynchronously and hold jobs in the api_jobs attribute.

+

Attributes

+ + + + + + +

waiting_on

Get a list of async jobs that are being waited on.

+
+
+property waiting_on
+

Get a list of async jobs that are being waited on.

+
+ +
+
+submit_jobs()[source]
+

Submit a subset jobs asynchronously and hold jobs in the api_jobs +attribute. Break when the rate_limit is exceeded.

+
+ +
+
+async collect_jobs()[source]
+

Collect asyncronous API calls and API outputs. Store outputs in the +out attribute.

+
+ +
+
+async run()[source]
+

Run all asyncronous API calls.

+
+
Returns:
+

out (list) – List of API call outputs with same ordering as request_jsons +input.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.base.html b/_autosummary/elm.base.html new file mode 100644 index 00000000..4abee525 --- /dev/null +++ b/_autosummary/elm.base.html @@ -0,0 +1,332 @@ + + + + + + + elm.base — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.base

+

ELM abstract class for API calls

+

Classes

+ + + + + + + + + +

ApiBase([model])

Class to parse text from a PDF document.

ApiQueue(url, headers, request_jsons[, ...])

Class to manage the parallel API queue and submission

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.chunk.Chunker.html b/_autosummary/elm.chunk.Chunker.html new file mode 100644 index 00000000..eee200d8 --- /dev/null +++ b/_autosummary/elm.chunk.Chunker.html @@ -0,0 +1,785 @@ + + + + + + + elm.chunk.Chunker — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.chunk.Chunker

+
+
+class Chunker(text, tag=None, tokens_per_chunk=500, overlap=1, split_on='\n\n')[source]
+

Bases: ApiBase

+

Class to break text up into overlapping chunks

+

NOTE: very large paragraphs that exceed the tokens per chunk will not be +split up and will still be padded with overlap.

+
+
Parameters:
+
    +
  • text (str) – Single body of text to break up. Works well if this is a single +document with empty lines between paragraphs.

  • +
  • tag (None | str) – Optional reference tag to include at the beginning of each text +chunk

  • +
  • tokens_per_chunk (float) – Nominal token count per text chunk. Overlap paragraphs will exceed +this.

  • +
  • overlap (int) – Number of paragraphs to overlap between chunks

  • +
  • split_on (str) – Sub string to split text into paragraphs.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

add_overlap(chunks_input)

Add overlap on either side of a text chunk.

call_api(url, headers, request_json)

Make an asyncronous OpenAI API call.

call_api_async(url, headers, all_request_jsons)

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

chat(query[, temperature])

Have a continuous chat with the LLM including context from previous chat() calls stored as attributes in this class.

chunk_text()

Perform the text chunking operation

clean_paragraphs(text)

Clean up double line breaks to make sure paragraphs can be detected in the text.

clear()

Clear chat history and reduce messages to just the initial model role message.

count_tokens(text, model)

Return the number of tokens in a string.

generic_async_query(queries[, model_role, ...])

Run a number of generic single queries asynchronously (not conversational)

generic_query(query[, model_role, temperature])

Ask a generic single query without conversation

get_embedding(text)

Get the 1D array (list) embedding of a text string.

is_good_paragraph(paragraph)

Basic tests to make sure the paragraph is useful text.

merge_chunks(chunks_input)

Merge chunks until they reach the token limit per chunk.

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

DEFAULT_MODEL

Default model to do pdf text cleaning.

EMBEDDING_MODEL

Default model to do text embeddings.

EMBEDDING_URL

OpenAI embedding API URL

HEADERS

OpenAI API Headers

MODEL_ROLE

High level model role

URL

OpenAI API URL to be used with environment variable OPENAI_API_KEY.

all_messages_txt

Get a string printout of the full conversation with the LLM

chunk_tokens

Number of tokens per chunk.

chunks

List of overlapping text chunks (strings).

paragraph_tokens

Number of tokens per paragraph.

paragraphs

Get a list of paragraphs in the text demarkated by an empty line.

+
+
+property chunks
+

List of overlapping text chunks (strings).

+
+
Returns:
+

list

+
+
+
+ +
+
+property paragraphs
+

Get a list of paragraphs in the text demarkated by an empty line.

+
+
Returns:
+

list

+
+
+
+ +
+
+static clean_paragraphs(text)[source]
+

Clean up double line breaks to make sure paragraphs can be detected +in the text.

+
+ +
+
+static is_good_paragraph(paragraph)[source]
+

Basic tests to make sure the paragraph is useful text.

+
+ +
+
+property paragraph_tokens
+

Number of tokens per paragraph.

+
+
Returns:
+

list

+
+
+
+ +
+
+property chunk_tokens
+

Number of tokens per chunk.

+
+
Returns:
+

list

+
+
+
+ +
+
+merge_chunks(chunks_input)[source]
+

Merge chunks until they reach the token limit per chunk.

+
+
Parameters:
+

chunks_input (list) – List of list of integers: [[0, 1], [2], [3, 4]] where nested lists +are chunks and the integers are paragraph indices

+
+
Returns:
+

chunks (list) – List of list of integers: [[0, 1], [2], [3, 4]] where nested lists +are chunks and the integers are paragraph indices

+
+
+
+ +
+
+add_overlap(chunks_input)[source]
+

Add overlap on either side of a text chunk. This ignores token +limit.

+
+
Parameters:
+

chunks_input (list) – List of list of integers: [[0, 1], [2], [3, 4]] where nested lists +are chunks and the integers are paragraph indices

+
+
Returns:
+

chunks (list) – List of list of integers: [[0, 1], [2], [3, 4]] where nested lists +are chunks and the integers are paragraph indices

+
+
+
+ +
+
+chunk_text()[source]
+

Perform the text chunking operation

+
+
Returns:
+

chunks (list) – List of strings where each string is an overlapping chunk of text

+
+
+
+ +
+
+DEFAULT_MODEL = 'gpt-3.5-turbo'
+

Default model to do pdf text cleaning.

+
+ +
+
+EMBEDDING_MODEL = 'text-embedding-ada-002'
+

Default model to do text embeddings.

+
+ +
+
+EMBEDDING_URL = 'https://api.openai.com/v1/embeddings'
+

OpenAI embedding API URL

+
+ +
+
+HEADERS = {'Authorization': 'Bearer None', 'Content-Type': 'application/json', 'api-key': 'None'}
+

OpenAI API Headers

+
+ +
+
+MODEL_ROLE = 'You are a research assistant that answers questions.'
+

High level model role

+
+ +
+
+URL = 'https://api.openai.com/v1/chat/completions'
+

OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use +an Azure API endpoint to trigger Azure usage along with environment +variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and +AZURE_OPENAI_ENDPOINT

+
+ +
+
+property all_messages_txt
+

Get a string printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+async static call_api(url, headers, request_json)
+

Make an asyncronous OpenAI API call.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • request_json (dict) –

    +
    +
    API data input, typically looks like this for chat completion:
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
    +
  • +
+
+
Returns:
+

out (dict) – API response in json format

+
+
+
+ +
+
+async call_api_async(url, headers, all_request_jsons, ignore_error=None, rate_limit=40000.0)
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

out (list) – List of API outputs where each list entry is a GPT answer from the +corresponding message in the all_request_jsons input.

+
+
+
+ +
+
+chat(query, temperature=0)
+

Have a continuous chat with the LLM including context from previous +chat() calls stored as attributes in this class.

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+clear()
+

Clear chat history and reduce messages to just the initial model +role message.

+
+ +
+
+static count_tokens(text, model)
+

Return the number of tokens in a string.

+
+
Parameters:
+
    +
  • text (str) – Text string to get number of tokens for

  • +
  • model (str) – specification of OpenAI model to use (e.g., “gpt-3.5-turbo”)

  • +
+
+
Returns:
+

n (int) – Number of tokens in text

+
+
+
+ +
+
+async generic_async_query(queries, model_role=None, temperature=0, ignore_error=None, rate_limit=40000.0)
+

Run a number of generic single queries asynchronously +(not conversational)

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • query (list) – Questions to ask ChatGPT (list of strings)

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

response (list) – Model responses with same length as query input.

+
+
+
+ +
+
+generic_query(query, model_role=None, temperature=0)
+

Ask a generic single query without conversation

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+classmethod get_embedding(text)
+

Get the 1D array (list) embedding of a text string.

+
+
Parameters:
+

text (str) – Text to embed

+
+
Returns:
+

embedding (list) – List of float that represents the numerical embedding of the text

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.chunk.html b/_autosummary/elm.chunk.html new file mode 100644 index 00000000..4eacfcdf --- /dev/null +++ b/_autosummary/elm.chunk.html @@ -0,0 +1,329 @@ + + + + + + + elm.chunk — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.chunk

+

Utility to break text up into overlapping chunks.

+

Classes

+ + + + + + +

Chunker(text[, tag, tokens_per_chunk, ...])

Class to break text up into overlapping chunks

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.cli.html b/_autosummary/elm.cli.html new file mode 100644 index 00000000..44ab12fc --- /dev/null +++ b/_autosummary/elm.cli.html @@ -0,0 +1,321 @@ + + + + + + + elm.cli — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.cli

+

ELM Ordinances CLI.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.embed.ChunkAndEmbed.html b/_autosummary/elm.embed.ChunkAndEmbed.html new file mode 100644 index 00000000..6156c327 --- /dev/null +++ b/_autosummary/elm.embed.ChunkAndEmbed.html @@ -0,0 +1,705 @@ + + + + + + + elm.embed.ChunkAndEmbed — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.embed.ChunkAndEmbed

+
+
+class ChunkAndEmbed(text, model=None, **chunk_kwargs)[source]
+

Bases: ApiBase

+

Class to chunk text data and create embeddings

+
+
Parameters:
+
    +
  • text (str) – Single continuous piece of text to chunk up by paragraph and embed +or filepath to .txt file containing one piece of text.

  • +
  • model (None | str) – Optional specification of OpenAI model to use. Default is +cls.DEFAULT_MODEL

  • +
  • chunk_kwargs (dict | None) – kwargs for initialization of elm.chunk.Chunker

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

call_api(url, headers, request_json)

Make an asyncronous OpenAI API call.

call_api_async(url, headers, all_request_jsons)

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

chat(query[, temperature])

Have a continuous chat with the LLM including context from previous chat() calls stored as attributes in this class.

clean_tables(text)

Make sure that table headers are in the same paragraph as the table itself.

clear()

Clear chat history and reduce messages to just the initial model role message.

count_tokens(text, model)

Return the number of tokens in a string.

generic_async_query(queries[, model_role, ...])

Run a number of generic single queries asynchronously (not conversational)

generic_query(query[, model_role, temperature])

Ask a generic single query without conversation

get_embedding(text)

Get the 1D array (list) embedding of a text string.

run([rate_limit])

Run text embedding in serial

run_async([rate_limit])

Run text embedding on chunks asynchronously

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + +

DEFAULT_MODEL

Default model to do embeddings.

EMBEDDING_MODEL

Default model to do text embeddings.

EMBEDDING_URL

OpenAI embedding API URL

HEADERS

OpenAI API Headers

MODEL_ROLE

High level model role

URL

OpenAI API URL to be used with environment variable OPENAI_API_KEY.

all_messages_txt

Get a string printout of the full conversation with the LLM

+
+
+DEFAULT_MODEL = 'text-embedding-ada-002'
+

Default model to do embeddings.

+
+ +
+
+EMBEDDING_MODEL = 'text-embedding-ada-002'
+

Default model to do text embeddings.

+
+ +
+
+EMBEDDING_URL = 'https://api.openai.com/v1/embeddings'
+

OpenAI embedding API URL

+
+ +
+
+HEADERS = {'Authorization': 'Bearer None', 'Content-Type': 'application/json', 'api-key': 'None'}
+

OpenAI API Headers

+
+ +
+
+MODEL_ROLE = 'You are a research assistant that answers questions.'
+

High level model role

+
+ +
+
+URL = 'https://api.openai.com/v1/chat/completions'
+

OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use +an Azure API endpoint to trigger Azure usage along with environment +variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and +AZURE_OPENAI_ENDPOINT

+
+ +
+
+property all_messages_txt
+

Get a string printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+async static call_api(url, headers, request_json)
+

Make an asyncronous OpenAI API call.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • request_json (dict) –

    +
    +
    API data input, typically looks like this for chat completion:
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
    +
  • +
+
+
Returns:
+

out (dict) – API response in json format

+
+
+
+ +
+
+async call_api_async(url, headers, all_request_jsons, ignore_error=None, rate_limit=40000.0)
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

out (list) – List of API outputs where each list entry is a GPT answer from the +corresponding message in the all_request_jsons input.

+
+
+
+ +
+
+chat(query, temperature=0)
+

Have a continuous chat with the LLM including context from previous +chat() calls stored as attributes in this class.

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+static clean_tables(text)[source]
+

Make sure that table headers are in the same paragraph as the table +itself. Typically, tables are looked for with pipes and hyphens, which +is how GPT cleans tables in text.

+
+ +
+
+clear()
+

Clear chat history and reduce messages to just the initial model +role message.

+
+ +
+
+static count_tokens(text, model)
+

Return the number of tokens in a string.

+
+
Parameters:
+
    +
  • text (str) – Text string to get number of tokens for

  • +
  • model (str) – specification of OpenAI model to use (e.g., “gpt-3.5-turbo”)

  • +
+
+
Returns:
+

n (int) – Number of tokens in text

+
+
+
+ +
+
+async generic_async_query(queries, model_role=None, temperature=0, ignore_error=None, rate_limit=40000.0)
+

Run a number of generic single queries asynchronously +(not conversational)

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • query (list) – Questions to ask ChatGPT (list of strings)

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

response (list) – Model responses with same length as query input.

+
+
+
+ +
+
+generic_query(query, model_role=None, temperature=0)
+

Ask a generic single query without conversation

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+classmethod get_embedding(text)
+

Get the 1D array (list) embedding of a text string.

+
+
Parameters:
+

text (str) – Text to embed

+
+
Returns:
+

embedding (list) – List of float that represents the numerical embedding of the text

+
+
+
+ +
+
+run(rate_limit=175000.0)[source]
+

Run text embedding in serial

+
+
Parameters:
+

rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +embedding limit is 350k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

+
+
Returns:
+

embedding (list) – List of 1D arrays representing the embeddings for all text chunks

+
+
+
+ +
+
+async run_async(rate_limit=175000.0)[source]
+

Run text embedding on chunks asynchronously

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await ChunkAndEmbed.run_async()

+
+
Parameters:
+

rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +embedding limit is 350k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

+
+
Returns:
+

embedding (list) – List of 1D arrays representing the embeddings for all text chunks

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.embed.html b/_autosummary/elm.embed.html new file mode 100644 index 00000000..bf61d576 --- /dev/null +++ b/_autosummary/elm.embed.html @@ -0,0 +1,329 @@ + + + + + + + elm.embed — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.embed

+

ELM text embedding

+

Classes

+ + + + + + +

ChunkAndEmbed(text[, model])

Class to chunk text data and create embeddings

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.exceptions.ELMError.html b/_autosummary/elm.exceptions.ELMError.html new file mode 100644 index 00000000..a2538c50 --- /dev/null +++ b/_autosummary/elm.exceptions.ELMError.html @@ -0,0 +1,327 @@ + + + + + + + elm.exceptions.ELMError — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.exceptions.ELMError

+
+
+exception ELMError[source]
+

Generic ELM Error.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.exceptions.ELMRuntimeError.html b/_autosummary/elm.exceptions.ELMRuntimeError.html new file mode 100644 index 00000000..c8473bbd --- /dev/null +++ b/_autosummary/elm.exceptions.ELMRuntimeError.html @@ -0,0 +1,327 @@ + + + + + + + elm.exceptions.ELMRuntimeError — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.exceptions.ELMRuntimeError

+
+
+exception ELMRuntimeError[source]
+

ELM RuntimeError.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.exceptions.html b/_autosummary/elm.exceptions.html new file mode 100644 index 00000000..5634cf08 --- /dev/null +++ b/_autosummary/elm.exceptions.html @@ -0,0 +1,332 @@ + + + + + + + elm.exceptions — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.exceptions

+

Custom Exceptions and Errors for ELM.

+

Exceptions

+ + + + + + + + + +

ELMError

Generic ELM Error.

ELMRuntimeError

ELM RuntimeError.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.html b/_autosummary/elm.html new file mode 100644 index 00000000..64f307ee --- /dev/null +++ b/_autosummary/elm.html @@ -0,0 +1,366 @@ + + + + + + + elm — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm

+

Energy Language Model

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

elm.base

ELM abstract class for API calls

elm.chunk

Utility to break text up into overlapping chunks.

elm.cli

ELM Ordinances CLI.

elm.embed

ELM text embedding

elm.exceptions

Custom Exceptions and Errors for ELM.

elm.ords

ELM ordinance document download and structured data extraction.

elm.osti

Utilities for retrieving data from OSTI.

elm.pdf

ELM PDF to text parser

elm.summary

Research Summarization and Distillation with LLMs

elm.tree

ELM decision trees.

elm.utilities

ELM utility classes and functions.

elm.version

ELM version number

elm.web

ELM Web scraping.

elm.wizard

ELM energy wizard

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.download.download_county_ordinance.html b/_autosummary/elm.ords.download.download_county_ordinance.html new file mode 100644 index 00000000..c6f695c2 --- /dev/null +++ b/_autosummary/elm.ords.download.download_county_ordinance.html @@ -0,0 +1,355 @@ + + + + + + + elm.ords.download.download_county_ordinance — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.download.download_county_ordinance

+
+
+async download_county_ordinance(location, text_splitter, num_urls=5, file_loader_kwargs=None, browser_semaphore=None, **kwargs)[source]
+

Download the ordinance document for a single county.

+
+
Parameters:
+
    +
  • location (elm.ords.utilities.location.Location) – Location objects representing the county.

  • +
  • text_splitter (obj, optional) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. Langchain’s text splitters should work for this +input.

  • +
  • num_urls (int, optional) – Number of unique Google search result URL’s to check for +ordinance document. By default, 5.

  • +
  • file_loader_kwargs (dict, optional) – Dictionary of keyword-argument pairs to initialize +elm.web.file_loader.AsyncFileLoader with. The +“pw_launch_kwargs” key in these will also be used to initialize +the elm.web.google_search.PlaywrightGoogleLinkSearch +used for the google URL search. By default, None.

  • +
  • browser_semaphore (asyncio.Semaphore, optional) – Semaphore instance that can be used to limit the number of +playwright browsers open concurrently. If None, no limits +are applied. By default, None.

  • +
  • **kwargs – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument | None – Document instance for the downloaded document, or None if no +document was found.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.download.html b/_autosummary/elm.ords.download.html new file mode 100644 index 00000000..7d2a04ff --- /dev/null +++ b/_autosummary/elm.ords.download.html @@ -0,0 +1,330 @@ + + + + + + + elm.ords.download — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.download

+

ELM Ordinance county file downloading logic

+

Functions

+ + + + + + +

download_county_ordinance(location, ...[, ...])

Download the ordinance document for a single county.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.html b/_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.html new file mode 100644 index 00000000..da1ebf5b --- /dev/null +++ b/_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.html @@ -0,0 +1,358 @@ + + + + + + + elm.ords.extraction.apply.check_for_ordinance_info — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.apply.check_for_ordinance_info

+
+
+async check_for_ordinance_info(doc, text_splitter, **kwargs)[source]
+

Parse a single document for ordinance information.

+
+
Parameters:
+
    +
  • doc (elm.web.document.BaseDocument) – A document potentially containing ordinance information. Note +that if the document’s metadata contains the +"contains_ord_info" key, it will not be processed. To force +a document to be processed by this function, remove that key +from the documents metadata.

  • +
  • text_splitter (obj) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. Langchain’s text splitters should work for this +input.

  • +
  • **kwargs – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument – Document that has been parsed for ordinance text. The results of +the parsing are stored in the documents metadata. In particular, +the metadata will contain a "contains_ord_info" key that +will be set to True if ordinance info was found in the text, +and False otherwise. If True, the metadata will also +contain a "date" key containing the most recent date that +the ordinance was enacted (or a tuple of None if not found), +and an "ordinance_text" key containing the ordinance text +snippet. Note that the snippet may contain other info as well, +but should encapsulate all of the ordinance text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.html b/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.html new file mode 100644 index 00000000..20ab4b1a --- /dev/null +++ b/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.html @@ -0,0 +1,356 @@ + + + + + + + elm.ords.extraction.apply.extract_ordinance_text_with_llm — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.apply.extract_ordinance_text_with_llm

+
+
+async extract_ordinance_text_with_llm(doc, text_splitter, extractor)[source]
+

Extract ordinance text from document using LLM.

+
+
Parameters:
+
    +
  • doc (elm.web.document.BaseDocument) – A document known to contain ordinance information. This means it +must contain an "ordinance_text" key in the metadata. You +can run +check_for_ordinance_info() +to have this attribute populated automatically for documents +that are found to contain ordinance data. Note that if the +document’s metadata does not contain the "ordinance_text" +key, you will get an error.

  • +
  • text_splitter (obj) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. Langchain’s text splitters should work for this +input.

  • +
  • extractor (elm.ords.extraction.ordinance.OrdinanceExtractor) – Instance of ~elm.ords.extraction.ordinance.OrdinanceExtractor +used for ordinance text extraction.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument – Document that has been parsed for ordinance text. The results of +the extraction are stored in the document’s metadata. In +particular, the metadata will contain a +"cleaned_ordinance_text" key that will contain the cleaned +ordinance text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.html b/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.html new file mode 100644 index 00000000..35fb2087 --- /dev/null +++ b/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.html @@ -0,0 +1,374 @@ + + + + + + + elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation

+
+
+async extract_ordinance_text_with_ngram_validation(doc, text_splitter, n=4, num_extraction_attempts=3, ngram_fraction_threshold=0.95, **kwargs)[source]
+

Extract ordinance text for a single document with known ord info.

+

This extraction includes an “ngram” check, which attempts to detect +whether or not the cleaned text was extracted from the original +ordinance text. The processing will attempt to re-extract the text +if the validation does not pass a certain threshold until the +maximum number of attempts is reached. If the text still does not +pass validation at this point, there is a good chance that the LLM +hallucinated parts of the output text, so caution should be taken.

+
+
Parameters:
+
    +
  • doc (elm.web.document.BaseDocument) – A document known to contain ordinance information. This means it +must contain an "ordinance_text" key in the metadata. You +can run +check_for_ordinance_info() +to have this attribute populated automatically for documents +that are found to contain ordinance data. Note that if the +document’s metadata does not contain the "ordinance_text" +key, it will not be processed.

  • +
  • text_splitter (obj) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. Langchain’s text splitters should work for this +input.

  • +
  • n (int, optional) – Number of words to include per ngram for the ngram validation, +which helps ensure that the LLM did not hallucinate. +By default, 4.

  • +
  • num_extraction_attempts (int, optional) – Number of extraction attempts before returning text that did not +pass the ngram check. If the processing exceeds this value, +there is a good chance that the LLM hallucinated parts of the +output text. Cannot be negative or 0. By default, 3.

  • +
  • ngram_fraction_threshold (float, optional) – Fraction of ngrams in the cleaned text that are also found in +the original ordinance text for the extraction to be considered +successful. Should be a value between 0 and 1 (inclusive). +By default, 0.95.

  • +
  • **kwargs – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument – Document that has been parsed for ordinance text. The results of +the extraction are stored in the document’s metadata. In +particular, the metadata will contain a +"cleaned_ordinance_text" key that will contain the cleaned +ordinance text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.apply.extract_ordinance_values.html b/_autosummary/elm.ords.extraction.apply.extract_ordinance_values.html new file mode 100644 index 00000000..719b01f6 --- /dev/null +++ b/_autosummary/elm.ords.extraction.apply.extract_ordinance_values.html @@ -0,0 +1,351 @@ + + + + + + + elm.ords.extraction.apply.extract_ordinance_values — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.apply.extract_ordinance_values

+
+
+async extract_ordinance_values(doc, **kwargs)[source]
+

Extract ordinance values for a single document with known ord text.

+
+
Parameters:
+
    +
doc (elm.web.document.BaseDocument) – A document known to contain ordinance text. This means it must +contain a "cleaned_ordinance_text" key in the metadata. You +can run +extract_ordinance_text() +to have this attribute populated automatically for documents +that are found to contain ordinance data. Note that if the +document’s metadata does not contain the +"cleaned_ordinance_text" key, it will not be processed.

  • +
  • **kwargs – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument – Document that has been parsed for ordinance values. The results +of the extraction are stored in the document’s metadata. In +particular, the metadata will contain an "ordinance_values" +key that will contain the DataFrame with ordinance values.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.apply.html b/_autosummary/elm.ords.extraction.apply.html new file mode 100644 index 00000000..5e4e90f6 --- /dev/null +++ b/_autosummary/elm.ords.extraction.apply.html @@ -0,0 +1,340 @@ + + + + + + + elm.ords.extraction.apply — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.apply

+

ELM Ordinance function to apply ordinance extraction on a document

+

Functions

+ + + + + + + + + + + + + + + +

check_for_ordinance_info(doc, text_splitter, ...)

Parse a single document for ordinance information.

extract_ordinance_text_with_llm(doc, ...)

Extract ordinance text from document using LLM.

extract_ordinance_text_with_ngram_validation(...)

Extract ordinance text for a single document with known ord info.

extract_ordinance_values(doc, **kwargs)

Extract ordinance values for a single document with known ord text.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.date.DateExtractor.html b/_autosummary/elm.ords.extraction.date.DateExtractor.html new file mode 100644 index 00000000..2972b629 --- /dev/null +++ b/_autosummary/elm.ords.extraction.date.DateExtractor.html @@ -0,0 +1,367 @@ + + + + + + + elm.ords.extraction.date.DateExtractor — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.date.DateExtractor

+
+
+class DateExtractor(structured_llm_caller)[source]
+

Bases: object

+

Helper class to extract date info from document.

+
+
Parameters:
+

structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

+
+
+

Methods

+ + + + + + +

parse(doc)

Extract date (year, month, day) from doc.

+

Attributes

+ + + + + + +

SYSTEM_MESSAGE

+
+
+async parse(doc)[source]
+

Extract date (year, month, day) from doc.

+
+
Parameters:
+

doc (elm.web.document.BaseDocument) – Document with a raw_pages attribute.

+
+
Returns:
+

tuple – 3-tuple containing year, month, day, or None if any of +those are not found.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.date.html b/_autosummary/elm.ords.extraction.date.html new file mode 100644 index 00000000..64948fad --- /dev/null +++ b/_autosummary/elm.ords.extraction.date.html @@ -0,0 +1,331 @@ + + + + + + + elm.ords.extraction.date — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.date

+

ELM Ordinance date extraction logic.

+

Classes

+ + + + + + +

DateExtractor(structured_llm_caller)

Helper class to extract date info from document.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.features.SetbackFeatures.html b/_autosummary/elm.ords.extraction.features.SetbackFeatures.html new file mode 100644 index 00000000..6800b851 --- /dev/null +++ b/_autosummary/elm.ords.extraction.features.SetbackFeatures.html @@ -0,0 +1,349 @@ + + + + + + + elm.ords.extraction.features.SetbackFeatures — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.features.SetbackFeatures

+
+
+class SetbackFeatures[source]
+

Bases: object

+

Utility class to get mutually-exclusive feature descriptions.

+

Methods

+ + + +
+

Attributes

+ + + + + + + + + + + + +

DEFAULT_FEATURE_DESCRIPTIONS

FEATURES_AS_IGNORE

FEATURE_CLARIFICATIONS

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.features.html b/_autosummary/elm.ords.extraction.features.html new file mode 100644 index 00000000..33d6ab8b --- /dev/null +++ b/_autosummary/elm.ords.extraction.features.html @@ -0,0 +1,331 @@ + + + + + + + elm.ords.extraction.features — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.features

+

ELM Ordinance mutually-exclusive features class.

+

Classes

+ + + + + + +

SetbackFeatures()

Utility class to get mutually-exclusive feature descriptions.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.html b/_autosummary/elm.ords.extraction.graphs.html new file mode 100644 index 00000000..5a6a1d92 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.html @@ -0,0 +1,355 @@ + + + + + + + elm.ords.extraction.graphs — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs

+

ELM Ordinance Decision Tree Graph setup functions.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

llm_response_does_not_start_with_no(response)

Check if LLM response does not start with "no" (case-insensitive)

llm_response_starts_with_no(response)

Check if LLM response begins with "no" (case-insensitive)

llm_response_starts_with_yes(response)

Check if LLM response begins with "yes" (case-insensitive)

setup_base_graph(**kwargs)

Setup Graph to get setback ordinance text for a particular feature.

setup_conditional(**kwargs)

Setup Graph to extract min/max setback values (after mult) for a feature.

setup_graph_extra_restriction(**kwargs)

Setup Graph to extract non-setback ordinance values from text.

setup_graph_wes_types(**kwargs)

Setup Graph to get the largest turbine size in the ordinance text.

setup_multiplier(**kwargs)

Setup Graph to extract setbacks multiplier values for a feature.

setup_participating_owner(**kwargs)

Setup Graph to check for participating vs non-participating owner setbacks for a feature.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.html b/_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.html new file mode 100644 index 00000000..552d4505 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.html @@ -0,0 +1,337 @@ + + + + + + + elm.ords.extraction.graphs.llm_response_does_not_start_with_no — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.llm_response_does_not_start_with_no

+
+
+llm_response_does_not_start_with_no(response)[source]
+

Check if LLM response does not start with “no” (case-insensitive)

+
+
Parameters:
+

response (str) – LLM response string.

+
+
Returns:
+

boolTrue if LLM response does not begin with “No”.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.html b/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.html new file mode 100644 index 00000000..e5e0f5bb --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.html @@ -0,0 +1,337 @@ + + + + + + + elm.ords.extraction.graphs.llm_response_starts_with_no — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.llm_response_starts_with_no

+
+
+llm_response_starts_with_no(response)[source]
+

Check if LLM response begins with “no” (case-insensitive)

+
+
Parameters:
+

response (str) – LLM response string.

+
+
Returns:
+

boolTrue if LLM response begins with “No”.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.html b/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.html new file mode 100644 index 00000000..cc65b128 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.html @@ -0,0 +1,337 @@ + + + + + + + elm.ords.extraction.graphs.llm_response_starts_with_yes — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.llm_response_starts_with_yes

+
+
+llm_response_starts_with_yes(response)[source]
+

Check if LLM response begins with “yes” (case-insensitive)

+
+
Parameters:
+

response (str) – LLM response string.

+
+
Returns:
+

boolTrue if LLM response begins with “Yes”.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.setup_base_graph.html b/_autosummary/elm.ords.extraction.graphs.setup_base_graph.html new file mode 100644 index 00000000..2c5422d2 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.setup_base_graph.html @@ -0,0 +1,338 @@ + + + + + + + elm.ords.extraction.graphs.setup_base_graph — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.setup_base_graph

+
+
+setup_base_graph(**kwargs)[source]
+

Setup Graph to get setback ordinance text for a particular feature.

+
+
Parameters:
+

**kwargs – Keyword-value pairs to add to graph.

+
+
Returns:
+

nx.DiGraph – Graph instance that can be used to initialize an +elm.tree.DecisionTree.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.setup_conditional.html b/_autosummary/elm.ords.extraction.graphs.setup_conditional.html new file mode 100644 index 00000000..5db9bd43 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.setup_conditional.html @@ -0,0 +1,340 @@ + + + + + + + elm.ords.extraction.graphs.setup_conditional — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.setup_conditional

+
+
+setup_conditional(**kwargs)[source]
+

Setup Graph to extract min/max setback values (after mult) for a +feature. These are typically given within the context of +‘the greater of’ or ‘the lesser of’ clauses.

+
+
Parameters:
+

**kwargs – Keyword-value pairs to add to graph.

+
+
Returns:
+

nx.DiGraph – Graph instance that can be used to initialize an +elm.tree.DecisionTree.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.html b/_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.html new file mode 100644 index 00000000..7aada3b4 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.html @@ -0,0 +1,338 @@ + + + + + + + elm.ords.extraction.graphs.setup_graph_extra_restriction — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.setup_graph_extra_restriction

+
+
+setup_graph_extra_restriction(**kwargs)[source]
+

Setup Graph to extract non-setback ordinance values from text.

+
+
Parameters:
+

**kwargs – Keyword-value pairs to add to graph.

+
+
Returns:
+

nx.DiGraph – Graph instance that can be used to initialize an +elm.tree.DecisionTree.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.html b/_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.html new file mode 100644 index 00000000..ed6198bc --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.html @@ -0,0 +1,338 @@ + + + + + + + elm.ords.extraction.graphs.setup_graph_wes_types — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.setup_graph_wes_types

+
+
+setup_graph_wes_types(**kwargs)[source]
+

Setup Graph to get the largest turbine size in the ordinance text.

+
+
Parameters:
+

**kwargs – Keyword-value pairs to add to graph.

+
+
Returns:
+

nx.DiGraph – Graph instance that can be used to initialize an +elm.tree.DecisionTree.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.setup_multiplier.html b/_autosummary/elm.ords.extraction.graphs.setup_multiplier.html new file mode 100644 index 00000000..1eeb0efb --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.setup_multiplier.html @@ -0,0 +1,338 @@ + + + + + + + elm.ords.extraction.graphs.setup_multiplier — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.setup_multiplier

+
+
+setup_multiplier(**kwargs)[source]
+

Setup Graph to extract setbacks multiplier values for a feature.

+
+
Parameters:
+

**kwargs – Keyword-value pairs to add to graph.

+
+
Returns:
+

nx.DiGraph – Graph instance that can be used to initialize an +elm.tree.DecisionTree.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.graphs.setup_participating_owner.html b/_autosummary/elm.ords.extraction.graphs.setup_participating_owner.html new file mode 100644 index 00000000..27ccae23 --- /dev/null +++ b/_autosummary/elm.ords.extraction.graphs.setup_participating_owner.html @@ -0,0 +1,339 @@ + + + + + + + elm.ords.extraction.graphs.setup_participating_owner — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.graphs.setup_participating_owner

+
+
+setup_participating_owner(**kwargs)[source]
+

Setup Graph to check for participating vs non-participating owner +setbacks for a feature.

+
+
Parameters:
+

**kwargs – Keyword-value pairs to add to graph.

+
+
Returns:
+

nx.DiGraph – Graph instance that can be used to initialize an +elm.tree.DecisionTree.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.html b/_autosummary/elm.ords.extraction.html new file mode 100644 index 00000000..cc5fbe2a --- /dev/null +++ b/_autosummary/elm.ords.extraction.html @@ -0,0 +1,350 @@ + + + + + + + elm.ords.extraction — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction

+

ELM Ordinance text extraction tooling.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

elm.ords.extraction.apply

ELM Ordinance function to apply ordinance extraction on a document

elm.ords.extraction.date

ELM Ordinance date extraction logic.

elm.ords.extraction.features

ELM Ordinance mutually-exclusive features class.

elm.ords.extraction.graphs

ELM Ordinance Decision Tree Graph setup functions.

elm.ords.extraction.ngrams

ELM Ordinance ngram text validation

elm.ords.extraction.ordinance

ELM Ordinance document content Validation logic

elm.ords.extraction.parse

ELM Ordinance structured parsing class.

elm.ords.extraction.tree

ELM Ordinance async decision tree.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.html b/_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.html new file mode 100644 index 00000000..01515952 --- /dev/null +++ b/_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.html @@ -0,0 +1,344 @@ + + + + + + + elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams

+
+
+convert_text_to_sentence_ngrams(text, n)[source]
+

Convert input text to a list of ngrams.

+

The text is first split byu sentence, after which each sentence is +converted into ngrams. The ngrams for all sentences are combined and +returned.

+
+
Parameters:
+
    +
  • text (str) – Input text containing one or more sentences.

  • +
  • n (int) – Number of words to include per ngram.

  • +
+
+
Returns:
+

list – List of tuples, where each tuple is an ngram from the original +text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.ngrams.html b/_autosummary/elm.ords.extraction.ngrams.html new file mode 100644 index 00000000..df057b98 --- /dev/null +++ b/_autosummary/elm.ords.extraction.ngrams.html @@ -0,0 +1,336 @@ + + + + + + + elm.ords.extraction.ngrams — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.ngrams

+

ELM Ordinance ngram text validation

+

This check helps validate that the LLM extracted text from the original +document and did not make it up itself.

+

Functions

+ + + + + + + + + +

convert_text_to_sentence_ngrams(text, n)

Convert input text to a list of ngrams.

sentence_ngram_containment(original, test, n)

Fraction of sentence ngrams from the test text found in the original.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.html b/_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.html new file mode 100644 index 00000000..ef7bee83 --- /dev/null +++ b/_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.html @@ -0,0 +1,345 @@ + + + + + + + elm.ords.extraction.ngrams.sentence_ngram_containment — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.ngrams.sentence_ngram_containment

+
+
+sentence_ngram_containment(original, test, n)[source]
+

Fraction of sentence ngrams from the test text found in the original.

+
+
Parameters:
+
    +
  • original (str) – Original (superset) text. Ngrams from the test text will be +checked against this text.

  • +
  • test (str) – Test (sub) text. Ngrams from this text will be searched for in +the original text, and the fraction of these ngrams that are +found in the original text will be returned.

  • +
  • n (int) – Number of words to include per ngram.

  • +
+
+
Returns:
+

float – Fraction of ngrams from the test input that were found in the +original text. Always returns True if test has no ngrams.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.html b/_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.html new file mode 100644 index 00000000..80e0e034 --- /dev/null +++ b/_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.html @@ -0,0 +1,392 @@ + + + + + + + elm.ords.extraction.ordinance.OrdinanceExtractor — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.ordinance.OrdinanceExtractor

+
+
+class OrdinanceExtractor(llm_caller)[source]
+

Bases: object

+

Extract succinct ordinance text from input

+
+
Parameters:
+

llm_caller (elm.ords.llm.LLMCaller) – LLM Caller instance used to extract ordinance info with.

+
+
+

Methods

+ + + + + + + + + +

check_for_correct_size(text_chunks)

Extract ordinance text from input text chunks for large WES.

check_for_restrictions(text_chunks)

Extract restriction ordinance text from input text chunks.

+

Attributes

+ + + + + + + + + + + + +

MODEL_INSTRUCTIONS_RESTRICTIONS

MODEL_INSTRUCTIONS_SIZE

SYSTEM_MESSAGE

+
+
+async check_for_restrictions(text_chunks)[source]
+

Extract restriction ordinance text from input text chunks.

+
+
Parameters:
+

text_chunks (list of str) – List of strings, each of which represent a chunk of text. +The order of the strings should be the order of the text +chunks.

+
+
Returns:
+

str – Ordinance text extracted from text chunks.

+
+
+
+ +
+
+async check_for_correct_size(text_chunks)[source]
+

Extract ordinance text from input text chunks for large WES.

+
+
Parameters:
+

text_chunks (list of str) – List of strings, each of which represent a chunk of text. +The order of the strings should be the order of the text +chunks.

+
+
Returns:
+

str – Ordinance text extracted from text chunks.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.html b/_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.html new file mode 100644 index 00000000..87ef53e0 --- /dev/null +++ b/_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.html @@ -0,0 +1,450 @@ + + + + + + + elm.ords.extraction.ordinance.OrdinanceValidator — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.ordinance.OrdinanceValidator

+
+
+class OrdinanceValidator(structured_llm_caller, text_chunks, num_to_recall=2)[source]
+

Bases: ValidationWithMemory

+

Check document text for wind ordinances.

+
+
Parameters:
+
    +
  • structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

  • +
  • text_chunks (list of str) – List of strings, each of which represent a chunk of text. +The order of the strings should be the order of the text +chunks. This validator may refer to previous text chunks to +answer validation questions.

  • +
  • num_to_recall (int, optional) – Number of chunks to check for each validation call. This +includes the original chunk! For example, if +num_to_recall=2, the validator will first check the chunk +at the requested index, and then the previous chunk as well. +By default, 2.

  • +
+
+
+

Methods

+ + + + + + + + + +

parse([min_chunks_to_process])

Parse text chunks and look for ordinance text.

parse_from_ind(ind, prompt, key)

Validate a chunk of text.

+

Attributes

+ + + + + + + + + + + + + + + + + + +

CONTAINS_ORD_PROMPT

IS_LEGAL_TEXT_PROMPT

IS_UTILITY_SCALE_PROMPT

is_legal_text

True if text was found to be from a legal source.

ordinance_text

Combined ordinance text from the individual chunks.

+
+ +

True if text was found to be from a legal source.

+
+
Type:
+

bool

+
+
+
+ +
+
+property ordinance_text
+

Combined ordinance text from the individual chunks.

+
+
Type:
+

str

+
+
+
+ +
+
+async parse(min_chunks_to_process=3)[source]
+

Parse text chunks and look for ordinance text.

+
+
Parameters:
+

min_chunks_to_process (int, optional) – Minimum number of chunks to process before checking if +document resembles legal text and ignoring chunks that don’t +pass the wind heuristic. By default, 3.

+
+
Returns:
+

boolTrue if any ordinance text was found in the chunks.

+
+
+
+ +
+
+async parse_from_ind(ind, prompt, key)
+

Validate a chunk of text.

+

Validation occurs by querying the LLM using the input prompt and +parsing the key from the response JSON. The prompt should +request that the key be a boolean output. If the key retrieved +from the LLM response is False, a number of previous text chunks +are checked as well, using the same prompt. This can be helpful +in cases where the answer to the validation prompt (e.g. does +this text pertain to a large WECS?) is only found in a previous +text chunk.

+
+
Parameters:
+
    +
  • ind (int) – Positive integer corresponding to the chunk index. +Must be less than len(text_chunks).

  • +
  • prompt (str) – Input LLM system prompt that describes the validation +question. This should request a JSON output from the LLM. +It should also take key as a formatting input.

  • +
  • key (str) – A key expected in the JSON output of the LLM containing the +response for the validation question. This string will also +be used to format the system prompt before it is passed to +the LLM.

  • +
+
+
Returns:
+

boolTrue if the LLM returned True for this text chunk or +num_to_recall-1 text chunks before it. +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.ordinance.html b/_autosummary/elm.ords.extraction.ordinance.html new file mode 100644 index 00000000..c4b3915d --- /dev/null +++ b/_autosummary/elm.ords.extraction.ordinance.html @@ -0,0 +1,336 @@ + + + + + + + elm.ords.extraction.ordinance — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.ordinance

+

ELM Ordinance document content Validation logic

+

These are primarily used to validate that a legal document applies to a +particular technology (e.g. Large Wind Energy Conversion Systems).

+

Classes

+ + + + + + + + + +

OrdinanceExtractor(llm_caller)

Extract succinct ordinance text from input

OrdinanceValidator(structured_llm_caller, ...)

Check document text for wind ordinances.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.html b/_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.html new file mode 100644 index 00000000..3dba5f4b --- /dev/null +++ b/_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.html @@ -0,0 +1,376 @@ + + + + + + + elm.ords.extraction.parse.StructuredOrdinanceParser — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.parse.StructuredOrdinanceParser

+
+
+class StructuredOrdinanceParser(llm_service, usage_tracker=None, **kwargs)[source]
+

Bases: BaseLLMCaller

+

LLM ordinance document structured data scraping utility.

+
+
Parameters:
+
    +
  • llm_service (elm.ords.services.base.Service) – LLM service used for queries.

  • +
  • usage_tracker (elm.ords.services.usage.UsageTracker, optional) – Optional tracker instance to monitor token usage during +LLM calls. By default, None.

  • +
  • **kwargs – Keyword arguments to be passed to the underlying service +processing function (i.e. llm_service.call(**kwargs)). +Should not contain the following keys:

    +
    +
      +
    • usage_tracker

    • +
    • usage_sub_label

    • +
    • messages

    • +
    +
    +

    These arguments are provided by this caller object.

    +
  • +
+
+
+

Methods

+ + + + + + +

parse(text)

Parse text and extract structure ordinance data.

+
+
+async parse(text)[source]
+

Parse text and extract structure ordinance data.

+
+
Parameters:
+

text (str) – Ordinance text which may or may not contain setbacks for one +or more features (property lines, structure, roads, etc.). +Text can also contain other supported regulations (noise, +shadow-flicker, etc,) which will be extracted as well.

+
+
Returns:
+

pd.DataFrame – DataFrame containing parsed-out ordinance values.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.parse.html b/_autosummary/elm.ords.extraction.parse.html new file mode 100644 index 00000000..b53f96ff --- /dev/null +++ b/_autosummary/elm.ords.extraction.parse.html @@ -0,0 +1,331 @@ + + + + + + + elm.ords.extraction.parse — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.parse

+

ELM Ordinance structured parsing class.

+

Classes

+ + + + + + +

StructuredOrdinanceParser(llm_service[, ...])

LLM ordinance document structured data scraping utility.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.html b/_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.html new file mode 100644 index 00000000..cbc59efd --- /dev/null +++ b/_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.html @@ -0,0 +1,513 @@ + + + + + + + elm.ords.extraction.tree.AsyncDecisionTree — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.tree.AsyncDecisionTree

+
+
+class AsyncDecisionTree(graph)[source]
+

Bases: DecisionTree

+

Async class to traverse a directed graph of LLM prompts. Nodes are +prompts and edges are transitions between prompts based on conditions +being met in the LLM response.

+

Async class to traverse a directed graph of LLM prompts. Nodes are +prompts and edges are transitions between prompts based on conditions +being met in the LLM response.

+
+
Parameters:
+

graph (nx.DiGraph) – Directed acyclic graph where nodes are LLM prompts and edges are +logical transitions based on the response. Must have high-level +graph attribute “chat_llm_caller” which is a ChatLLMCaller +instance. Nodes should have attribute “prompt” which can have +{format} named arguments that will be filled from the high-level +graph attributes. Edges can have attribute “condition” that is a +callable to be executed on the LLM response text. An edge from a +node without a condition acts as an “else” statement if no other +edge conditions are satisfied. A single edge from node to node +does not need a condition.

+
+
+

Methods

+ + + + + + + + + + + + + + + +

async_call_node(node0)

Call the LLM with the prompt from the input node and search the successor edges for a valid transition condition

async_run([node0])

Traverse the decision tree starting at the input node.

call_node(node0)

Call the LLM with the prompt from the input node and search the successor edges for a valid transition condition

run([node0])

Traverse the decision tree starting at the input node.

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + +

all_messages_txt

Get a printout of the full conversation with the LLM

api

Get the ApiBase object.

chat_llm_caller

ChatLLMCaller instance for this tree.

graph

Get the networkx graph object

history

Get a record of the nodes traversed in the tree

messages

Get a list of the conversation messages with the LLM.

+
+
+property chat_llm_caller
+

ChatLLMCaller instance for this tree.

+
+
Type:
+

elm.ords.llm.ChatLLMCaller

+
+
+
+ +
+
+property messages
+

Get a list of the conversation messages with the LLM.

+
+
Returns:
+

list

+
+
+
+ +
+
+property all_messages_txt
+

Get a printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+async async_call_node(node0)[source]
+

Call the LLM with the prompt from the input node and search the +successor edges for a valid transition condition

+
+
Parameters:
+

node0 (str) – Name of node being executed.

+
+
Returns:
+

out (str) – Next node or LLM response if at a leaf node.

+
+
+
+ +
+
+async async_run(node0='init')[source]
+

Traverse the decision tree starting at the input node.

+
+
Parameters:
+

node0 (str) – Name of starting node in the graph. This is typically called “init”

+
+
Returns:
+

out (str) – Final response from LLM at the leaf node.

+
+
+
+ +
+
+property api
+

Get the ApiBase object.

+
+
Returns:
+

ApiBase

+
+
+
+ +
+
+call_node(node0)
+

Call the LLM with the prompt from the input node and search the +successor edges for a valid transition condition

+
+
Parameters:
+

node0 (str) – Name of node being executed.

+
+
Returns:
+

out (str) – Next node or LLM response if at a leaf node.

+
+
+
+ +
+
+property graph
+

Get the networkx graph object

+
+
Returns:
+

nx.DiGraph

+
+
+
+ +
+
+property history
+

Get a record of the nodes traversed in the tree

+
+
Returns:
+

list

+
+
+
+ +
+
+run(node0='init')
+

Traverse the decision tree starting at the input node.

+
+
Parameters:
+

node0 (str) – Name of starting node in the graph. This is typically called “init”

+
+
Returns:
+

out (str) – Final response from LLM at the leaf node.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.extraction.tree.html b/_autosummary/elm.ords.extraction.tree.html new file mode 100644 index 00000000..3544ff24 --- /dev/null +++ b/_autosummary/elm.ords.extraction.tree.html @@ -0,0 +1,331 @@ + + + + + + + elm.ords.extraction.tree — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.extraction.tree

+

ELM Ordinance async decision tree.

+

Classes

+ + + + + + +

AsyncDecisionTree(graph)

Async class to traverse a directed graph of LLM prompts.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.html b/_autosummary/elm.ords.html new file mode 100644 index 00000000..376586f1 --- /dev/null +++ b/_autosummary/elm.ords.html @@ -0,0 +1,346 @@ + + + + + + + elm.ords — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords

+

ELM ordinance document download and structured data extraction.

+ + + + + + + + + + + + + + + + + + + + + + + + +

elm.ords.download

ELM Ordinance county file downloading logic

elm.ords.extraction

ELM Ordinance text extraction tooling.

elm.ords.llm

ELM Ordinance LLM callers.

elm.ords.process

ELM Ordinance full processing logic

elm.ords.services

ELM asynchronous services.

elm.ords.utilities

ELM Ordinance utilities.

elm.ords.validation

ELM ordinance document content and source validation.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.llm.calling.BaseLLMCaller.html b/_autosummary/elm.ords.llm.calling.BaseLLMCaller.html new file mode 100644 index 00000000..acc9fa93 --- /dev/null +++ b/_autosummary/elm.ords.llm.calling.BaseLLMCaller.html @@ -0,0 +1,356 @@ + + + + + + + elm.ords.llm.calling.BaseLLMCaller — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.llm.calling.BaseLLMCaller

+
+
+class BaseLLMCaller(llm_service, usage_tracker=None, **kwargs)[source]
+

Bases: object

+

Class to support LLM calling functionality.

+
+
Parameters:
+
    +
  • llm_service (elm.ords.services.base.Service) – LLM service used for queries.

  • +
  • usage_tracker (elm.ords.services.usage.UsageTracker, optional) – Optional tracker instance to monitor token usage during +LLM calls. By default, None.

  • +
  • **kwargs – Keyword arguments to be passed to the underlying service +processing function (i.e. llm_service.call(**kwargs)). +Should not contain the following keys:

    +
    +
      +
    • usage_tracker

    • +
    • usage_sub_label

    • +
    • messages

    • +
    +
    +

    These arguments are provided by this caller object.

    +
  • +
+
+
+

Methods

+ + + +
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.llm.calling.ChatLLMCaller.html b/_autosummary/elm.ords.llm.calling.ChatLLMCaller.html new file mode 100644 index 00000000..57fe166e --- /dev/null +++ b/_autosummary/elm.ords.llm.calling.ChatLLMCaller.html @@ -0,0 +1,378 @@ + + + + + + + elm.ords.llm.calling.ChatLLMCaller — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.llm.calling.ChatLLMCaller

+
+
+class ChatLLMCaller(llm_service, system_message, usage_tracker=None, **kwargs)[source]
+

Bases: BaseLLMCaller

+

Class to support chat-like LLM calling functionality.

+
+
Parameters:
+
    +
  • llm_service (elm.ords.services.base.Service) – LLM service used for queries.

  • +
  • system_message (str) – System message to use for chat with LLM.

  • +
  • usage_tracker (elm.ords.services.usage.UsageTracker, optional) – Optional tracker instance to monitor token usage during +LLM calls. By default, None.

  • +
  • **kwargs – Keyword arguments to be passed to the underlying service +processing function (i.e. llm_service.call(**kwargs)). +Should not contain the following keys:

    +
    +
      +
    • usage_tracker

    • +
    • usage_sub_label

    • +
    • messages

    • +
    +
    +

    These arguments are provided by this caller object.

    +
  • +
+
+
+

Methods

+ + + + + + +

call(content[, usage_sub_label])

Chat with the LLM.

+
+
+async call(content, usage_sub_label='chat')[source]
+

Chat with the LLM.

+
+
Parameters:
+
    +
  • content (str) – Your chat message for the LLM.

  • +
  • usage_sub_label (str, optional) – Label to store token usage under. By default, "chat".

  • +
+
+
Returns:
+

str | None – The LLM response, as a string, or None if something went +wrong during the call.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.llm.calling.LLMCaller.html b/_autosummary/elm.ords.llm.calling.LLMCaller.html new file mode 100644 index 00000000..e5c32725 --- /dev/null +++ b/_autosummary/elm.ords.llm.calling.LLMCaller.html @@ -0,0 +1,378 @@ + + + + + + + elm.ords.llm.calling.LLMCaller — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.llm.calling.LLMCaller

+
+
+class LLMCaller(llm_service, usage_tracker=None, **kwargs)[source]
+

Bases: BaseLLMCaller

+

Simple LLM caller, with no memory and no parsing utilities.

+
+
Parameters:
+
    +
  • llm_service (elm.ords.services.base.Service) – LLM service used for queries.

  • +
  • usage_tracker (elm.ords.services.usage.UsageTracker, optional) – Optional tracker instance to monitor token usage during +LLM calls. By default, None.

  • +
  • **kwargs – Keyword arguments to be passed to the underlying service +processing function (i.e. llm_service.call(**kwargs)). +Should not contain the following keys:

    +
    +
      +
    • usage_tracker

    • +
    • usage_sub_label

    • +
    • messages

    • +
    +
    +

    These arguments are provided by this caller object.

    +
  • +
+
+
+

Methods

+ + + + + + +

call(sys_msg, content[, usage_sub_label])

Call LLM.

+
+
+async call(sys_msg, content, usage_sub_label='default')[source]
+

Call LLM.

+
+
Parameters:
+
    +
  • sys_msg (str) – The LLM system message.

  • +
  • content (str) – Your chat message for the LLM.

  • +
  • usage_sub_label (str, optional) – Label to store token usage under. By default, "default".

  • +
+
+
Returns:
+

str | None – The LLM response, as a string, or None if something went +wrong during the call.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.llm.calling.StructuredLLMCaller.html b/_autosummary/elm.ords.llm.calling.StructuredLLMCaller.html new file mode 100644 index 00000000..16d29526 --- /dev/null +++ b/_autosummary/elm.ords.llm.calling.StructuredLLMCaller.html @@ -0,0 +1,380 @@ + + + + + + + elm.ords.llm.calling.StructuredLLMCaller — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.llm.calling.StructuredLLMCaller

+
+
+class StructuredLLMCaller(llm_service, usage_tracker=None, **kwargs)[source]
+

Bases: BaseLLMCaller

+

Class to support structured (JSON) LLM calling functionality.

+
+
Parameters:
+
    +
  • llm_service (elm.ords.services.base.Service) – LLM service used for queries.

  • +
  • usage_tracker (elm.ords.services.usage.UsageTracker, optional) – Optional tracker instance to monitor token usage during +LLM calls. By default, None.

  • +
  • **kwargs – Keyword arguments to be passed to the underlying service +processing function (i.e. llm_service.call(**kwargs)). +Should not contain the following keys:

    +
    +
      +
    • usage_tracker

    • +
    • usage_sub_label

    • +
    • messages

    • +
    +
    +

    These arguments are provided by this caller object.

    +
  • +
+
+
+

Methods

+ + + + + + +

call(sys_msg, content[, usage_sub_label])

Call LLM for structured data retrieval.

+
+
+async call(sys_msg, content, usage_sub_label='default')[source]
+

Call LLM for structured data retrieval.

+
+
Parameters:
+
    +
  • sys_msg (str) – The LLM system message. If this text does not contain the +instruction text “Return your answer in JSON format”, it +will be added.

  • +
  • content (str) – LLM call content (typically some text to extract info from).

  • +
  • usage_sub_label (str, optional) – Label to store token usage under. By default, "default".

  • +
+
+
Returns:
+

dict – Dictionary containing the LLM-extracted features. Dictionary +may be empty if there was an error during the LLM call.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.llm.calling.html b/_autosummary/elm.ords.llm.calling.html new file mode 100644 index 00000000..bfd2b9c4 --- /dev/null +++ b/_autosummary/elm.ords.llm.calling.html @@ -0,0 +1,340 @@ + + + + + + + elm.ords.llm.calling — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.llm.calling

+

ELM Ordinances LLM Calling classes.

+

Classes

+ + + + + + + + + + + + + + + +

BaseLLMCaller(llm_service[, usage_tracker])

Class to support LLM calling functionality.

ChatLLMCaller(llm_service, system_message[, ...])

Class to support chat-like LLM calling functionality.

LLMCaller(llm_service[, usage_tracker])

Simple LLM caller, with no memory and no parsing utilities.

StructuredLLMCaller(llm_service[, usage_tracker])

Class to support structured (JSON) LLM calling functionality.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.llm.html b/_autosummary/elm.ords.llm.html new file mode 100644 index 00000000..9f394555 --- /dev/null +++ b/_autosummary/elm.ords.llm.html @@ -0,0 +1,329 @@ + + + + + + + elm.ords.llm — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.llm

+

ELM Ordinance LLM callers.

+ + + + + + +

elm.ords.llm.calling

ELM Ordinances LLM Calling classes.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.process.download_doc_for_county.html b/_autosummary/elm.ords.process.download_doc_for_county.html new file mode 100644 index 00000000..e0a09240 --- /dev/null +++ b/_autosummary/elm.ords.process.download_doc_for_county.html @@ -0,0 +1,356 @@ + + + + + + + elm.ords.process.download_doc_for_county — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.process.download_doc_for_county

+
+
+async download_doc_for_county(county, text_splitter, num_urls=5, file_loader_kwargs=None, browser_semaphore=None, **kwargs)[source]
+

Download and parse ordinance document for a single county.

+
+
Parameters:
+
    +
  • county (elm.ords.utilities.location.Location) – County to retrieve ordinance document for.

  • +
  • text_splitter (obj, optional) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. Langchain’s text splitters should work for this +input.

  • +
  • num_urls (int, optional) – Number of unique Google search result URL’s to check for +ordinance document. By default, 5.

  • +
  • file_loader_kwargs (dict, optional) – Dictionary of keyword-argument pairs to initialize +elm.web.file_loader.AsyncFileLoader with. The +“pw_launch_kwargs” key in these will also be used to initialize +the elm.web.google_search.PlaywrightGoogleLinkSearch +used for the google URL search. By default, None.

  • +
  • browser_semaphore (asyncio.Semaphore, optional) – Semaphore instance that can be used to limit the number of +playwright browsers open concurrently. If None, no limits +are applied. By default, None.

  • +
  • **kwargs – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument | None – Document instance for the ordinance document, or None if no +document was found. Extracted ordinance information is stored in +the document’s metadata attribute.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.process.download_docs_for_county_with_logging.html b/_autosummary/elm.ords.process.download_docs_for_county_with_logging.html new file mode 100644 index 00000000..dd3e092d --- /dev/null +++ b/_autosummary/elm.ords.process.download_docs_for_county_with_logging.html @@ -0,0 +1,360 @@ + + + + + + + elm.ords.process.download_docs_for_county_with_logging — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.process.download_docs_for_county_with_logging

+
+
+async download_docs_for_county_with_logging(listener, log_dir, county, text_splitter, num_urls=5, file_loader_kwargs=None, browser_semaphore=None, level='INFO', **kwargs)[source]
+

Retrieve ordinance document for a single county with async logs.

+
+
Parameters:
+
    +
  • listener (elm.ords.utilities.queued_logging.LogListener) – Active LogListener instance that can be passed to +elm.ords.utilities.queued_logging.LocationFileLog.

  • +
  • log_dir (path-like) – Path to output directory to contain log file.

  • +
  • county (elm.ords.utilities.location.Location) – County to retrieve ordinance document for.

  • +
  • text_splitter (obj, optional) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. Langchain’s text splitters should work for this +input.

  • +
  • num_urls (int, optional) – Number of unique Google search result URL’s to check for +ordinance document. By default, 5.

  • +
  • file_loader_kwargs (dict, optional) – Dictionary of keyword-argument pairs to initialize +elm.web.file_loader.AsyncFileLoader with. The +“pw_launch_kwargs” key in these will also be used to initialize +the elm.web.google_search.PlaywrightGoogleLinkSearch +used for the google URL search. By default, None.

  • +
  • browser_semaphore (asyncio.Semaphore, optional) – Semaphore instance that can be used to limit the number of +playwright browsers open concurrently. If None, no limits +are applied. By default, None.

  • +
  • level (str, optional) – Log level to set for retrieval logger. By default, "INFO".

  • +
  • **kwargs – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance.

  • +
+
+
Returns:
+

elm.web.document.BaseDocument | None – Document instance for the ordinance document, or None if no +document was found. Extracted ordinance information is stored in +the document’s metadata attribute.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.process.html b/_autosummary/elm.ords.process.html new file mode 100644 index 00000000..85de425f --- /dev/null +++ b/_autosummary/elm.ords.process.html @@ -0,0 +1,336 @@ + + + + + + + elm.ords.process — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.process

+

ELM Ordinance full processing logic

+

Functions

+ + + + + + + + + + + + +

download_doc_for_county(county, text_splitter)

Download and parse ordinance document for a single county.

download_docs_for_county_with_logging(...[, ...])

Retrieve ordinance document for a single county with async logs.

process_counties_with_openai(out_dir[, ...])

Download and extract ordinances for a list of counties.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.process.process_counties_with_openai.html b/_autosummary/elm.ords.process.process_counties_with_openai.html new file mode 100644 index 00000000..c8e25949 --- /dev/null +++ b/_autosummary/elm.ords.process.process_counties_with_openai.html @@ -0,0 +1,408 @@ + + + + + + + elm.ords.process.process_counties_with_openai — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.process.process_counties_with_openai

+
+
+async process_counties_with_openai(out_dir, county_fp=None, model='gpt-4', azure_api_key=None, azure_version=None, azure_endpoint=None, llm_call_kwargs=None, llm_service_rate_limit=4000, text_splitter_chunk_size=3000, text_splitter_chunk_overlap=300, num_urls_to_check_per_county=5, max_num_concurrent_browsers=10, file_loader_kwargs=None, pytesseract_exe_fp=None, td_kwargs=None, tpe_kwargs=None, ppe_kwargs=None, log_dir=None, clean_dir=None, county_ords_dir=None, county_dbs_dir=None, log_level='INFO')[source]
+

Download and extract ordinances for a list of counties.

+
+
Parameters:
+
    +
  • out_dir (path-like) – Path to output directory. This directory will be created if it +does not exist. This directory will contain the structured +ordinance output CSV as well as all of the scraped ordinance +documents (PDFs and HTML text files). Usage information and +default options for log/clean directories will also be stored +here.

  • +
  • county_fp (path-like, optional) – Path to CSV file containing a list of counties to extract +ordinance information for. This CSV should have “County” and +“State” columns that contains the county and state names. +By default, None, which runs the extraction for all known +counties (this is untested and not currently recommended).

  • +
  • model (str, optional) – Name of LLM model to perform scraping. By default, "gpt-4".

  • +
  • azure_api_key (str, optional) – Azure OpenAI API key. By default, None, which pulls the key +from the environment variable AZURE_OPENAI_API_KEY instead.

  • +
  • azure_version (str, optional) – Azure OpenAI API version. By default, None, which pulls the +version from the environment variable AZURE_OPENAI_VERSION +instead.

  • +
  • azure_endpoint (str, optional) – Azure OpenAI API endpoint. By default, None, which pulls the +endpoint from the environment variable AZURE_OPENAI_ENDPOINT +instead.

  • +
  • llm_call_kwargs (dict, optional) – Keyword-value pairs used to initialize an +elm.ords.llm.LLMCaller instance. By default, None.

  • +
  • llm_service_rate_limit (int, optional) – Token rate limit of LLM service being used (OpenAI). +By default, 4000.

  • +
  • text_splitter_chunk_size (int, optional) – Chunk size input to +langchain.text_splitter.RecursiveCharacterTextSplitter. +By default, 3000.

  • +
  • text_splitter_chunk_overlap (int, optional) – Chunk overlap input to +langchain.text_splitter.RecursiveCharacterTextSplitter. +By default, 300.

  • +
  • num_urls_to_check_per_county (int, optional) – Number of unique Google search result URL’s to check for +ordinance document. By default, 5.

  • +
  • max_num_concurrent_browsers (int, optional) – Number of unique concurrent browser instances to open when +performing Google search. Setting this number too high on a +machine with limited processing can lead to increased timeouts +and therefore decreased quality of Google search results. +By default, 10.

  • +
  • pytesseract_exe_fp (path-like, optional) – Path to pytesseract executable. If this option is specified, OCR +parsing for PDF files will be enabled via pytesseract. +By default, None.

  • +
  • td_kwargs (dict, optional) – Keyword-value argument pairs to pass to +tempfile.TemporaryDirectory. The temporary directory is +used to store files downloaded from the web that are still being +parsed for ordinance information. By default, None.

  • +
  • tpe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. The thread pool +executor is used to run I/O intensive tasks like writing to a +log file. By default, None.

  • +
  • ppe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ProcessPoolExecutor. The process +pool executor is used to run CPU intensive tasks like loading +a PDF file. By default, None.

  • +
  • log_dir (path-like, optional) – Path to directory for log files. This directory will be created +if it does not exist. By default, None, which +creates a logs folder in the output directory for the +county-specific log files.

  • +
  • clean_dir (path-like, optional) – Path to directory for cleaned ordinance text output. This +directory will be created if it does not exist. By default, +None, which creates a clean folder in the output +directory for the cleaned ordinance text files.

  • +
  • county_ords_dir (path-like, optional) – Path to directory for individual county ordinance file outputs. +This directory will be created if it does not exist. +By default, None, which creates a county_ord_files +folder in the output directory.

  • +
  • county_dbs_dir (path-like, optional) – Path to directory for individual county ordinance database +outputs. This directory will be created if it does not exist. +By default, None, which creates a county_dbs folder in +the output directory.

  • +
  • log_level (str, optional) – Log level to set for county retrieval and parsing loggers. +By default, "INFO".

  • +
+
+
Returns:
+

pd.DataFrame – DataFrame of parsed ordinance information. This file will also +be stored in the output directory under “wind_db.csv”.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.base.RateLimitedService.html b/_autosummary/elm.ords.services.base.RateLimitedService.html new file mode 100644 index 00000000..69c6328f --- /dev/null +++ b/_autosummary/elm.ords.services.base.RateLimitedService.html @@ -0,0 +1,455 @@ + + + + + + + elm.ords.services.base.RateLimitedService — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.base.RateLimitedService

+
+
+class RateLimitedService(rate_limit, rate_tracker)[source]
+

Bases: Service

+

Abstract Base Class representing a rate-limited service (e.g. OpenAI)

+
+
Parameters:
+
    +
  • rate_limit (int | float) – Max usage per duration of the rate tracker. For example, +if the rate tracker is set to compute the total over +minute-long intervals, this value should be the max usage +per minute.

  • +
  • rate_tracker (elm.ords.utilities.usage.TimeBoundedUsageTracker) – A TimeBoundedUsageTracker instance. This will be used to +track usage per time interval and compare to rate_limit.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Use this method to allocate resources, if needed

call(*args, **kwargs)

Call the service.

process(*args, **kwargs)

Process a call to the service.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Use this method to clean up resources, if needed

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Check if usage is under the rate limit.

name

Service name used to pull the correct queue object.

+
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Use this method to allocate resources, if needed

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property can_process
+

Check if usage is under the rate limit.

+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+abstract async process(*args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying processing function.

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Use this method to clean up resources, if needed

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.base.Service.html b/_autosummary/elm.ords.services.base.Service.html new file mode 100644 index 00000000..192e6e81 --- /dev/null +++ b/_autosummary/elm.ords.services.base.Service.html @@ -0,0 +1,446 @@ + + + + + + + elm.ords.services.base.Service — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.base.Service

+
+
+class Service[source]
+

Bases: ABC

+

Abstract base class for a Service that can be queued to run.

+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Use this method to allocate resources, if needed

call(*args, **kwargs)

Call the service.

process(*args, **kwargs)

Process a call to the service.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Use this method to clean up resources, if needed

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Check if process function can be called.

name

Service name used to pull the correct queue object.

+
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+async classmethod call(*args, **kwargs)[source]
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)[source]
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+acquire_resources()[source]
+

Use this method to allocate resources, if needed

+
+ +
+
+release_resources()[source]
+

Use this method to clean up resources, if needed

+
+ +
+
+abstract property can_process
+

Check if process function can be called.

+

This should be a fast-running method that returns a boolean +indicating whether or not the service can accept more +processing calls.

+
+ +
+
+abstract async process(*args, **kwargs)[source]
+

Process a call to the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying processing function.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.base.html b/_autosummary/elm.ords.services.base.html new file mode 100644 index 00000000..57c3d192 --- /dev/null +++ b/_autosummary/elm.ords.services.base.html @@ -0,0 +1,334 @@ + + + + + + + elm.ords.services.base — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.base

+

ELM abstract Service class.

+

Classes

+ + + + + + + + + +

RateLimitedService(rate_limit, rate_tracker)

Abstract Base Class representing a rate-limited service (e.g. OpenAI).

Service()

Abstract base class for a Service that can be queued to run.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.cpu.PDFLoader.html b/_autosummary/elm.ords.services.cpu.PDFLoader.html new file mode 100644 index 00000000..06db6748 --- /dev/null +++ b/_autosummary/elm.ords.services.cpu.PDFLoader.html @@ -0,0 +1,466 @@ + + + + + + + elm.ords.services.cpu.PDFLoader — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.cpu.PDFLoader

+
+
+class PDFLoader(**kwargs)[source]
+

Bases: ProcessPoolService

+

Class to load PDFs in a ProcessPoolExecutor.

+
+
Parameters:
+

**kwargs – Keyword-value argument pairs to pass to +concurrent.futures.ProcessPoolExecutor. +By default, None.

+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(fn, pdf_bytes, **kwargs)

Write URL doc to file asynchronously.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Always True (limiting is handled by asyncio)

name

Service name used to pull the correct queue object.

+
+
+property can_process
+

Always True (limiting is handled by asyncio)

+
+
Type:
+

bool

+
+
+
+ +
+
+async process(fn, pdf_bytes, **kwargs)[source]
+

Write URL doc to file asynchronously.

+
+
Parameters:
+
    +
  • doc (elm.web.document.Document) – Document containing meta information about the file. Must +have a “source” key in the metadata dict containing the +URL, which will be converted to a file name using +compute_fn_from_url().

  • +
  • file_content (str | bytes) – File content, typically string text for HTML files and bytes +for PDF file.

  • +
  • make_name_unique (bool, optional) – Option to make file name unique by adding a UUID at the end +of the file name. By default, False.

  • +
+
+
Returns:
+

Path – Path to output file.

+
+
+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Open thread pool and temp directory

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Shutdown thread pool and cleanup temp directory

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.cpu.ProcessPoolService.html b/_autosummary/elm.ords.services.cpu.ProcessPoolService.html new file mode 100644 index 00000000..b0451758 --- /dev/null +++ b/_autosummary/elm.ords.services.cpu.ProcessPoolService.html @@ -0,0 +1,453 @@ + + + + + + + elm.ords.services.cpu.ProcessPoolService — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.cpu.ProcessPoolService

+
+
+class ProcessPoolService(**kwargs)[source]
+

Bases: Service

+

Service that contains a ProcessPoolExecutor instance

+
+
Parameters:
+

**kwargs – Keyword-value argument pairs to pass to +concurrent.futures.ProcessPoolExecutor. +By default, None.

+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(*args, **kwargs)

Process a call to the service.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Check if process function can be called.

name

Service name used to pull the correct queue object.

+
+
+acquire_resources()[source]
+

Open thread pool and temp directory

+
+ +
+
+release_resources()[source]
+

Shutdown thread pool and cleanup temp directory

+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+abstract property can_process
+

Check if process function can be called.

+

This should be a fast-running method that returns a boolean +indicating whether or not the service can accept more +processing calls.

+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+abstract async process(*args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying processing function.

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.cpu.html b/_autosummary/elm.ords.services.cpu.html new file mode 100644 index 00000000..9b88e3a3 --- /dev/null +++ b/_autosummary/elm.ords.services.cpu.html @@ -0,0 +1,345 @@ + + + + + + + elm.ords.services.cpu — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.cpu

+

ELM Ordinance CPU-bound services

+

Functions

+ + + + + + + + + +

read_pdf_doc(pdf_bytes, **kwargs)

Read PDF file from bytes in a Process Pool.

read_pdf_doc_ocr(pdf_bytes, **kwargs)

Read PDF file from bytes using OCR (pytesseract) in a Process Pool.

+

Classes

+ + + + + + + + + +

PDFLoader(**kwargs)

Class to load PDFs in a ProcessPoolExecutor.

ProcessPoolService(**kwargs)

Service that contains a ProcessPoolExecutor instance

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.cpu.read_pdf_doc.html b/_autosummary/elm.ords.services.cpu.read_pdf_doc.html new file mode 100644 index 00000000..73ba43d7 --- /dev/null +++ b/_autosummary/elm.ords.services.cpu.read_pdf_doc.html @@ -0,0 +1,341 @@ + + + + + + + elm.ords.services.cpu.read_pdf_doc — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.cpu.read_pdf_doc

+
+
+async read_pdf_doc(pdf_bytes, **kwargs)[source]
+

Read PDF file from bytes in a Process Pool.

+
+
Parameters:
+
    +
  • pdf_bytes (bytes) – Bytes containing PDF file.

  • +
  • **kwargs – Keyword-value arguments to pass to +elm.web.document.PDFDocument initializer.

  • +
+
+
Returns:
+

elm.web.document.PDFDocument – PDFDocument instances with pages loaded as text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.html b/_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.html new file mode 100644 index 00000000..ca8a0cc9 --- /dev/null +++ b/_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.html @@ -0,0 +1,344 @@ + + + + + + + elm.ords.services.cpu.read_pdf_doc_ocr — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.cpu.read_pdf_doc_ocr

+
+
+async read_pdf_doc_ocr(pdf_bytes, **kwargs)[source]
+

Read PDF file from bytes using OCR (pytesseract) in a Process Pool.

+

Note that Pytesseract must be set up properly for this method to +work. In particular, the pytesseract.pytesseract.tesseract_cmd +attribute must be set to point to the pytesseract exe.

+
+
Parameters:
+
    +
  • pdf_bytes (bytes) – Bytes containing PDF file.

  • +
  • **kwargs – Keyword-value arguments to pass to +elm.web.document.PDFDocument initializer.

  • +
+
+
Returns:
+

elm.web.document.PDFDocument – PDFDocument instances with pages loaded as text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.html b/_autosummary/elm.ords.services.html new file mode 100644 index 00000000..ed97c011 --- /dev/null +++ b/_autosummary/elm.ords.services.html @@ -0,0 +1,347 @@ + + + + + + + elm.ords.services — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services

+

ELM asynchronous services.

+ + + + + + + + + + + + + + + + + + + + + + + + +

elm.ords.services.base

ELM abstract Service class.

elm.ords.services.cpu

ELM Ordinance CPU-bound services

elm.ords.services.openai

ELM Ordinances OpenAI service and utils.

elm.ords.services.provider

ELM service provider classes.

elm.ords.services.queues

Module for "singleton" QUERIES dictionary

elm.ords.services.threaded

ELM Ordinance Threaded services

elm.ords.services.usage

ELM Ordinances usage tracking utilities.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.openai.OpenAIService.html b/_autosummary/elm.ords.services.openai.OpenAIService.html new file mode 100644 index 00000000..d27f2006 --- /dev/null +++ b/_autosummary/elm.ords.services.openai.OpenAIService.html @@ -0,0 +1,473 @@ + + + + + + + elm.ords.services.openai.OpenAIService — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.openai.OpenAIService

+
+
+class OpenAIService(client, rate_limit=1000.0, rate_tracker=None)[source]
+

Bases: RateLimitedService

+

OpenAI Chat GPT query service

+
+
Parameters:
+
    +
  • client (openai.AsyncOpenAI | openai.AsyncAzureOpenAI) – Async OpenAI client instance. Must have an async +client.chat.completions.create method.

  • +
  • rate_limit (int | float, optional) – Token rate limit (typically per minute, but the time +interval is ultimately controlled by the rate_tracker +instance). By default, 1e3.

  • +
  • rate_tracker (TimeBoundedUsageTracker, optional) – A TimeBoundedUsageTracker instance. This will be used to +track usage per time interval and compare to rate_limit. +If None, a TimeBoundedUsageTracker instance is created +with default parameters. By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Use this method to allocate resources, if needed

call(*args, **kwargs)

Call the service.

process([usage_tracker, usage_sub_label])

Process a call to OpenAI Chat GPT.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Use this method to clean up resources, if needed

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Check if usage is under the rate limit.

name

Service name used to pull the correct queue object.

+
+
+async process(usage_tracker=None, usage_sub_label='default', *, model, **kwargs)[source]
+

Process a call to OpenAI Chat GPT.

+

Note that this method automatically retries queries (with +backoff) if a rate limit error is thrown by the API.

+
+
Parameters:
+
    +
  • model (str) – OpenAI GPT model to query.

  • +
  • usage_tracker (elm.ords.services.usage.UsageTracker, optional) – UsageTracker instance. Providing this input will update your +tracker with this call’s token usage info. +By default, None.

  • +
  • usage_sub_label (str, optional) – Optional label to categorize usage under. This can be used +to track usage related to certain categories. +By default, "default".

  • +
  • **kwargs – Keyword arguments to be passed to +client.chat.completions.create.

  • +
+
+
Returns:
+

str | None – Chat GPT response as a string, or None if the call +failed.

+
+
+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Use this method to allocate resources, if needed

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property can_process
+

Check if usage is under the rate limit.

+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Use this method to clean up resources, if needed

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.openai.count_tokens.html b/_autosummary/elm.ords.services.openai.count_tokens.html new file mode 100644 index 00000000..16fb2fff --- /dev/null +++ b/_autosummary/elm.ords.services.openai.count_tokens.html @@ -0,0 +1,346 @@ + + + + + + + elm.ords.services.openai.count_tokens — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.openai.count_tokens

+
+
+count_tokens(messages, model)[source]
+

Count the number of tokens in an outgoing set of messages.

+
+
Parameters:
+
    +
  • messages (list) – A list of message objects, where the latter is represented +using a dictionary. Each message dictionary must have a +“content” key containing the string to count tokens for.

  • +
  • model (str) – The OpenAI model being used. This input will be passed to +tiktoken.encoding_for_model().

  • +
+
+
Returns:
+

int – Total number of tokens in the set of messages outgoing to +OpenAI.

+
+
+

References

+

https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.openai.html b/_autosummary/elm.ords.services.openai.html new file mode 100644 index 00000000..c382a547 --- /dev/null +++ b/_autosummary/elm.ords.services.openai.html @@ -0,0 +1,342 @@ + + + + + + + elm.ords.services.openai — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.openai

+

ELM Ordinances OpenAI service and utils.

+

Functions

+ + + + + + + + + +

count_tokens(messages, model)

Count the number of tokens in an outgoing set of messages.

usage_from_response(current_usage, response)

OpenAI usage parser.

+

Classes

+ + + + + + +

OpenAIService(client[, rate_limit, rate_tracker])

OpenAI Chat GPT query service

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.openai.usage_from_response.html b/_autosummary/elm.ords.services.openai.usage_from_response.html new file mode 100644 index 00000000..959f680a --- /dev/null +++ b/_autosummary/elm.ords.services.openai.usage_from_response.html @@ -0,0 +1,346 @@ + + + + + + + elm.ords.services.openai.usage_from_response — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.openai.usage_from_response

+
+
+usage_from_response(current_usage, response)[source]
+

OpenAI usage parser.

+
+
Parameters:
+
    +
  • current_usage (dict) – Dictionary containing current usage information. For OpenAI +trackers, this may contain the keys "requests", +"prompt_tokens", and "response_tokens" if there is +already existing tracking information. Empty dictionaries are +allowed, in which case the three keys above will be added to +this input.

  • +
  • response (openai.Completion) – OpenAI Completion object. Must contain a usage attribute.

  • +
+
+
Returns:
+

dict – Dictionary with updated usage statistics.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.provider.RunningAsyncServices.html b/_autosummary/elm.ords.services.provider.RunningAsyncServices.html new file mode 100644 index 00000000..5a4336fd --- /dev/null +++ b/_autosummary/elm.ords.services.provider.RunningAsyncServices.html @@ -0,0 +1,341 @@ + + + + + + + elm.ords.services.provider.RunningAsyncServices — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.provider.RunningAsyncServices

+
+
+class RunningAsyncServices(services)[source]
+

Bases: object

+

Async context manager for running services.

+
+
Parameters:
+

services (iterable) – An iterable of async services to run during program +execution.

+
+
+

Methods

+ + + +
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.provider.html b/_autosummary/elm.ords.services.provider.html new file mode 100644 index 00000000..c17f0fab --- /dev/null +++ b/_autosummary/elm.ords.services.provider.html @@ -0,0 +1,331 @@ + + + + + + + elm.ords.services.provider — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.provider

+

ELM service provider classes.

+

Classes

+ + + + + + +

RunningAsyncServices(services)

Async context manager for running services.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.queues.get_service_queue.html b/_autosummary/elm.ords.services.queues.get_service_queue.html new file mode 100644 index 00000000..ca6fa07e --- /dev/null +++ b/_autosummary/elm.ords.services.queues.get_service_queue.html @@ -0,0 +1,338 @@ + + + + + + + elm.ords.services.queues.get_service_queue — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.queues.get_service_queue

+
+
+get_service_queue(service_name)[source]
+

Retrieve the queue for a service.

+
+
Parameters:
+

service_name (str) – Name of service to retrieve queue for.

+
+
Returns:
+

asyncio.Queue() | None – Queue instance for this service, or None if the queue was not +initialized.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.queues.html b/_autosummary/elm.ords.services.queues.html new file mode 100644 index 00000000..1756c10c --- /dev/null +++ b/_autosummary/elm.ords.services.queues.html @@ -0,0 +1,337 @@ + + + + + + + elm.ords.services.queues — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.queues

+

Module for “singleton” QUERIES dictionary

+

Functions

+ + + + + + + + + + + + +

get_service_queue(service_name)

Retrieve the queue for a service.

initialize_service_queue(service_name)

Initialize an asyncio.Queue() for a service.

tear_down_service_queue(service_name)

Remove the queue for a service.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.queues.initialize_service_queue.html b/_autosummary/elm.ords.services.queues.initialize_service_queue.html new file mode 100644 index 00000000..a4f14730 --- /dev/null +++ b/_autosummary/elm.ords.services.queues.initialize_service_queue.html @@ -0,0 +1,338 @@ + + + + + + + elm.ords.services.queues.initialize_service_queue — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.queues.initialize_service_queue

+
+
+initialize_service_queue(service_name)[source]
+

Initialize an asyncio.Queue() for a service.

+

Repeated calls to this function return the same queue

+
+
Parameters:
+

service_name (str) – Name of service to initialize queue for.

+
+
Returns:
+

asyncio.Queue() – Queue instance for this service.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.queues.tear_down_service_queue.html b/_autosummary/elm.ords.services.queues.tear_down_service_queue.html new file mode 100644 index 00000000..1ff42e96 --- /dev/null +++ b/_autosummary/elm.ords.services.queues.tear_down_service_queue.html @@ -0,0 +1,336 @@ + + + + + + + elm.ords.services.queues.tear_down_service_queue — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.queues.tear_down_service_queue

+
+
+tear_down_service_queue(service_name)[source]
+

Remove the queue for a service.

+

The queue does not have to exist, so repeated calls to this function +are OK.

+
+
Parameters:
+

service_name (str) – Name of service to delete queue for.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.CleanedFileWriter.html b/_autosummary/elm.ords.services.threaded.CleanedFileWriter.html new file mode 100644 index 00000000..1288b4b2 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.CleanedFileWriter.html @@ -0,0 +1,463 @@ + + + + + + + elm.ords.services.threaded.CleanedFileWriter — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.CleanedFileWriter

+
+
+class CleanedFileWriter(out_dir, tpe_kwargs=None)[source]
+

Bases: StoreFileOnDisk

+

Service that writes cleaned text to a file

+
+
Parameters:
+
    +
  • out_dir (path-like) – Path to output directory where file should be stored.

  • +
  • tpe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. +By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(doc)

Store file in out directory.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Always True (limiting is handled by asyncio)

name

Service name used to pull the correct queue object.

+
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Open thread pool and temp directory

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property can_process
+

Always True (limiting is handled by asyncio)

+
+
Type:
+

bool

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process(doc)
+

Store file in out directory.

+
+
Parameters:
+

doc (elm.web.document.Document) – Document containing meta information about the file. Must +have relevant processing keys in the metadata dict, +otherwise the file may not be stored in the output +directory.

+
+
Returns:
+

Path | None – Path to output file, or None if no file was stored.

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Shutdown thread pool and cleanup temp directory

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.FileMover.html b/_autosummary/elm.ords.services.threaded.FileMover.html new file mode 100644 index 00000000..cefcfa2a --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.FileMover.html @@ -0,0 +1,463 @@ + + + + + + + elm.ords.services.threaded.FileMover — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.FileMover

+
+
+class FileMover(out_dir, tpe_kwargs=None)[source]
+

Bases: StoreFileOnDisk

+

Service that moves files to an output directory

+
+
Parameters:
+
    +
  • out_dir (path-like) – Path to output directory where file should be stored.

  • +
  • tpe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. +By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(doc)

Store file in out directory.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Always True (limiting is handled by asyncio)

name

Service name used to pull the correct queue object.

+
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Open thread pool and temp directory

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property can_process
+

Always True (limiting is handled by asyncio)

+
+
Type:
+

bool

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process(doc)
+

Store file in out directory.

+
+
Parameters:
+

doc (elm.web.document.Document) – Document containing meta information about the file. Must +have relevant processing keys in the metadata dict, +otherwise the file may not be stored in the output +directory.

+
+
Returns:
+

Path | None – Path to output file, or None if no file was stored.

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Shutdown thread pool and cleanup temp directory

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.OrdDBFileWriter.html b/_autosummary/elm.ords.services.threaded.OrdDBFileWriter.html new file mode 100644 index 00000000..b25fe727 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.OrdDBFileWriter.html @@ -0,0 +1,463 @@ + + + + + + + elm.ords.services.threaded.OrdDBFileWriter — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.OrdDBFileWriter

+
+
+class OrdDBFileWriter(out_dir, tpe_kwargs=None)[source]
+

Bases: StoreFileOnDisk

+

Service that writes cleaned text to a file

+
+
Parameters:
+
    +
  • out_dir (path-like) – Path to output directory where file should be stored.

  • +
  • tpe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. +By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(doc)

Store file in out directory.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Always True (limiting is handled by asyncio)

name

Service name used to pull the correct queue object.

+
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Open thread pool and temp directory

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property can_process
+

Always True (limiting is handled by asyncio)

+
+
Type:
+

bool

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process(doc)
+

Store file in out directory.

+
+
Parameters:
+

doc (elm.web.document.Document) – Document containing meta information about the file. Must +have relevant processing keys in the metadata dict, +otherwise the file may not be stored in the output +directory.

+
+
Returns:
+

Path | None – Path to output file, or None if no file was stored.

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Shutdown thread pool and cleanup temp directory

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.StoreFileOnDisk.html b/_autosummary/elm.ords.services.threaded.StoreFileOnDisk.html new file mode 100644 index 00000000..be518456 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.StoreFileOnDisk.html @@ -0,0 +1,464 @@ + + + + + + + elm.ords.services.threaded.StoreFileOnDisk — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.StoreFileOnDisk

+
+
+class StoreFileOnDisk(out_dir, tpe_kwargs=None)[source]
+

Bases: ThreadedService

+

Abstract service that manages the storage of a file on disk.

+

Storage can occur due to creation or a move of a file.

+
+
Parameters:
+
    +
  • out_dir (path-like) – Path to output directory where file should be stored.

  • +
  • tpe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. +By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(doc)

Store file in out directory.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Always True (limiting is handled by asyncio)

name

Service name used to pull the correct queue object.

+
+
+property can_process
+

Always True (limiting is handled by asyncio)

+
+
Type:
+

bool

+
+
+
+ +
+
+async process(doc)[source]
+

Store file in out directory.

+
+
Parameters:
+

doc (elm.web.document.Document) – Document containing meta information about the file. Must +have relevant processing keys in the metadata dict, +otherwise the file may not be stored in the output +directory.

+
+
Returns:
+

Path | None – Path to output file, or None if no file was stored.

+
+
+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Open thread pool and temp directory

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Shutdown thread pool and cleanup temp directory

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.TempFileCache.html b/_autosummary/elm.ords.services.threaded.TempFileCache.html new file mode 100644 index 00000000..94e065b3 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.TempFileCache.html @@ -0,0 +1,470 @@ + + + + + + + elm.ords.services.threaded.TempFileCache — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.TempFileCache

+
+
+class TempFileCache(td_kwargs=None, tpe_kwargs=None)[source]
+

Bases: ThreadedService

+

Service that locally caches files downloaded from the internet

+
+
Parameters:
+
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(doc, file_content[, make_name_unique])

Write URL doc to file asynchronously.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Always True (limiting is handled by asyncio)

name

Service name used to pull the correct queue object.

+
+
+property can_process
+

Always True (limiting is handled by asyncio)

+
+
Type:
+

bool

+
+
+
+ +
+
+acquire_resources()[source]
+

Open thread pool and temp directory

+
+ +
+
+release_resources()[source]
+

Shutdown thread pool and cleanup temp directory

+
+ +
+
+async process(doc, file_content, make_name_unique=False)[source]
+

Write URL doc to file asynchronously.

+
+
Parameters:
+
    +
  • doc (elm.web.document.Document) – Document containing meta information about the file. Must +have a “source” key in the metadata dict containing the +URL, which will be converted to a file name using +compute_fn_from_url().

  • +
  • file_content (str | bytes) – File content, typically string text for HTML files and bytes +for PDF file.

  • +
  • make_name_unique (bool, optional) – Option to make file name unique by adding a UUID at the end +of the file name. By default, False.

  • +
+
+
Returns:
+

Path – Path to output file.

+
+
+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.ThreadedService.html b/_autosummary/elm.ords.services.threaded.ThreadedService.html new file mode 100644 index 00000000..9264d745 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.ThreadedService.html @@ -0,0 +1,453 @@ + + + + + + + elm.ords.services.threaded.ThreadedService — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.ThreadedService

+
+
+class ThreadedService(**kwargs)[source]
+

Bases: Service

+

Service that contains a ThreadPoolExecutor instance

+
+
Parameters:
+

**kwargs – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. +By default, None.

+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(*args, **kwargs)

Process a call to the service.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

Check if process function can be called.

name

Service name used to pull the correct queue object.

+
+
+acquire_resources()[source]
+

Open thread pool and temp directory

+
+ +
+
+release_resources()[source]
+

Shutdown thread pool and cleanup temp directory

+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+abstract property can_process
+

Check if process function can be called.

+

This should be a fast-running method that returns a boolean +indicating whether or not the service can accept more +processing calls.

+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+abstract async process(*args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying processing function.

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.UsageUpdater.html b/_autosummary/elm.ords.services.threaded.UsageUpdater.html new file mode 100644 index 00000000..db9fba79 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.UsageUpdater.html @@ -0,0 +1,461 @@ + + + + + + + elm.ords.services.threaded.UsageUpdater — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded.UsageUpdater

+
+
+class UsageUpdater(usage_fp, tpe_kwargs=None)[source]
+

Bases: ThreadedService

+

Service that updates usage info from a tracker into a file.

+
+
Parameters:
+
    +
  • usage_fp (path-like) – Path to JSON file where usage should be tracked.

  • +
  • tpe_kwargs (dict, optional) – Keyword-value argument pairs to pass to +concurrent.futures.ThreadPoolExecutor. +By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + +

acquire_resources()

Open thread pool and temp directory

call(*args, **kwargs)

Call the service.

process(tracker)

Add usage from tracker to file.

process_using_futures(fut, *args, **kwargs)

Process a call to the service.

release_resources()

Shutdown thread pool and cleanup temp directory

+

Attributes

+ + + + + + + + + + + + +

MAX_CONCURRENT_JOBS

Max number of concurrent job submissions.

can_process

True if file not currently being written to.

name

Service name used to pull the correct queue object.

+
+
+property can_process
+

True if file not currently being written to.

+
+
Type:
+

bool

+
+
+
+ +
+
+async process(tracker)[source]
+

Add usage from tracker to file.

+

Any existing usage info in the file will remain unchanged +EXCEPT for anything under the label of the input tracker, +all of which will be replaced with info from the tracker itself.

+
+
Parameters:
+

tracker (elm.ords.services.usage.UsageTracker) – A usage tracker instance that contains usage info to be +added to output file.

+
+
+
+ +
+
+MAX_CONCURRENT_JOBS = 10000
+

Max number of concurrent job submissions.

+
+ +
+
+acquire_resources()
+

Open thread pool and temp directory

+
+ +
+
+async classmethod call(*args, **kwargs)
+

Call the service.

+
+
Parameters:
+

*args, **kwargs – Positional and keyword arguments to be passed to the +underlying service processing function.

+
+
Returns:
+

obj – A response object from the underlying service.

+
+
+
+ +
+
+property name
+

Service name used to pull the correct queue object.

+
+
Type:
+

str

+
+
+
+ +
+
+async process_using_futures(fut, *args, **kwargs)
+

Process a call to the service.

+
+
Parameters:
+
    +
  • fut (asyncio.Future) – A future object that should get the result of the processing +operation. If the processing function returns answer, +this method should call fut.set_result(answer).

  • +
  • **kwargs – Keyword arguments to be passed to the +underlying processing function.

  • +
+
+
+
+ +
+
+release_resources()
+

Shutdown thread pool and cleanup temp directory

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.threaded.html b/_autosummary/elm.ords.services.threaded.html new file mode 100644 index 00000000..f41cfd95 --- /dev/null +++ b/_autosummary/elm.ords.services.threaded.html @@ -0,0 +1,349 @@ + + + + + + + elm.ords.services.threaded — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.threaded

+

ELM Ordinance Threaded services

+

Classes

+ + + + + + + + + + + + + + + + + + + + + + + + +

CleanedFileWriter(out_dir[, tpe_kwargs])

Service that writes cleaned text to a file

FileMover(out_dir[, tpe_kwargs])

Service that moves files to an output directory

OrdDBFileWriter(out_dir[, tpe_kwargs])

Service that writes cleaned text to a file

StoreFileOnDisk(out_dir[, tpe_kwargs])

Abstract service that manages the storage of a file on disk.

TempFileCache([td_kwargs, tpe_kwargs])

Service that locally caches files downloaded from the internet

ThreadedService(**kwargs)

Service that contains a ThreadPoolExecutor instance

UsageUpdater(usage_fp[, tpe_kwargs])

Service that updates usage info from a tracker into a file.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.html b/_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.html new file mode 100644 index 00000000..43cd0d56 --- /dev/null +++ b/_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.html @@ -0,0 +1,381 @@ + + + + + + + elm.ords.services.usage.TimeBoundedUsageTracker — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.usage.TimeBoundedUsageTracker

+
+
+class TimeBoundedUsageTracker(max_seconds=70)[source]
+

Bases: object

+

Track usage of a resource over time.

+

This class wraps a double-ended queue, and any inputs older than +a certain time are dropped. Those values are also subtracted from +the running total.

+

References

+

https://stackoverflow.com/questions/51485656/efficient-time-bound-queue-in-python

+
+
Parameters:
+

max_seconds (int, optional) – Maximum age in seconds of an element before it is dropped +from consideration. By default, 70.

+
+
+

Methods

+ + + + + + +

add(value)

Add a value to track.

+

Attributes

+ + + + + + +

total

Total value of all entries younger than max_seconds

+
+
+property total
+

Total value of all entries younger than max_seconds

+
+
Type:
+

float

+
+
+
+ +
+
+add(value)[source]
+

Add a value to track.

+
+
Parameters:
+

value (int | float) – A new value to add to the queue. It’s total will be added to +the running total, and it will live for max_seconds before +being discarded.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.usage.TimedEntry.html b/_autosummary/elm.ords.services.usage.TimedEntry.html new file mode 100644 index 00000000..f808c813 --- /dev/null +++ b/_autosummary/elm.ords.services.usage.TimedEntry.html @@ -0,0 +1,346 @@ + + + + + + + elm.ords.services.usage.TimedEntry — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.usage.TimedEntry

+
+
+class TimedEntry(value)[source]
+

Bases: object

+

An entry that performs comparisons based on time added, not value.

+

Examples

+
>>> a = TimedEntry(100)
+>>> a > 1000
+True
+
+
+
+
Parameters:
+

value (obj) – Some value to store as an entry.

+
+
+

Methods

+ + + +
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.usage.UsageTracker.html b/_autosummary/elm.ords.services.usage.UsageTracker.html new file mode 100644 index 00000000..ac01331b --- /dev/null +++ b/_autosummary/elm.ords.services.usage.UsageTracker.html @@ -0,0 +1,488 @@ + + + + + + + elm.ords.services.usage.UsageTracker — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.usage.UsageTracker

+
+
+class UsageTracker(label, response_parser)[source]
+

Bases: UserDict

+

Rate or API usage tracker.

+
+
Parameters:
+
    +
  • label (str) – Top-level label to use when adding this usage information to +another dictionary.

  • +
  • response_parser (callable) – A callable that takes the current usage info (in dictionary +format) and an LLM response as inputs, updates the usage +dictionary with usage info based on the response, and +returns the updated dictionary. See, for example, +elm.ords.services.openai.usage_from_response().

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

add_to(other)

Add the contents of this usage information to another dict.

clear()

copy()

fromkeys(iterable[, value])

get(k[,d])

items()

keys()

pop(k[,d])

If key is not found, d is returned if given, otherwise KeyError is raised.

popitem()

as a 2-tuple; but raise KeyError if D is empty.

setdefault(k[,d])

update([E, ]**F)

If E present and has a .keys() method, does: for k in E: D[k] = E[k] If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v In either case, this is followed by: for k, v in F.items(): D[k] = v

update_from_model([response, sub_label])

Update usage from a model response.

values()

+

Attributes

+ + + + + + +

totals

Compute total usage across all sub-labels.

+
+
+add_to(other)[source]
+

Add the contents of this usage information to another dict.

+

The contents of this dictionary are stored under the label +key that this object was initialized with.

+
+
Parameters:
+

other (dict) – A dictionary to add the contents of this one to.

+
+
+
+ +
+
+property totals
+

Compute total usage across all sub-labels.

+
+
Returns:
+

dict – Dictionary containing usage information totaled across all +sub-labels.

+
+
+
+ +
+
+update_from_model(response=None, sub_label='default')[source]
+

Update usage from a model response.

+
+
Parameters:
+
    +
  • response (object, optional) – Model call response, which either contains usage information +or can be used to infer/compute usage. If None, no +update is made.

  • +
  • sub_label (str, optional) – Optional label to categorize usage under. This can be used +to track usage related to certain categories. +By default, "default".

  • +
+
+
+
+ +
+
+clear() None.  Remove all items from D.
+
+ +
+
+get(k[, d]) D[k] if k in D, else d.  d defaults to None.
+
+ +
+
+items() a set-like object providing a view on D's items
+
+ +
+
+keys() a set-like object providing a view on D's keys
+
+ +
+
+pop(k[, d]) v, remove specified key and return the corresponding value.
+

If key is not found, d is returned if given, otherwise KeyError is raised.

+
+ +
+
+popitem() (k, v), remove and return some (key, value) pair
+

as a 2-tuple; but raise KeyError if D is empty.

+
+ +
+
+setdefault(k[, d]) D.get(k,d), also set D[k]=d if k not in D
+
+ +
+
+update([E, ]**F) None.  Update D from mapping/iterable E and F.
+

If E present and has a .keys() method, does: for k in E: D[k] = E[k] +If E present and lacks .keys() method, does: for (k, v) in E: D[k] = v +In either case, this is followed by: for k, v in F.items(): D[k] = v

+
+ +
+
+values() an object providing a view on D's values
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.services.usage.html b/_autosummary/elm.ords.services.usage.html new file mode 100644 index 00000000..f28c0704 --- /dev/null +++ b/_autosummary/elm.ords.services.usage.html @@ -0,0 +1,337 @@ + + + + + + + elm.ords.services.usage — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.services.usage

+

ELM Ordinances usage tracking utilities.

+

Classes

+ + + + + + + + + + + + +

TimeBoundedUsageTracker([max_seconds])

Track usage of a resource over time.

TimedEntry(value)

An entry that performs comparisons based on time added, not value.

UsageTracker(label, response_parser)

Rate or API usage tracker.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.counties.county_websites.html b/_autosummary/elm.ords.utilities.counties.county_websites.html new file mode 100644 index 00000000..78f4308f --- /dev/null +++ b/_autosummary/elm.ords.utilities.counties.county_websites.html @@ -0,0 +1,341 @@ + + + + + + + elm.ords.utilities.counties.county_websites — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.counties.county_websites

+
+
+county_websites(county_info=None)[source]
+

Load mapping of county name and state to website.

+
+
Parameters:
+

county_info (pd.DataFrame, optional) – DataFrame containing county names and websites. If None, +this info is loaded using load_county_info(). +By default, None.

+
+
Returns:
+

dict – Dictionary where keys are tuples of (county, state) and keys are +the relevant website URL. Note that county and state names are +lowercase.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.counties.html b/_autosummary/elm.ords.utilities.counties.html new file mode 100644 index 00000000..5118ade0 --- /dev/null +++ b/_autosummary/elm.ords.utilities.counties.html @@ -0,0 +1,337 @@ + + + + + + + elm.ords.utilities.counties — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.counties

+

ELM Ordinance county info

+

Functions

+ + + + + + + + + + + + +

county_websites([county_info])

Load mapping of county name and state to website.

load_all_county_info()

Load DataFrame containing info like names and websites for all counties.

load_counties_from_fp(county_fp)

Load county info based on counties in the input fp.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.counties.load_all_county_info.html b/_autosummary/elm.ords.utilities.counties.load_all_county_info.html new file mode 100644 index 00000000..a04abe3e --- /dev/null +++ b/_autosummary/elm.ords.utilities.counties.load_all_county_info.html @@ -0,0 +1,335 @@ + + + + + + + elm.ords.utilities.counties.load_all_county_info — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.counties.load_all_county_info

+
+
+load_all_county_info()[source]
+

Load DataFrame containing info like names and websites for all counties.

+
+
Returns:
+

pd.DataFrame – DataFrame containing county info like names, FIPS, websites, +etc. for all counties.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.counties.load_counties_from_fp.html b/_autosummary/elm.ords.utilities.counties.load_counties_from_fp.html new file mode 100644 index 00000000..aef28270 --- /dev/null +++ b/_autosummary/elm.ords.utilities.counties.load_counties_from_fp.html @@ -0,0 +1,339 @@ + + + + + + + elm.ords.utilities.counties.load_counties_from_fp — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.counties.load_counties_from_fp

+
+
+load_counties_from_fp(county_fp)[source]
+

Load county info based on counties in the input fp.

+
+
Parameters:
+

county_fp (path-like) – Path to csv file containing “County” and “State” columns that +define the counties for which info should be loaded.

+
+
Returns:
+

pd.DataFrame – DataFrame containing county info like names, FIPS, websites, +etc. for all requested counties (that were found).

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.html b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.html new file mode 100644 index 00000000..a1b97f95 --- /dev/null +++ b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.html @@ -0,0 +1,330 @@ + + + + + + + elm.ords.utilities.exceptions.ELMOrdsError — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.exceptions.ELMOrdsError

+
+
+exception ELMOrdsError(*args, **kwargs)[source]
+

Generic ELM Ordinance Error.

+

Init exception and broadcast message to logger.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.html b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.html new file mode 100644 index 00000000..b41077fd --- /dev/null +++ b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.html @@ -0,0 +1,330 @@ + + + + + + + elm.ords.utilities.exceptions.ELMOrdsNotInitializedError — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.exceptions.ELMOrdsNotInitializedError

+
+
+exception ELMOrdsNotInitializedError(*args, **kwargs)[source]
+

ELM Ordinances not initialized error.

+

Init exception and broadcast message to logger.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.html b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.html new file mode 100644 index 00000000..20eade7b --- /dev/null +++ b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.html @@ -0,0 +1,330 @@ + + + + + + + elm.ords.utilities.exceptions.ELMOrdsRuntimeError — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.exceptions.ELMOrdsRuntimeError

+
+
+exception ELMOrdsRuntimeError(*args, **kwargs)[source]
+

ELM Ordinances RuntimeError.

+

Init exception and broadcast message to logger.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.html b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.html new file mode 100644 index 00000000..1ba4e1e6 --- /dev/null +++ b/_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.html @@ -0,0 +1,330 @@ + + + + + + + elm.ords.utilities.exceptions.ELMOrdsValueError — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.exceptions.ELMOrdsValueError

+
+
+exception ELMOrdsValueError(*args, **kwargs)[source]
+

ELM Ordinances ValueError.

+

Init exception and broadcast message to logger.

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.exceptions.html b/_autosummary/elm.ords.utilities.exceptions.html new file mode 100644 index 00000000..d9684d7a --- /dev/null +++ b/_autosummary/elm.ords.utilities.exceptions.html @@ -0,0 +1,340 @@ + + + + + + + elm.ords.utilities.exceptions — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.exceptions

+

Custom Exceptions and Errors for ELM Ordinances.

+

Exceptions

+ + + + + + + + + + + + + + + +

ELMOrdsError(*args, **kwargs)

Generic ELM Ordinance Error.

ELMOrdsNotInitializedError(*args, **kwargs)

ELM Ordinances not initialized error.

ELMOrdsRuntimeError(*args, **kwargs)

ELM Ordinances RuntimeError.

ELMOrdsValueError(*args, **kwargs)

ELM Ordinances ValueError.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.html b/_autosummary/elm.ords.utilities.html new file mode 100644 index 00000000..98ec8750 --- /dev/null +++ b/_autosummary/elm.ords.utilities.html @@ -0,0 +1,341 @@ + + + + + + + elm.ords.utilities — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities

+

ELM Ordinance utilities.

+ + + + + + + + + + + + + + + + + + +

elm.ords.utilities.counties

ELM Ordinance county info

elm.ords.utilities.exceptions

Custom Exceptions and Errors for ELM Ordinances.

elm.ords.utilities.location

ELM Ordinance location specification utilities

elm.ords.utilities.parsing

ELM Ordinances parsing utilities.

elm.ords.utilities.queued_logging

ELM Ordinance queued logging.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.location.County.html b/_autosummary/elm.ords.utilities.location.County.html new file mode 100644 index 00000000..0e15d733 --- /dev/null +++ b/_autosummary/elm.ords.utilities.location.County.html @@ -0,0 +1,365 @@ + + + + + + + elm.ords.utilities.location.County — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.location.County

+
+
+class County(name, state, fips=None, is_parish=False)[source]
+

Bases: Location

+

Class representing a county

+
+
Parameters:
+
    +
  • name (str) – Name of the county.

  • +
  • state (str) – State containing the county.

  • +
  • fips (int | str, optional) – Optional county FIPS code. By default, None.

  • +
  • is_parish (bool, optional) – Flag indicating whether or not this county is classified as +a parish. By default, False.

  • +
+
+
+

Methods

+ + + +
+

Attributes

+ + + + + + +

full_name

Full county name in format '{name} County, {state}'

+
+
+property full_name
+

Full county name in format ‘{name} County, {state}’

+
+
Type:
+

str

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.location.Location.html b/_autosummary/elm.ords.utilities.location.Location.html new file mode 100644 index 00000000..1ab4dc40 --- /dev/null +++ b/_autosummary/elm.ords.utilities.location.Location.html @@ -0,0 +1,359 @@ + + + + + + + elm.ords.utilities.location.Location — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.location.Location

+
+
+class Location(name)[source]
+

Bases: ABC

+

Abstract location representation.

+
+
Parameters:
+

name (str) – Name of location.

+
+
+

Methods

+ + + +
+

Attributes

+ + + + + + +

full_name

Full name of location

+
+
+abstract property full_name
+

Full name of location

+
+
Type:
+

str

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.location.html b/_autosummary/elm.ords.utilities.location.html new file mode 100644 index 00000000..21662047 --- /dev/null +++ b/_autosummary/elm.ords.utilities.location.html @@ -0,0 +1,334 @@ + + + + + + + elm.ords.utilities.location — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.location

+

ELM Ordinance location specification utilities

+

Classes

+ + + + + + + + + +

County(name, state[, fips, is_parish])

Class representing a county

Location(name)

Abstract location representation.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.parsing.html b/_autosummary/elm.ords.utilities.parsing.html new file mode 100644 index 00000000..fc12c669 --- /dev/null +++ b/_autosummary/elm.ords.utilities.parsing.html @@ -0,0 +1,334 @@ + + + + + + + elm.ords.utilities.parsing — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.parsing

+

ELM Ordinances parsing utilities.

+

Functions

+ + + + + + + + + +

llm_response_as_json(content)

LLM response to JSON.

merge_overlapping_texts(text_chunks[, n])

Merge chunks of text by removing any overlap.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.parsing.llm_response_as_json.html b/_autosummary/elm.ords.utilities.parsing.llm_response_as_json.html new file mode 100644 index 00000000..90c930a2 --- /dev/null +++ b/_autosummary/elm.ords.utilities.parsing.llm_response_as_json.html @@ -0,0 +1,339 @@ + + + + + + + elm.ords.utilities.parsing.llm_response_as_json — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.parsing.llm_response_as_json

+
+
+llm_response_as_json(content)[source]
+

LLM response to JSON.

+
+
Parameters:
+

content (str) – LLM response that contains a string representation of +a JSON file.

+
+
Returns:
+

dict – Response parsed into dictionary. This dictionary will be empty +if the response cannot be parsed by JSON.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.html b/_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.html new file mode 100644 index 00000000..68a87e1c --- /dev/null +++ b/_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.html @@ -0,0 +1,342 @@ + + + + + + + elm.ords.utilities.parsing.merge_overlapping_texts — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.parsing.merge_overlapping_texts

+
+
+merge_overlapping_texts(text_chunks, n=300)[source]
+

Merge chunks of text by removing any overlap.

+
+
Parameters:
+
    +
  • text_chunks (iterable of str) – Iterable containing text chunks which may or may not contain +consecutive overlapping portions.

  • +
  • n (int, optional) – Number of characters to check at the beginning of each message +for overlap with the previous message. By default, 300.

  • +
+
+
Returns:
+

str – Merged text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.html b/_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.html new file mode 100644 index 00000000..356d689b --- /dev/null +++ b/_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.html @@ -0,0 +1,537 @@ + + + + + + + elm.ords.utilities.queued_logging.LocalProcessQueueHandler — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.queued_logging.LocalProcessQueueHandler

+
+
+class LocalProcessQueueHandler(queue)[source]
+

Bases: QueueHandler

+

QueueHandler that works within a single process (locally).

+

Initialise an instance, using the passed queue.

+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

acquire()

Acquire the I/O thread lock.

addFilter(filter)

Add the specified filter to this handler.

close()

Tidy up any resources used by the handler.

createLock()

Acquire a thread lock for serializing access to the underlying I/O.

emit(record)

Emit record with a location attribute equal to current asyncio task.

enqueue(record)

Enqueue a record.

filter(record)

Determine if a record is loggable by consulting all the filters.

flush()

Ensure all logging output has been flushed.

format(record)

Format the specified record.

get_name()

handle(record)

Conditionally emit the specified logging record.

handleError(record)

Handle errors which occur during an emit() call.

prepare(record)

Prepares a record for queuing.

release()

Release the I/O thread lock.

removeFilter(filter)

Remove the specified filter from this handler.

setFormatter(fmt)

Set the formatter for this handler.

setLevel(level)

Set the logging level of this handler.

set_name(name)

+

Attributes

+ + + + + + +

name

+
+
+emit(record)[source]
+

Emit record with a location attribute equal to current asyncio task.

+
+
Parameters:
+

record (logging.LogRecord) – Log record containing the log message + default attributes. +This record will get a location attribute dynamically +added, with a value equal to the name of the current asyncio +task (i.e. asyncio.current_task().get_name()).

+
+
+
+ +
+
+acquire()
+

Acquire the I/O thread lock.

+
+ +
+
+addFilter(filter)
+

Add the specified filter to this handler.

+
+ +
+
+close()
+

Tidy up any resources used by the handler.

+

This version removes the handler from an internal map of handlers, +_handlers, which is used for handler lookup by name. Subclasses +should ensure that this gets called from overridden close() +methods.

+
+ +
+
+createLock()
+

Acquire a thread lock for serializing access to the underlying I/O.

+
+ +
+
+enqueue(record)
+

Enqueue a record.

+

The base implementation uses put_nowait. You may want to override +this method if you want to use blocking, timeouts or custom queue +implementations.

+
+ +
+
+filter(record)
+

Determine if a record is loggable by consulting all the filters.

+

The default is to allow the record to be logged; any filter can veto +this and the record is then dropped. Returns a zero value if a record +is to be dropped, else non-zero.

+
+

Changed in version 3.2: Allow filters to be just callables.

+
+
+ +
+
+flush()
+

Ensure all logging output has been flushed.

+

This version does nothing and is intended to be implemented by +subclasses.

+
+ +
+
+format(record)
+

Format the specified record.

+

If a formatter is set, use it. Otherwise, use the default formatter +for the module.

+
+ +
+
+handle(record)
+

Conditionally emit the specified logging record.

+

Emission depends on filters which may have been added to the handler. +Wrap the actual emission of the record with acquisition/release of +the I/O thread lock. Returns whether the filter passed the record for +emission.

+
+ +
+
+handleError(record)
+

Handle errors which occur during an emit() call.

+

This method should be called from handlers when an exception is +encountered during an emit() call. If raiseExceptions is false, +exceptions get silently ignored. This is what is mostly wanted +for a logging system - most users will not care about errors in +the logging system, they are more interested in application errors. +You could, however, replace this with a custom handler if you wish. +The record which was being processed is passed in to this method.

+
+ +
+
+prepare(record)
+

Prepares a record for queuing. The object returned by this method is +enqueued.

+

The base implementation formats the record to merge the message +and arguments, and removes unpickleable items from the record +in-place.

+

You might want to override this method if you want to convert +the record to a dict or JSON string, or send a modified copy +of the record while leaving the original intact.

+
+ +
+
+release()
+

Release the I/O thread lock.

+
+ +
+
+removeFilter(filter)
+

Remove the specified filter from this handler.

+
+ +
+
+setFormatter(fmt)
+

Set the formatter for this handler.

+
+ +
+
+setLevel(level)
+

Set the logging level of this handler. level must be an int or a str.

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.html b/_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.html new file mode 100644 index 00000000..3f927733 --- /dev/null +++ b/_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.html @@ -0,0 +1,348 @@ + + + + + + + elm.ords.utilities.queued_logging.LocationFileLog — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.queued_logging.LocationFileLog

+
+
+class LocationFileLog(listener, log_dir, location, level='INFO')[source]
+

Bases: object

+

Context manager to write logs for a location to a unique file.

+
+
Parameters:
+
    +
  • listener (LoggingListener) – A listener instance. The file handler will be added to this +listener.

  • +
  • log_dir (path-like) – Path to output directory to contain log file.

  • +
  • location (str) – Location identifier. For example, "El Paso Colorado". +This string will become part of the file name, so it must +contain only characters valid in a file name.

  • +
  • level (str, optional) – Log level. By default, "INFO".

  • +
+
+
+

Methods

+ + + +
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.queued_logging.LocationFilter.html b/_autosummary/elm.ords.utilities.queued_logging.LocationFilter.html new file mode 100644 index 00000000..f69e9b5b --- /dev/null +++ b/_autosummary/elm.ords.utilities.queued_logging.LocationFilter.html @@ -0,0 +1,363 @@ + + + + + + + elm.ords.utilities.queued_logging.LocationFilter — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.queued_logging.LocationFilter

+
+
+class LocationFilter(location)[source]
+

Bases: Filter

+

Filter down to logs from a coroutine processing a specific location.

+
+
Parameters:
+

location (str) – Location identifier. For example, "El Paso Colorado".

+
+
+

Methods

+ + + + + + +

filter(record)

Filter logging record.

+
+
+filter(record)[source]
+

Filter logging record.

+
+
Parameters:
+

record (logging.LogRecord) – Log record containing the log message + default attributes. +Must have a location attribute that is a string +identifier, or this function will return False every +time. The location identifier will be checked against +the filter’s location attribute to determine the output +result.

+
+
Returns:
+

bool – If the record’s location attribute matches the filter’s +location attribute.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.queued_logging.LogListener.html b/_autosummary/elm.ords.utilities.queued_logging.LogListener.html new file mode 100644 index 00000000..59d59f50 --- /dev/null +++ b/_autosummary/elm.ords.utilities.queued_logging.LogListener.html @@ -0,0 +1,376 @@ + + + + + + + elm.ords.utilities.queued_logging.LogListener — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.queued_logging.LogListener

+
+
+class LogListener(logger_names, level='INFO')[source]
+

Bases: object

+

Class to listen to logging queue from coroutines and write to files.

+
+
Parameters:
+
    +
  • logger_names (iterable) – An iterable of string, where each string is a logger name. +The logger corresponding to each of the names will be +equipped with a logging queue handler.

  • +
  • level (str, optional) – Log level to set for each logger. By default, "INFO".

  • +
+
+
+

Methods

+ + + + + + + + + +

addHandler(handler)

Add a handler to the queue listener.

removeHandler(handler)

Remove a handler from the queue listener.

+
+
+addHandler(handler)[source]
+

Add a handler to the queue listener.

+

Logs that are sent to the queue will be emitted to the handler.

+
+
Parameters:
+

handler (logging.Handler) – Log handler to parse log records.

+
+
+
+ +
+
+removeHandler(handler)[source]
+

Remove a handler from the queue listener.

+

Logs that are sent to the queue will no longer be emitted to the +handler.

+
+
Parameters:
+

handler (logging.Handler) – Log handler to remove from queue listener.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.html b/_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.html new file mode 100644 index 00000000..7f29dd9b --- /dev/null +++ b/_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.html @@ -0,0 +1,359 @@ + + + + + + + elm.ords.utilities.queued_logging.NoLocationFilter — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.queued_logging.NoLocationFilter

+
+
+class NoLocationFilter(name='')[source]
+

Bases: Filter

+

Filter that catches all records without a location attribute.

+

Initialize a filter.

+

Initialize with the name of the logger which, together with its +children, will have its events allowed through the filter. If no +name is specified, allow every event.

+

Methods

+ + + + + + +

filter(record)

Filter logging record.

+
+
+filter(record)[source]
+

Filter logging record.

+
+
Parameters:
+

record (logging.LogRecord) – Log record containing the log message + default attributes. +If the location attribute is missing or is a string in +the form “Task-XX”, the filter returns True (i.e. record +is emitted).

+
+
Returns:
+

bool – If the record’s location attribute is “missing”.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.utilities.queued_logging.html b/_autosummary/elm.ords.utilities.queued_logging.html new file mode 100644 index 00000000..948bbeb9 --- /dev/null +++ b/_autosummary/elm.ords.utilities.queued_logging.html @@ -0,0 +1,345 @@ + + + + + + + elm.ords.utilities.queued_logging — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.utilities.queued_logging

+

ELM Ordinance queued logging.

+

This module implements queued logging, mostly following this blog: +https://www.zopatista.com/python/2019/05/11/asyncio-logging/

+

Classes

+ + + + + + + + + + + + + + + + + + +

LocalProcessQueueHandler(queue)

QueueHandler that works within a single process (locally).

LocationFileLog(listener, log_dir, location)

Context manager to write logs for a location to a unique file.

LocationFilter(location)

Filter down to logs from a coroutine processing a specific location.

LogListener(logger_names[, level])

Class to listen to logging queue from coroutines and write to files.

NoLocationFilter([name])

Filter that catches all records without a location attribute.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.content.ValidationWithMemory.html b/_autosummary/elm.ords.validation.content.ValidationWithMemory.html new file mode 100644 index 00000000..310cdb07 --- /dev/null +++ b/_autosummary/elm.ords.validation.content.ValidationWithMemory.html @@ -0,0 +1,389 @@ + + + + + + + elm.ords.validation.content.ValidationWithMemory — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.content.ValidationWithMemory

+
+
+class ValidationWithMemory(structured_llm_caller, text_chunks, num_to_recall=2)[source]
+

Bases: object

+

Validate a set of text chunks by sometimes looking at previous chunks

+
+
Parameters:
+
    +
  • structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

  • +
  • text_chunks (list of str) – List of strings, each of which represent a chunk of text. +The order of the strings should be the order of the text +chunks. This validator may refer to previous text chunks to +answer validation questions.

  • +
  • num_to_recall (int, optional) – Number of chunks to check for each validation call. This +includes the original chunk! For example, if +num_to_recall=2, the validator will first check the chunk +at the requested index, and then the previous chunk as well. +By default, 2.

  • +
+
+
+

Methods

+ + + + + + +

parse_from_ind(ind, prompt, key)

Validate a chunk of text.

+
+
+async parse_from_ind(ind, prompt, key)[source]
+

Validate a chunk of text.

+

Validation occurs by querying the LLM using the input prompt and +parsing the key from the response JSON. The prompt should +request that the key be a boolean output. If the key retrieved +from the LLM response is False, a number of previous text chunks +are checked as well, using the same prompt. This can be helpful +in cases where the answer to the validation prompt (e.g. does +this text pertain to a large WECS?) is only found in a previous +text chunk.

+
+
Parameters:
+
    +
  • ind (int) – Positive integer corresponding to the chunk index. +Must be less than len(text_chunks).

  • +
  • prompt (str) – Input LLM system prompt that describes the validation +question. This should request a JSON output from the LLM. +It should also take key as a formatting input.

  • +
  • key (str) – A key expected in the JSON output of the LLM containing the +response for the validation question. This string will also +be used to format the system prompt before it is passed to +the LLM.

  • +
+
+
Returns:
+

boolTrue if the LLM returned True for this text chunk or +num_to_recall-1 text chunks before it. +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.content.html b/_autosummary/elm.ords.validation.content.html new file mode 100644 index 00000000..912a5d16 --- /dev/null +++ b/_autosummary/elm.ords.validation.content.html @@ -0,0 +1,341 @@ + + + + + + + elm.ords.validation.content — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.content

+

ELM Ordinance document content Validation logic

+

These are primarily used to validate that a legal document applies to a +particular technology (e.g. Large Wind Energy Conversion Systems).

+

Functions

+ + + + + + +

possibly_mentions_wind(text[, ...])

Perform a heuristic check for mention of wind energy in text.

+

Classes

+ + + + + + +

ValidationWithMemory(structured_llm_caller, ...)

Validate a set of text chunks by sometimes looking at previous chunks

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.content.possibly_mentions_wind.html b/_autosummary/elm.ords.validation.content.possibly_mentions_wind.html new file mode 100644 index 00000000..65fb7514 --- /dev/null +++ b/_autosummary/elm.ords.validation.content.possibly_mentions_wind.html @@ -0,0 +1,349 @@ + + + + + + + elm.ords.validation.content.possibly_mentions_wind — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.content.possibly_mentions_wind

+
+
+possibly_mentions_wind(text, match_count_threshold=1)[source]
+

Perform a heuristic check for mention of wind energy in text.

+

This check first strips the text of any wind “look-alike” words +(e.g. “window”, “windshield”, etc). Then, it checks for particular +keywords, acronyms, and phrases that pertain to wind in the text. +If enough keywords are mentioned (as dictated by +match_count_threshold), this check returns True.

+
+
Parameters:
+
    +
  • text (str) – Input text that may or may not mention wind in relation to wind +energy.

  • +
  • match_count_threshold (int, optional) – Number of keywords that must match for the text to pass this +heuristic check. Count must be strictly greater than this value. +By default, 1.

  • +
+
+
Returns:
+

boolTrue if the number of keywords/acronyms/phrases detected +exceeds the match_count_threshold.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.html b/_autosummary/elm.ords.validation.html new file mode 100644 index 00000000..38915444 --- /dev/null +++ b/_autosummary/elm.ords.validation.html @@ -0,0 +1,332 @@ + + + + + + + elm.ords.validation — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation

+

ELM ordinance document content and source validation.

+ + + + + + + + + +

elm.ords.validation.content

ELM Ordinance document content Validation logic

elm.ords.validation.location

ELM Ordinance Location Validation logic

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.html b/_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.html new file mode 100644 index 00000000..0df785d6 --- /dev/null +++ b/_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.html @@ -0,0 +1,377 @@ + + + + + + + elm.ords.validation.location.CountyJurisdictionValidator — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.location.CountyJurisdictionValidator

+
+
+class CountyJurisdictionValidator(structured_llm_caller)[source]
+

Bases: FixedMessageValidator

+

Validator that checks whether text applies at the county level.

+
+
Parameters:
+

structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

+
+
+

Methods

+ + + + + + +

check(content, **fmt_kwargs)

Check if the content passes the validation.

+

Attributes

+ + + + + + +

SYSTEM_MESSAGE

LLM system message describing validation task.

+
+
+SYSTEM_MESSAGE = "You extract structured data from legal text. Return your answer in JSON format. Your JSON file must include exactly three keys. The first key is 'x', which is a boolean that is set to `True` if the text excerpt explicitly mentions that the regulations within apply to a jurisdiction scope other than {county} County (i.e. they apply to a subdivision like a township or a city, or they apply more broadly, like to a state or the full country). `False` if the regulations in the text apply at the {county} County level, if the regulations in the text apply to all unincorporated areas of {county} County, or if there is not enough information to determine the answer. The second key is 'y', which is a boolean that is set to `True` if the text excerpt explicitly mentions that the regulations within apply to more than one county. `False` if the regulations in the text excerpt apply to a single county only or if there is not enough information to determine the answer. The third key is 'explanation', which is a string that contains a short explanation if you chose `True` for any answers above."
+

LLM system message describing validation task.

+
+ +
+
+async check(content, **fmt_kwargs)
+

Check if the content passes the validation.

+

The exact validation is outlined in the class SYSTEM_MESSAGE.

+
+
Parameters:
+
    +
  • content (str) – Document content to validate.

  • +
  • **fmt_kwargs – Keyword arguments to be passed to SYSTEM_MESSAGE.format().

  • +
+
+
Returns:
+

boolTrue if the content passes the validation check, +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.location.CountyNameValidator.html b/_autosummary/elm.ords.validation.location.CountyNameValidator.html new file mode 100644 index 00000000..a92e72f0 --- /dev/null +++ b/_autosummary/elm.ords.validation.location.CountyNameValidator.html @@ -0,0 +1,377 @@ + + + + + + + elm.ords.validation.location.CountyNameValidator — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.location.CountyNameValidator

+
+
+class CountyNameValidator(structured_llm_caller)[source]
+

Bases: FixedMessageValidator

+

Validator that checks whether text applies to a particular county.

+
+
Parameters:
+

structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

+
+
+

Methods

+ + + + + + +

check(content, **fmt_kwargs)

Check if the content passes the validation.

+

Attributes

+ + + + + + +

SYSTEM_MESSAGE

LLM system message describing validation task.

+
+
+SYSTEM_MESSAGE = "You extract structured data from legal text. Return your answer in JSON format. Your JSON file must include exactly three keys. The first key is 'wrong_county', which is a boolean that is set to `True` if the legal text is not for {county} County. Do not infer based on any information about any US state, city, township, or otherwise. `False` if the text applies to {county} County or if there is not enough information to determine the answer. The second key is 'wrong_state', which is a boolean that is set to `True` if the legal text is not for a county in {state} State. Do not infer based on any information about any US county, city, township, or otherwise. `False` if the text applies to a county in {state} State or if there is not enough information to determine the answer. The third key is 'explanation', which is a string that contains a short explanation if you chose `True` for any answers above."
+

LLM system message describing validation task.

+
+ +
+
+async check(content, **fmt_kwargs)
+

Check if the content passes the validation.

+

The exact validation is outlined in the class SYSTEM_MESSAGE.

+
+
Parameters:
+
    +
  • content (str) – Document content to validate.

  • +
  • **fmt_kwargs – Keyword arguments to be passed to SYSTEM_MESSAGE.format().

  • +
+
+
Returns:
+

boolTrue if the content passes the validation check, +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.location.CountyValidator.html b/_autosummary/elm.ords.validation.location.CountyValidator.html new file mode 100644 index 00000000..762382ae --- /dev/null +++ b/_autosummary/elm.ords.validation.location.CountyValidator.html @@ -0,0 +1,371 @@ + + + + + + + elm.ords.validation.location.CountyValidator — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.location.CountyValidator

+
+
+class CountyValidator(structured_llm_caller, score_thresh=0.8)[source]
+

Bases: object

+

ELM Ords County validator.

+

Combines the logic of several validators into a single class.

+
+
Parameters:
+
    +
  • structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

  • +
  • score_thresh (float, optional) – Score threshold to exceed when voting on content from raw +pages. By default, 0.8.

  • +
+
+
+

Methods

+ + + + + + +

check(doc, county, state)

Check if the document belongs to the county.

+
+
+async check(doc, county, state)[source]
+

Check if the document belongs to the county.

+
+
Parameters:
+
    +
  • doc (elm.web.document.BaseDocument) – Document instance. Should contain a “source” key in the +metadata that contains a URL (used for the URL validation +check). Raw content will be parsed for county name and +correct jurisdiction.

  • +
  • county (str) – County that document should belong to.

  • +
  • state (str) – State corresponding to county input.

  • +
+
+
Returns:
+

boolTrue if the doc contents pertain to the input county. +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.location.FixedMessageValidator.html b/_autosummary/elm.ords.validation.location.FixedMessageValidator.html new file mode 100644 index 00000000..622763b3 --- /dev/null +++ b/_autosummary/elm.ords.validation.location.FixedMessageValidator.html @@ -0,0 +1,377 @@ + + + + + + + elm.ords.validation.location.FixedMessageValidator — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.location.FixedMessageValidator

+
+
+class FixedMessageValidator(structured_llm_caller)[source]
+

Bases: ABC

+

Validation base class using a static system prompt.

+
+
Parameters:
+

structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

+
+
+

Methods

+ + + + + + +

check(content, **fmt_kwargs)

Check if the content passes the validation.

+

Attributes

+ + + + + + +

SYSTEM_MESSAGE

LLM system message describing validation task.

+
+
+SYSTEM_MESSAGE = None
+

LLM system message describing validation task.

+
+ +
+
+async check(content, **fmt_kwargs)[source]
+

Check if the content passes the validation.

+

The exact validation is outlined in the class SYSTEM_MESSAGE.

+
+
Parameters:
+
    +
  • content (str) – Document content to validate.

  • +
  • **fmt_kwargs – Keyword arguments to be passed to SYSTEM_MESSAGE.format().

  • +
+
+
Returns:
+

boolTrue if the content passes the validation check, +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.location.URLValidator.html b/_autosummary/elm.ords.validation.location.URLValidator.html new file mode 100644 index 00000000..1eda417a --- /dev/null +++ b/_autosummary/elm.ords.validation.location.URLValidator.html @@ -0,0 +1,377 @@ + + + + + + + elm.ords.validation.location.URLValidator — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.location.URLValidator

+
+
+class URLValidator(structured_llm_caller)[source]
+

Bases: FixedMessageValidator

+

Validator that checks whether a URL matches a county.

+
+
Parameters:
+

structured_llm_caller (elm.ords.llm.StructuredLLMCaller) – StructuredLLMCaller instance. Used for structured validation +queries.

+
+
+

Methods

+ + + + + + +

check(content, **fmt_kwargs)

Check if the content passes the validation.

+

Attributes

+ + + + + + +

SYSTEM_MESSAGE

LLM system message describing validation task.

+
+
+SYSTEM_MESSAGE = "You extract structured data from a URL. Return your answer in JSON format. Your JSON file must include exactly two keys. The first key is 'correct_county', which is a boolean that is set to `True` if the URL mentions {county} County in some way. DO NOT infer based on information in the URL about any US state, city, township, or otherwise. `False` if not sure. The second key is 'correct_state', which is a boolean that is set to `True` if the URL mentions {state} State in some way. DO NOT infer based on information in the URL about any US county, city, township, or otherwise. `False` if not sure."
+

LLM system message describing validation task.

+
+ +
+
+async check(content, **fmt_kwargs)
+

Check if the content passes the validation.

+

The exact validation is outlined in the class SYSTEM_MESSAGE.

+
+
Parameters:
+
    +
  • content (str) – Document content to validate.

  • +
  • **fmt_kwargs – Keyword arguments to be passed to SYSTEM_MESSAGE.format().

  • +
+
+
Returns:
+

boolTrue if the content passes the validation check, +False otherwise.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.ords.validation.location.html b/_autosummary/elm.ords.validation.location.html new file mode 100644 index 00000000..d26a2eb6 --- /dev/null +++ b/_autosummary/elm.ords.validation.location.html @@ -0,0 +1,345 @@ + + + + + + + elm.ords.validation.location — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.ords.validation.location

+

ELM Ordinance Location Validation logic

+

These are primarily used to validate that a legal document applies to a +particular location.

+

Classes

+ + + + + + + + + + + + + + + + + + +

CountyJurisdictionValidator(...)

Validator that checks whether text applies at the county level.

CountyNameValidator(structured_llm_caller)

Validator that checks whether text applies to a particular county.

CountyValidator(structured_llm_caller[, ...])

ELM Ords County validator.

FixedMessageValidator(structured_llm_caller)

Validation base class using a static system prompt.

URLValidator(structured_llm_caller)

Validator that checks whether a URL matches a county.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.osti.OstiList.html b/_autosummary/elm.osti.OstiList.html new file mode 100644 index 00000000..6579573f --- /dev/null +++ b/_autosummary/elm.osti.OstiList.html @@ -0,0 +1,529 @@ + + + + + + + elm.osti.OstiList — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.osti.OstiList

+
+
+class OstiList(url, n_pages=1)[source]
+

Bases: list

+

Class to retrieve and handle multiple OSTI records from an API URL.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OSTI API URL to request, see this for details:

    https://www.osti.gov/api/v1/docs

    +
    +
    +
  • +
  • n_pages (int) – Number of pages to get from the API. Typical response has 20 +entries per page. Default of 1 ensures that this class doesn't hang +on a million responses.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

append(object, /)

Append object to the end of the list.

clear()

Remove all items from list.

copy()

Return a shallow copy of the list.

count(value, /)

Return number of occurrences of value.

download(out_dir)

Download all PDFs from the records in this OSTI object into a directory.

extend(iterable, /)

Extend list by appending elements from the iterable.

from_osti_ids(oids)

Initialize OSTI records from one or more numerical IDS

index(value[, start, stop])

Return first index of value.

insert(index, object, /)

Insert object before index.

pop([index])

Remove and return item at index (default last).

remove(value, /)

Remove first occurrence of value.

reverse()

Reverse IN PLACE.

sort(*[, key, reverse])

Sort the list in ascending order and return None.

+

Attributes

+ + + + + + + + + +

BASE_URL

Base OSTI API URL.

meta

Get a meta dataframe with details on all of the OSTI records.

+
+
+BASE_URL = 'https://www.osti.gov/api/v1/records'
+

Base OSTI API URL. This can be appended with search parameters

+
+ +
+
+download(out_dir)[source]
+

Download all PDFs from the records in this OSTI object into a +directory. PDFs will be given file names based on their OSTI record ID

+
+
Parameters:
+

out_dir (str) – Directory to download PDFs to. This directory will be created if it +does not already exist.

+
+
+
+ +
+
+property meta
+

Get a meta dataframe with details on all of the OSTI records.

+
+
Returns:
+

pd.DataFrame

+
+
+
+ +
+
+__add__(value, /)
+

Return self+value.

+
+ +
+
+__mul__(value, /)
+

Return self*value.

+
+ +
+
+append(object, /)
+

Append object to the end of the list.

+
+ +
+
+clear()
+

Remove all items from list.

+
+ +
+
+copy()
+

Return a shallow copy of the list.

+
+ +
+
+count(value, /)
+

Return number of occurrences of value.

+
+ +
+
+extend(iterable, /)
+

Extend list by appending elements from the iterable.

+
+ +
+
+classmethod from_osti_ids(oids)[source]
+

Initialize OSTI records from one or more numerical IDS

+
+
Parameters:
+

oids (list) – List of string or integer OSTI IDs which are typically 7 digit +numbers

+
+
Returns:
+

out (OstiList) – OstiList object with entries for each oid input.

+
+
+
+ +
+
+index(value, start=0, stop=9223372036854775807, /)
+

Return first index of value.

+

Raises ValueError if the value is not present.

+
+ +
+
+insert(index, object, /)
+

Insert object before index.

+
+ +
+
+pop(index=-1, /)
+

Remove and return item at index (default last).

+

Raises IndexError if list is empty or index is out of range.

+
+ +
+
+remove(value, /)
+

Remove first occurrence of value.

+

Raises ValueError if the value is not present.

+
+ +
+
+reverse()
+

Reverse IN PLACE.

+
+ +
+
+sort(*, key=None, reverse=False)
+

Sort the list in ascending order and return None.

+

The sort is in-place (i.e. the list itself is modified) and stable (i.e. the +order of two equal elements is maintained).

+

If a key function is given, apply it once to each list item and sort them, +ascending or descending, according to their function values.

+

The reverse flag can be set to sort in descending order.

+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.osti.OstiRecord.html b/_autosummary/elm.osti.OstiRecord.html new file mode 100644 index 00000000..1e0530e0 --- /dev/null +++ b/_autosummary/elm.osti.OstiRecord.html @@ -0,0 +1,563 @@ + + + + + + + elm.osti.OstiRecord — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.osti.OstiRecord

+
+
+class OstiRecord(record)[source]
+

Bases: dict

+

Class to handle a single OSTI record as dictionary data

+
+
Parameters:
+

record (dict) – OSTI record in dict form, typically a response from OSTI API.

+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

clear()

copy()

download(fp)

Download the PDF of this record

fromkeys([value])

Create a new dictionary with keys from iterable and values set to value.

get(key[, default])

Return the value for key if key is in the dictionary, else default.

items()

keys()

pop(k[,d])

If key is not found, default is returned if given, otherwise KeyError is raised

popitem()

Remove and return a (key, value) pair as a 2-tuple.

setdefault(key[, default])

Insert key with a value of default if key is not in the dictionary.

strip_nested_brackets(text)

Remove text between brackets/parentheses for cleaning OSTI text

update([E, ]**F)

If E is present and has a .keys() method, then does: for k in E: D[k] = E[k] If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v In either case, this is followed by: for k in F: D[k] = F[k]

values()

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + +

authors

Get the list of authors of this record.

date

Get the date of publication of this record

doi

Get the DOI of this record

osti_id

Get the OSTI ID of this record which is typically a 7 digit number

title

Get the title of this record

url

Get the download URL of this record

year

Get the year of publication of this record

+
+
+static strip_nested_brackets(text)[source]
+

Remove text between brackets/parentheses for cleaning OSTI text

+
+ +
+
+property authors
+

Get the list of authors of this record.

+
+
Returns:
+

str

+
+
+
+ +
+
+property title
+

Get the title of this record

+
+
Returns:
+

str | None

+
+
+
+ +
+
+property year
+

Get the year of publication of this record

+
+
Returns:
+

str | None

+
+
+
+ +
+
+property date
+

Get the date of publication of this record

+
+
Returns:
+

str | None

+
+
+
+ +
+
+property doi
+

Get the DOI of this record

+
+
Returns:
+

str | None

+
+
+
+ +
+
+property osti_id
+

Get the OSTI ID of this record which is typically a 7 digit number

+
+
Returns:
+

str | None

+
+
+
+ +
+
+property url
+

Get the download URL of this record

+
+
Returns:
+

str | None

+
+
+
+ +
+
+download(fp)[source]
+

Download the PDF of this record

+
+
Parameters:
+

fp (str) – Filepath to download this record to, typically a .pdf

+
+
+
+ +
+
+clear() None.  Remove all items from D.
+
+ +
+
+copy() a shallow copy of D
+
+ +
+
+fromkeys(value=None, /)
+

Create a new dictionary with keys from iterable and values set to value.

+
+ +
+
+get(key, default=None, /)
+

Return the value for key if key is in the dictionary, else default.

+
+ +
+
+items() a set-like object providing a view on D's items
+
+ +
+
+keys() a set-like object providing a view on D's keys
+
+ +
+
+pop(k[, d]) v, remove specified key and return the corresponding value.
+

If key is not found, default is returned if given, otherwise KeyError is raised

+
+ +
+
+popitem()
+

Remove and return a (key, value) pair as a 2-tuple.

+

Pairs are returned in LIFO (last-in, first-out) order. +Raises KeyError if the dict is empty.

+
+ +
+
+setdefault(key, default=None, /)
+

Insert key with a value of default if key is not in the dictionary.

+

Return the value for key if key is in the dictionary, else default.

+
+ +
+
+update([E, ]**F) None.  Update D from dict/iterable E and F.
+

If E is present and has a .keys() method, then does: for k in E: D[k] = E[k] +If E is present and lacks a .keys() method, then does: for k, v in E: D[k] = v +In either case, this is followed by: for k in F: D[k] = F[k]

+
+ +
+
+values() an object providing a view on D's values
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.osti.html b/_autosummary/elm.osti.html new file mode 100644 index 00000000..c0a69a3a --- /dev/null +++ b/_autosummary/elm.osti.html @@ -0,0 +1,332 @@ + + + + + + + elm.osti — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.osti

+

Utilities for retrieving data from OSTI.

+

Classes

+ + + + + + + + + +

OstiList(url[, n_pages])

Class to retrieve and handle multiple OSTI records from an API URL.

OstiRecord(record)

Class to handle a single OSTI record as dictionary data

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.pdf.PDFtoTXT.html b/_autosummary/elm.pdf.PDFtoTXT.html new file mode 100644 index 00000000..1b627e25 --- /dev/null +++ b/_autosummary/elm.pdf.PDFtoTXT.html @@ -0,0 +1,824 @@ + + + + + + + elm.pdf.PDFtoTXT — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.pdf.PDFtoTXT

+
+
+class PDFtoTXT(fp, page_range=None, model=None)[source]
+

Bases: ApiBase

+

Class to parse text from a PDF document.

+
+
Parameters:
+
    +
  • fp (str) – Filepath to .pdf file to extract.

  • +
  • page_range (None | list) – Optional 2-entry list/tuple to set starting and ending pages +(python indexing)

  • +
  • model (None | str) – Optional specification of OpenAI model to use. Default is +cls.DEFAULT_MODEL

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

call_api(url, headers, request_json)

Make an asynchronous OpenAI API call.

call_api_async(url, headers, all_request_jsons)

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

chat(query[, temperature])

Have a continuous chat with the LLM including context from previous chat() calls stored as attributes in this class.

clean_headers([char_thresh, page_thresh, ...])

Clean headers/footers that are duplicated across pages

clean_poppler([layout])

Clean the pdf using the poppler pdftotxt utility

clean_txt()

Use GPT to clean raw pdf text in serial calls to the OpenAI API.

clean_txt_async([ignore_error, rate_limit])

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

clear()

Clear chat history and reduce messages to just the initial model role message.

count_tokens(text, model)

Return the number of tokens in a string.

generic_async_query(queries[, model_role, ...])

Run a number of generic single queries asynchronously (not conversational)

generic_query(query[, model_role, temperature])

Ask a generic single query without conversation

get_embedding(text)

Get the 1D array (list) embedding of a text string.

is_double_col([separator])

Does the text look like it has multiple vertical text columns?

load_pdf(page_range)

Basic load of pdf to text strings

make_gpt_messages(pdf_raw_text)

Make the chat completion messages list for input to GPT

validate_clean()

Run some basic checks on the GPT cleaned text vs.

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

DEFAULT_MODEL

Default model to do pdf text cleaning.

EMBEDDING_MODEL

Default model to do text embeddings.

EMBEDDING_URL

OpenAI embedding API URL

HEADERS

OpenAI API Headers

MODEL_INSTRUCTION

Instructions to the model with python format braces for pdf text

MODEL_ROLE

High level model role.

URL

OpenAI API URL to be used with environment variable OPENAI_API_KEY.

all_messages_txt

Get a string printout of the full conversation with the LLM

+
+
+MODEL_ROLE = 'You clean up poorly formatted text extracted from PDF documents.'
+

High level model role.

+
+ +
+
+MODEL_INSTRUCTION = 'Text extracted from a PDF: \n"""\n{}\n"""\n\nThe text above was extracted from a PDF document. Can you make it nicely formatted? Please only return the formatted text without comments or added information.'
+

Instructions to the model with python format braces for pdf text

+
+ +
+
+load_pdf(page_range)[source]
+

Basic load of pdf to text strings

+
+
Parameters:
+

page_range (None | list) – Optional 2-entry list/tuple to set starting and ending pages +(python indexing)

+
+
Returns:
+

out (list) – List of strings where each entry is a page. This is the raw PDF +text before GPT cleaning

+
+
+
+ +
+
+make_gpt_messages(pdf_raw_text)[source]
+

Make the chat completion messages list for input to GPT

+
+
Parameters:
+

pdf_raw_text (str) – Raw PDF text to be cleaned

+
+
Returns:
+

messages (list) – Messages for OpenAI chat completion model. Typically this looks +like this:

+
+
+
[{“role”: “system”, “content”: “You do this…”},

{“role”: “user”, “content”: “Please do this: {}”}]

+
+
+
+

+
+
+
+ +
+
+clean_txt()[source]
+

Use GPT to clean raw pdf text in serial calls to the OpenAI API.

+
+
Returns:
+

clean_pages (list) – List of clean text strings where each list entry is a page from the +PDF

+
+
+
+ +
+
+async clean_txt_async(ignore_error=None, rate_limit=40000.0)[source]
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

clean_pages (list) – List of clean text strings where each list entry is a page from the +PDF

+
+
+
+ +
+
+is_double_col(separator='    ')[source]
+

Does the text look like it has multiple vertical text columns?

+
+
Parameters:
+

separator (str) – Heuristic split string to look for spaces between columns

+
+
Returns:
+

out (bool) – True if more than one vertical text column

+
+
+
+ +
+
+clean_poppler(layout=True)[source]
+

Clean the pdf using the poppler pdftotxt utility

+
+
Requires the pdftotext command line utility from this software:

https://poppler.freedesktop.org/

+
+
+
+
Parameters:
+

layout (bool) – Layout flag for poppler pdftotxt utility: “maintain original +physical layout”. Layout=True works well for single column text, +layout=False collapses the double columns into single columns which +works better for downstream chunking and LLM work.

+
+
Returns:
+

out (str) – Joined cleaned pages

+
+
+
+ +
+
+DEFAULT_MODEL = 'gpt-3.5-turbo'
+

Default model to do pdf text cleaning.

+
+ +
+
+EMBEDDING_MODEL = 'text-embedding-ada-002'
+

Default model to do text embeddings.

+
+ +
+
+EMBEDDING_URL = 'https://api.openai.com/v1/embeddings'
+

OpenAI embedding API URL

+
+ +
+
+HEADERS = {'Authorization': 'Bearer None', 'Content-Type': 'application/json', 'api-key': 'None'}
+

OpenAI API Headers

+
+ +
+
+URL = 'https://api.openai.com/v1/chat/completions'
+

OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use +an Azure API endpoint to trigger Azure usage along with environment +variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and +AZURE_OPENAI_ENDPOINT

+
+ +
+
+property all_messages_txt
+

Get a string printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+async static call_api(url, headers, request_json)
+

Make an asynchronous OpenAI API call.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • request_json (dict) –

    +
    +
    API data input, typically looks like this for chat completion:
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
    +
  • +
+
+
Returns:
+

out (dict) – API response in json format

+
+
+
+ +
+
+async call_api_async(url, headers, all_request_jsons, ignore_error=None, rate_limit=40000.0)
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

out (list) – List of API outputs where each list entry is a GPT answer from the +corresponding message in the all_request_jsons input.

+
+
+
+ +
+
+chat(query, temperature=0)
+

Have a continuous chat with the LLM including context from previous +chat() calls stored as attributes in this class.

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+clear()
+

Clear chat history and reduce messages to just the initial model +role message.

+
+ +
+
+static count_tokens(text, model)
+

Return the number of tokens in a string.

+
+
Parameters:
+
    +
  • text (str) – Text string to get number of tokens for

  • +
  • model (str) – specification of OpenAI model to use (e.g., “gpt-3.5-turbo”)

  • +
+
+
Returns:
+

n (int) – Number of tokens in text

+
+
+
+ +
+
+async generic_async_query(queries, model_role=None, temperature=0, ignore_error=None, rate_limit=40000.0)
+

Run a number of generic single queries asynchronously +(not conversational)

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • query (list) – Questions to ask ChatGPT (list of strings)

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

response (list) – Model responses with same length as query input.

+
+
+
+ +
+
+generic_query(query, model_role=None, temperature=0)
+

Ask a generic single query without conversation

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+classmethod get_embedding(text)
+

Get the 1D array (list) embedding of a text string.

+
+
Parameters:
+

text (str) – Text to embed

+
+
Returns:
+

embedding (list) – List of float that represents the numerical embedding of the text

+
+
+
+ +
+
+validate_clean()[source]
+

Run some basic checks on the GPT cleaned text vs. the raw text

+
+ +
+
+clean_headers(char_thresh=0.6, page_thresh=0.8, split_on='\n', iheaders=(0, 1, -2, -1))[source]
+

Clean headers/footers that are duplicated across pages

+
+
Parameters:
+
    +
  • char_thresh (float) – Fraction of characters in a given header that are similar between +pages to be considered for removal

  • +
  • page_thresh (float) – Fraction of pages that share the header to be considered for +removal

  • +
  • split_on (str) – Chars to split lines of a page on

  • +
  • iheaders (list | tuple) – Integer indices to look for headers after splitting a page into +lines based on split_on. This needs to go from the start of the +page to the end.

  • +
+
+
Returns:
+

out (str) – Clean text with all pages joined

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.pdf.html b/_autosummary/elm.pdf.html new file mode 100644 index 00000000..cc80ca59 --- /dev/null +++ b/_autosummary/elm.pdf.html @@ -0,0 +1,329 @@ + + + + + + + elm.pdf — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.pdf

+

ELM PDF to text parser

+

Classes

+ + + + + + +

PDFtoTXT(fp[, page_range, model])

Class to parse text from a PDF document.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.summary.Summary.html b/_autosummary/elm.summary.Summary.html new file mode 100644 index 00000000..15bf9834 --- /dev/null +++ b/_autosummary/elm.summary.Summary.html @@ -0,0 +1,742 @@ + + + + + + + elm.summary.Summary — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.summary.Summary

+
+
+class Summary(text, model=None, n_words=500, **chunk_kwargs)[source]
+

Bases: ApiBase

+

Interface to perform Recursive Summarization and Distillation of +research text

+
+
Parameters:
+
    +
  • text (str | list) – Single body of text to chunk up using elm.Chunker or a pre-chunked +list of strings. Works well if this is a single document with empty +lines between paragraphs.

  • +
  • model (str) – GPT model name, default is the DEFAULT_MODEL global var

  • +
  • n_words (int) – Desired length of the output text. Note that this is never perfect +but helps guide the LLM to an approximate desired output length. +400-600 words seems to work quite well with GPT-4. This gets +formatted into the MODEL_INSTRUCTION attribute.

  • +
  • chunk_kwargs (dict | None) – kwargs for initialization of elm.chunk.Chunker

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

call_api(url, headers, request_json)

Make an asyncronous OpenAI API call.

call_api_async(url, headers, all_request_jsons)

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

chat(query[, temperature])

Have a continuous chat with the LLM including context from previous chat() calls stored as attributes in this class.

clear()

Clear chat history and reduce messages to just the initial model role message.

combine(text_summary)

Combine separate chunk summaries into one more comprehensive narrative

count_tokens(text, model)

Return the number of tokens in a string.

generic_async_query(queries[, model_role, ...])

Run a number of generic single queries asynchronously (not conversational)

generic_query(query[, model_role, temperature])

Ask a generic single query without conversation

get_embedding(text)

Get the 1D array (list) embedding of a text string.

run([temperature, fancy_combine])

Use GPT to do a summary of input text.

run_async([temperature, ignore_error, ...])

Run text summary asynchronously for all text chunks

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

DEFAULT_MODEL

Default model to do pdf text cleaning.

EMBEDDING_MODEL

Default model to do text embeddings.

EMBEDDING_URL

OpenAI embedding API URL

HEADERS

OpenAI API Headers

MODEL_INSTRUCTION

Prefix to the engineered prompt.

MODEL_ROLE

High level model role, somewhat redundant to MODEL_INSTRUCTION

URL

OpenAI API URL to be used with environment variable OPENAI_API_KEY.

all_messages_txt

Get a string printout of the full conversation with the LLM

+
+
+MODEL_ROLE = 'You are an energy scientist summarizing prior research'
+

High level model role, somewhat redundant to MODEL_INSTRUCTION

+
+ +
+
+MODEL_INSTRUCTION = 'Can you please summarize the text quoted above in {n_words} words?\n\n"""\n{text_chunk}\n"""'
+

Prefix to the engineered prompt. The format args text_chunk and +n_words will be formatted by the Summary class at runtime. text_chunk +will be provided by the Summary text chunks, n_words is an initialization +argument for the Summary class.

+
+ +
+
+DEFAULT_MODEL = 'gpt-3.5-turbo'
+

Default model to do pdf text cleaning.

+
+ +
+
+EMBEDDING_MODEL = 'text-embedding-ada-002'
+

Default model to do text embeddings.

+
+ +
+
+EMBEDDING_URL = 'https://api.openai.com/v1/embeddings'
+

OpenAI embedding API URL

+
+ +
+
+HEADERS = {'Authorization': 'Bearer None', 'Content-Type': 'application/json', 'api-key': 'None'}
+

OpenAI API Headers

+
+ +
+
+URL = 'https://api.openai.com/v1/chat/completions'
+

OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use +an Azure API endpoint to trigger Azure usage along with environment +variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and +AZURE_OPENAI_ENDPOINT

+
+ +
+
+property all_messages_txt
+

Get a string printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+async static call_api(url, headers, request_json)
+

Make an asyncronous OpenAI API call.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • request_json (dict) –

    +
    +
    API data input, typically looks like this for chat completion:
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
    +
  • +
+
+
Returns:
+

out (dict) – API response in json format

+
+
+
+ +
+
+async call_api_async(url, headers, all_request_jsons, ignore_error=None, rate_limit=40000.0)
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

out (list) – List of API outputs where each list entry is a GPT answer from the +corresponding message in the all_request_jsons input.

+
+
+
+ +
+
+chat(query, temperature=0)
+

Have a continuous chat with the LLM including context from previous +chat() calls stored as attributes in this class.

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+clear()
+

Clear chat history and reduce messages to just the initial model +role message.

+
+ +
+
+static count_tokens(text, model)
+

Return the number of tokens in a string.

+
+
Parameters:
+
    +
  • text (str) – Text string to get number of tokens for

  • +
  • model (str) – specification of OpenAI model to use (e.g., “gpt-3.5-turbo”)

  • +
+
+
Returns:
+

n (int) – Number of tokens in text

+
+
+
+ +
+
+async generic_async_query(queries, model_role=None, temperature=0, ignore_error=None, rate_limit=40000.0)
+

Run a number of generic single queries asynchronously +(not conversational)

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • query (list) – Questions to ask ChatGPT (list of strings)

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

response (list) – Model responses with same length as query input.

+
+
+
+ +
+
+generic_query(query, model_role=None, temperature=0)
+

Ask a generic single query without conversation

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+classmethod get_embedding(text)
+

Get the 1D array (list) embedding of a text string.

+
+
Parameters:
+

text (str) – Text to embed

+
+
Returns:
+

embedding (list) – List of float that represents the numerical embedding of the text

+
+
+
+ +
+
+combine(text_summary)[source]
+

Combine separate chunk summaries into one more comprehensive +narrative

+
+
Parameters:
+

summary (str) – Summary of text. May be several disjointed paragraphs

+
+
Returns:
+

summary (str) – Summary of text. Paragraphs will be more cohesive.

+
+
+
+ +
+
+run(temperature=0, fancy_combine=True)[source]
+

Use GPT to do a summary of input text.

+
+
Parameters:
+
    +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • fancy_combine (bool) – Flag to use the GPT model to combine the separate outputs into a +cohesive summary.

  • +
+
+
Returns:
+

summary (str) – Summary of text.

+
+
+
+ +
+
+async run_async(temperature=0, ignore_error=None, rate_limit=40000.0, fancy_combine=True)[source]
+

Run text summary asynchronously for all text chunks

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
  • fancy_combine (bool) – Flag to use the GPT model to combine the separate outputs into a +cohesive summary.

  • +
+
+
Returns:
+

summary (str) – Summary of text.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.summary.html b/_autosummary/elm.summary.html new file mode 100644 index 00000000..ce613b3e --- /dev/null +++ b/_autosummary/elm.summary.html @@ -0,0 +1,329 @@ + + + + + + + elm.summary — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.summary

+

Research Summarization and Distillation with LLMs

+

Classes

+ + + + + + +

Summary(text[, model, n_words])

Interface to perform Recursive Summarization and Distillation of research text

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.tree.DecisionTree.html b/_autosummary/elm.tree.DecisionTree.html new file mode 100644 index 00000000..5051334f --- /dev/null +++ b/_autosummary/elm.tree.DecisionTree.html @@ -0,0 +1,485 @@ + + + + + + + elm.tree.DecisionTree — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.tree.DecisionTree

+
+
+class DecisionTree(graph)[source]
+

Bases: object

+

Class to traverse a directed graph of LLM prompts. Nodes are +prompts and edges are transitions between prompts based on conditions +being met in the LLM response.

+

Class to traverse a directed graph of LLM prompts. Nodes are +prompts and edges are transitions between prompts based on conditions +being met in the LLM response.

+

Examples

+

Here’s a simple example to setup a decision tree graph and run with the +DecisionTree class:

+
>>> import logging
+>>> import networkx as nx
+>>> from rex import init_logger
+>>> from elm.base import ApiBase
+>>> from elm.tree import DecisionTree
+>>>
+>>> init_logger('elm.tree')
+>>>
+>>> G = nx.DiGraph(text='hello', name='Grant',
+                   api=ApiBase(model='gpt-35-turbo'))
+>>>
+>>> G.add_node('init', prompt='Say {text} to {name}')
+>>> G.add_edge('init', 'next', condition=lambda x: 'Grant' in x)
+>>> G.add_node('next', prompt='How are you?')
+>>>
+>>> tree = DecisionTree(G)
+>>> out = tree.run()
+>>>
+>>> print(tree.all_messages_txt)
+
+
+
+
Parameters:
+

graph (nx.DiGraph) – Directed acyclic graph where nodes are LLM prompts and edges are +logical transitions based on the response. Must have high-level +graph attribute “api” which is an ApiBase instance. Nodes should +have attribute “prompt” which can have {format} named arguments +that will be filled from the high-level graph attributes. Edges can +have attribute “condition” that is a callable to be executed on the +LLM response text. An edge from a node without a condition acts as +an “else” statement if no other edge conditions are satisfied. A +single edge from node to node does not need a condition.

+
+
+

Methods

+ + + + + + + + + +

call_node(node0)

Call the LLM with the prompt from the input node and search the successor edges for a valid transition condition

run([node0])

Traverse the decision tree starting at the input node.

+

Attributes

+ + + + + + + + + + + + + + + + + + +

all_messages_txt

Get a printout of the full conversation with the LLM

api

Get the ApiBase object.

graph

Get the networkx graph object

history

Get a record of the nodes traversed in the tree

messages

Get a list of the conversation messages with the LLM.

+
+
+property api
+

Get the ApiBase object.

+
+
Returns:
+

ApiBase

+
+
+
+ +
+
+property messages
+

Get a list of the conversation messages with the LLM.

+
+
Returns:
+

list

+
+
+
+ +
+
+property all_messages_txt
+

Get a printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+property history
+

Get a record of the nodes traversed in the tree

+
+
Returns:
+

list

+
+
+
+ +
+
+property graph
+

Get the networkx graph object

+
+
Returns:
+

nx.DiGraph

+
+
+
+ +
+
+call_node(node0)[source]
+

Call the LLM with the prompt from the input node and search the +successor edges for a valid transition condition

+
+
Parameters:
+

node0 (str) – Name of node being executed.

+
+
Returns:
+

out (str) – Next node or LLM response if at a leaf node.

+
+
+
+ +
+
+run(node0='init')[source]
+

Traverse the decision tree starting at the input node.

+
+
Parameters:
+

node0 (str) – Name of starting node in the graph. This is typically called “init”

+
+
Returns:
+

out (str) – Final response from LLM at the leaf node.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.tree.html b/_autosummary/elm.tree.html new file mode 100644 index 00000000..44cd9d81 --- /dev/null +++ b/_autosummary/elm.tree.html @@ -0,0 +1,329 @@ + + + + + + + elm.tree — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.tree

+

ELM decision trees.

+

Classes

+ + + + + + +

DecisionTree(graph)

Class to traverse a directed graph of LLM prompts.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.html b/_autosummary/elm.utilities.html new file mode 100644 index 00000000..e0d96fe9 --- /dev/null +++ b/_autosummary/elm.utilities.html @@ -0,0 +1,331 @@ + + + + + + + elm.utilities — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities

+

ELM utility classes and functions.

+ + + + + + + + + +

elm.utilities.parse

ELM parsing utilities.

elm.utilities.retry

ELM retry utilities.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.clean_headers.html b/_autosummary/elm.utilities.parse.clean_headers.html new file mode 100644 index 00000000..d67a2367 --- /dev/null +++ b/_autosummary/elm.utilities.parse.clean_headers.html @@ -0,0 +1,348 @@ + + + + + + + elm.utilities.parse.clean_headers — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.clean_headers

+
+
+clean_headers(pages, char_thresh=0.6, page_thresh=0.8, split_on='\n', iheaders=(0, 1, -2, -1))[source]
+

Clean headers/footers that are duplicated across pages of a document.

+

Note that this function will update the items within the pages +input.

+
+
Parameters:
+
    +
  • pages (list) – List of pages (as str) from document.

  • +
  • char_thresh (float) – Fraction of characters in a given header that are similar +between pages to be considered for removal

  • +
  • page_thresh (float) – Fraction of pages that share the header to be considered for +removal

  • +
  • split_on (str) – Chars to split lines of a page on

  • +
  • iheaders (list | tuple) – Integer indices to look for headers after splitting a page into +lines based on split_on. This needs to go from the start of the +page to the end.

  • +
+
+
Returns:
+

out (str) – Clean text with all pages joined

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.combine_pages.html b/_autosummary/elm.utilities.parse.combine_pages.html new file mode 100644 index 00000000..f5dbec03 --- /dev/null +++ b/_autosummary/elm.utilities.parse.combine_pages.html @@ -0,0 +1,336 @@ + + + + + + + elm.utilities.parse.combine_pages — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.combine_pages

+
+
+combine_pages(pages)[source]
+

Combine pages of GPT cleaned text into a single string.

+
+
Parameters:
+

pages (list) – List of pages (as str) from document.

+
+
Returns:
+

full (str) – Single multi-page string

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.format_html_tables.html b/_autosummary/elm.utilities.parse.format_html_tables.html new file mode 100644 index 00000000..084c114c --- /dev/null +++ b/_autosummary/elm.utilities.parse.format_html_tables.html @@ -0,0 +1,344 @@ + + + + + + + elm.utilities.parse.format_html_tables — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.format_html_tables

+
+
+format_html_tables(text, **kwargs)[source]
+

Format tables within HTML text into pretty markdown.

+

Note that if pandas does not detect enough tables in the text to +match the “<table>” tags, no replacement is performed at all.

+
+
Parameters:
+
    +
  • text (str) – HTML text, possible containing tables enclosed by the +“<table>” tag.

  • +
  • **kwargs – Keyword-arguments to pass to pandas.DataFrame.to_markdown +function. Must not contain the “headers” keyword (this is +supplied internally).

  • +
+
+
Returns:
+

str – Text with HTML tables (if any) converted to markdown.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.html b/_autosummary/elm.utilities.parse.html new file mode 100644 index 00000000..4f5ff7e3 --- /dev/null +++ b/_autosummary/elm.utilities.parse.html @@ -0,0 +1,363 @@ + + + + + + + elm.utilities.parse — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse

+

ELM parsing utilities.

+

Functions

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

clean_headers(pages[, char_thresh, ...])

Clean headers/footers that are duplicated across pages of a document.

combine_pages(pages)

Combine pages of GPT cleaned text into a single string.

format_html_tables(text, **kwargs)

Format tables within HTML text into pretty markdown.

html_to_text(html[, ignore_links])

Call to HTML2Text class with basic args.

is_multi_col(text[, separator])

Does the text look like it has multiple vertical text columns?

read_pdf(pdf_bytes[, verbose])

Read PDF contents from bytes.

read_pdf_ocr(pdf_bytes[, verbose])

Read PDF contents from bytes using Optical Character recognition (OCR).

remove_blank_pages(pages)

Remove any blank pages from the iterable.

remove_empty_lines_or_page_footers(text)

Replace empty lines (potentially with page numbers only) as newlines

replace_common_pdf_conversion_chars(text)

Re-format text to remove common pdf-converter chars.

replace_excessive_newlines(text)

Replace instances of three or more newlines with \n\n

replace_multi_dot_lines(text)

Replace instances of three or more dots (.....) with just "..."

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.html_to_text.html b/_autosummary/elm.utilities.parse.html_to_text.html new file mode 100644 index 00000000..233e750f --- /dev/null +++ b/_autosummary/elm.utilities.parse.html_to_text.html @@ -0,0 +1,340 @@ + + + + + + + elm.utilities.parse.html_to_text — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.html_to_text

+
+
+html_to_text(html, ignore_links=True)[source]
+

Call to HTML2Text class with basic args.

+
+
Parameters:
+
    +
  • html (str) – HTML text extracted from the web.

  • +
  • ignore_links (bool, optional) – Option to ignore links in HTML when parsing. +By default, True.

  • +
+
+
Returns:
+

str – Text extracted from the input HTML.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.is_multi_col.html b/_autosummary/elm.utilities.parse.is_multi_col.html new file mode 100644 index 00000000..7d016339 --- /dev/null +++ b/_autosummary/elm.utilities.parse.is_multi_col.html @@ -0,0 +1,340 @@ + + + + + + + elm.utilities.parse.is_multi_col — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.is_multi_col

+
+
+is_multi_col(text, separator='    ')[source]
+

Does the text look like it has multiple vertical text columns?

+
+
Parameters:
+
    +
  • text (str) – Input text, which may or may not contain multiple vertical +columns.

  • +
  • separator (str) – Heuristic split string to look for spaces between columns

  • +
+
+
Returns:
+

out (bool) – True if more than one vertical text column

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.read_pdf.html b/_autosummary/elm.utilities.parse.read_pdf.html new file mode 100644 index 00000000..39b7b816 --- /dev/null +++ b/_autosummary/elm.utilities.parse.read_pdf.html @@ -0,0 +1,342 @@ + + + + + + + elm.utilities.parse.read_pdf — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.read_pdf

+
+
+read_pdf(pdf_bytes, verbose=True)[source]
+

Read PDF contents from bytes.

+

This method will automatically try to detect multi-column format +and load the text without a physical layout in that case.

+
+
Parameters:
+
    +
  • pdf_bytes (bytes) – Bytes corresponding to a PDF file.

  • +
  • verbose (bool, optional) – Option to log errors during parsing. By default, True.

  • +
+
+
Returns:
+

iterable – Iterable containing pages of the PDF document. This iterable +may be empty if there was an error reading the PDF file.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.read_pdf_ocr.html b/_autosummary/elm.utilities.parse.read_pdf_ocr.html new file mode 100644 index 00000000..9864448c --- /dev/null +++ b/_autosummary/elm.utilities.parse.read_pdf_ocr.html @@ -0,0 +1,360 @@ + + + + + + + elm.utilities.parse.read_pdf_ocr — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.read_pdf_ocr

+
+
+read_pdf_ocr(pdf_bytes, verbose=True)[source]
+

Read PDF contents from bytes using Optical Character recognition (OCR).

+

This method attempt to read the PDF document using OCR. This is one +of the only ways to parse a scanned PDF document. To use this +function, you will need to install the pytesseract and pdf2image +Modules. Installation guides here:

+
+
+

Windows users may also need to apply the fix described in this +answer before they can use pytesseract: http://tinyurl.com/v9xr4vrj

+
+
Parameters:
+
    +
  • pdf_bytes (bytes) – Bytes corresponding to a PDF file.

  • +
  • verbose (bool, optional) – Option to log errors during parsing. By default, True.

  • +
+
+
Returns:
+

iterable – Iterable containing pages of the PDF document. This iterable +may be empty if there was an error reading the PDF file.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.remove_blank_pages.html b/_autosummary/elm.utilities.parse.remove_blank_pages.html new file mode 100644 index 00000000..c3630bb9 --- /dev/null +++ b/_autosummary/elm.utilities.parse.remove_blank_pages.html @@ -0,0 +1,337 @@ + + + + + + + elm.utilities.parse.remove_blank_pages — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.remove_blank_pages

+
+
+remove_blank_pages(pages)[source]
+

Remove any blank pages from the iterable.

+
+
Parameters:
+

pages (iterable) – Iterable of string objects. Objects in this iterable that do not +contain any text will be removed.

+
+
Returns:
+

list – List of strings with content, or empty list.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.html b/_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.html new file mode 100644 index 00000000..8b2dd974 --- /dev/null +++ b/_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.html @@ -0,0 +1,337 @@ + + + + + + + elm.utilities.parse.remove_empty_lines_or_page_footers — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.remove_empty_lines_or_page_footers

+
+
+remove_empty_lines_or_page_footers(text)[source]
+

Replace empty lines (potentially with page numbers only) as newlines

+
+
Parameters:
+

text (str) – Text possibly containing empty lines and/or lines with only page +numbers.

+
+
Returns:
+

str – Cleaned text with no empty lines.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.html b/_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.html new file mode 100644 index 00000000..098fb598 --- /dev/null +++ b/_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.html @@ -0,0 +1,337 @@ + + + + + + + elm.utilities.parse.replace_common_pdf_conversion_chars — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.replace_common_pdf_conversion_chars

+
+
+replace_common_pdf_conversion_chars(text)[source]
+

Re-format text to remove common pdf-converter chars.

+

Chars affected include \r\n, \r and \x0c.

+
+
Parameters:
+

text (str) – Input text (presumably from pdf parser).

+
+
Returns:
+

str – Cleaned text.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.replace_excessive_newlines.html b/_autosummary/elm.utilities.parse.replace_excessive_newlines.html new file mode 100644 index 00000000..8e171207 --- /dev/null +++ b/_autosummary/elm.utilities.parse.replace_excessive_newlines.html @@ -0,0 +1,336 @@ + + + + + + + elm.utilities.parse.replace_excessive_newlines — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.replace_excessive_newlines

+
+
+replace_excessive_newlines(text)[source]
+

Replace instances of three or more newlines with \n\n

+
+
Parameters:
+

text (str) – Text possibly containing many repeated newline characters.

+
+
Returns:
+

str – Cleaned text with only a maximum of two newlines in a row.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.parse.replace_multi_dot_lines.html b/_autosummary/elm.utilities.parse.replace_multi_dot_lines.html new file mode 100644 index 00000000..65d61fa3 --- /dev/null +++ b/_autosummary/elm.utilities.parse.replace_multi_dot_lines.html @@ -0,0 +1,336 @@ + + + + + + + elm.utilities.parse.replace_multi_dot_lines — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.parse.replace_multi_dot_lines

+
+
+replace_multi_dot_lines(text)[source]
+

Replace instances of three or more dots (…..) with just “…”

+
+
Parameters:
+

text (str) – Text possibly containing many repeated dots.

+
+
Returns:
+

str – Cleaned text with only three dots max in a row.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.html b/_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.html new file mode 100644 index 00000000..d6d72142 --- /dev/null +++ b/_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.html @@ -0,0 +1,361 @@ + + + + + + + elm.utilities.retry.async_retry_with_exponential_backoff — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.retry.async_retry_with_exponential_backoff

+
+
+async_retry_with_exponential_backoff(base_delay=1, exponential_base=4, jitter=True, max_retries=3, errors=(<class 'openai.RateLimitError'>, <class 'openai.APITimeoutError'>))[source]
+

Retry an asynchronous function with exponential backoff.

+

This decorator works out-of-the-box for OpenAI chat completions +calls. To configure it for other functions, set the errors input +accordingly.

+
+
Parameters:
+
    +
  • base_delay (int, optional) – The base delay time, in seconds. This time will be multiplied by +the exponential_base (plus any jitter) during each retry +iteration. The multiplication applies at the first retry. +Therefore, if your base delay is 1 and your +exponential_base is 4 (with no jitter), the delay before +the first retry will be 1 * 4 = 4 seconds. The subsequent +delay will be 4 * 4 = 16 seconds, and so on. +By default, 1.

  • +
  • exponential_base (int, optional) – The multiplication factor applied to the base delay input. +See description of delay for an example. By default, 4.

  • +
  • jitter (bool, optional) – Option to include a random fractional adder (0 - 1) to the +exponential_base before multiplying by the delay. This can +help ensure each function call is submitted slightly offset from +other calls in a batch and therefore help avoid repeated rate +limit failures by a batch of submissions arriving simultaneously +to a service. By default, True.

  • +
  • max_retries (int, optional) – Max number of retries before raising an ELMRuntimeError. +By default, 3.

  • +
  • errors (tuple, optional) – The error class(es) to signal a retry. Other errors will be +propagated without retrying. +By default, (openai.RateLimitError, openai.APITimeoutError).

  • +
+
+
+

References

+

https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb +https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.retry.html b/_autosummary/elm.utilities.retry.html new file mode 100644 index 00000000..c64b6d94 --- /dev/null +++ b/_autosummary/elm.utilities.retry.html @@ -0,0 +1,333 @@ + + + + + + + elm.utilities.retry — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.retry

+

ELM retry utilities.

+

Functions

+ + + + + + + + + +

async_retry_with_exponential_backoff([...])

Retry an asynchronous function with exponential backoff.

retry_with_exponential_backoff([base_delay, ...])

Retry a synchronous function with exponential backoff.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.utilities.retry.retry_with_exponential_backoff.html b/_autosummary/elm.utilities.retry.retry_with_exponential_backoff.html new file mode 100644 index 00000000..10d3c5c5 --- /dev/null +++ b/_autosummary/elm.utilities.retry.retry_with_exponential_backoff.html @@ -0,0 +1,361 @@ + + + + + + + elm.utilities.retry.retry_with_exponential_backoff — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.utilities.retry.retry_with_exponential_backoff

+
+
+retry_with_exponential_backoff(base_delay=1, exponential_base=4, jitter=True, max_retries=3, errors=(<class 'openai.RateLimitError'>, <class 'openai.APITimeoutError'>))[source]
+

Retry a synchronous function with exponential backoff.

+

This decorator works out-of-the-box for OpenAI chat completions +calls. To configure it for other functions, set the errors input +accordingly.

+
+
Parameters:
+
    +
  • base_delay (int, optional) – The base delay time, in seconds. This time will be multiplied by +the exponential_base (plus any jitter) during each retry +iteration. The multiplication applies at the first retry. +Therefore, if your base delay is 1 and your +exponential_base is 4 (with no jitter), the delay before +the first retry will be 1 * 4 = 4 seconds. The subsequent +delay will be 4 * 4 = 16 seconds, and so on. +By default, 1.

  • +
  • exponential_base (int, optional) – The multiplication factor applied to the base delay input. +See description of delay for an example. By default, 4.

  • +
  • jitter (bool, optional) – Option to include a random fractional adder (0 - 1) to the +exponential_base before multiplying by the delay. This can +help ensure each function call is submitted slightly offset from +other calls in a batch and therefore help avoid repeated rate +limit failures by a batch of submissions arriving simultaneously +to a service. By default, True.

  • +
  • max_retries (int, optional) – Max number of retries before raising an ELMRuntimeError. +By default, 3.

  • +
  • errors (tuple, optional) – The error class(es) to signal a retry. Other errors will be +propagated without retrying. +By default, (openai.RateLimitError, openai.APITimeoutError).

  • +
+
+
+

References

+

https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb +https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/

+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.version.html b/_autosummary/elm.version.html new file mode 100644 index 00000000..a1cc3d7b --- /dev/null +++ b/_autosummary/elm.version.html @@ -0,0 +1,321 @@ + + + + + + + elm.version — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.version

+

ELM version number

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.document.BaseDocument.html b/_autosummary/elm.web.document.BaseDocument.html new file mode 100644 index 00000000..7e723b61 --- /dev/null +++ b/_autosummary/elm.web.document.BaseDocument.html @@ -0,0 +1,419 @@ + + + + + + + elm.web.document.BaseDocument — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.document.BaseDocument

+
+
+class BaseDocument(pages, metadata=None)[source]
+

Bases: ABC

+

Base ELM web document representation.

+
+
Parameters:
+
    +
  • pages (iterable) – Iterable of strings, where each string is a page of a +document.

  • +
  • metadata (dict, optional) – Optional dict containing metadata for the document. +By default, None.

  • +
+
+
+

Methods

+ + + +
+

Attributes

+ + + + + + + + + + + + + + + + + + +

FILE_EXTENSION

Cleaned document file extension.

WRITE_KWARGS

Dict of kwargs to pass to open when writing this doc.

empty

True if the document contains no pages.

raw_pages

List of (a limited count of) raw pages

text

Cleaned text from document

+
+
+property empty
+

True if the document contains no pages.

+
+
Type:
+

bool

+
+
+
+ +
+
+property raw_pages
+

List of (a limited count of) raw pages

+
+
Type:
+

list

+
+
+
+ +
+
+property text
+

Cleaned text from document

+
+
Type:
+

str

+
+
+
+ +
+
+abstract property WRITE_KWARGS
+

Dict of kwargs to pass to open when writing this doc.

+
+
Type:
+

dict

+
+
+
+ +
+
+abstract property FILE_EXTENSION
+

Cleaned document file extension.

+
+
Type:
+

str

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.document.HTMLDocument.html b/_autosummary/elm.web.document.HTMLDocument.html new file mode 100644 index 00000000..664a57af --- /dev/null +++ b/_autosummary/elm.web.document.HTMLDocument.html @@ -0,0 +1,418 @@ + + + + + + + elm.web.document.HTMLDocument — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.document.HTMLDocument

+
+
+class HTMLDocument(pages, metadata=None, html_table_to_markdown_kwargs=None, ignore_html_links=True, text_splitter=None)[source]
+

Bases: BaseDocument

+

ELM web HTML document

+
+
Parameters:
+
    +
  • pages (iterable) – Iterable of strings, where each string is a page of a +document.

  • +
  • metadata (dict, optional) – Optional dict containing metadata for the document. +By default, None.

  • +
  • html_table_to_markdown_kwargs (dict, optional) – Optional dictionary of keyword-value pair arguments to pass +to the format_html_tables() +function. By default, None.

  • +
  • ignore_html_links (bool, optional) – Option to ignore link in HTML text during parsing. +By default, True.

  • +
  • text_splitter (obj, optional) – Instance of an object that implements a split_text method. +The method should take text as input (str) and return a list +of text chunks. The raw pages will be passed through this +splitter to create raw pages for this document. Langchain’s +text splitters should work for this input. +By default, None, which means the original pages input +becomes the raw pages attribute.

  • +
+
+
+

Methods

+ + + +
+

Attributes

+ + + + + + + + + + + + + + + + + + + + + +

FILE_EXTENSION

HTML_TABLE_TO_MARKDOWN_KWARGS

Default format_html_tables() arguments

WRITE_KWARGS

empty

True if the document contains no pages.

raw_pages

List of (a limited count of) raw pages

text

Cleaned text from document

+
+
+HTML_TABLE_TO_MARKDOWN_KWARGS = {'floatfmt': '.5f', 'index': True, 'tablefmt': 'psql'}
+

Default format_html_tables() arguments

+
+ +
+
+property empty
+

True if the document contains no pages.

+
+
Type:
+

bool

+
+
+
+ +
+
+property raw_pages
+

List of (a limited count of) raw pages

+
+
Type:
+

list

+
+
+
+ +
+
+property text
+

Cleaned text from document

+
+
Type:
+

str

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.document.PDFDocument.html b/_autosummary/elm.web.document.PDFDocument.html new file mode 100644 index 00000000..0e041060 --- /dev/null +++ b/_autosummary/elm.web.document.PDFDocument.html @@ -0,0 +1,434 @@ + + + + + + + elm.web.document.PDFDocument — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.document.PDFDocument

+
+
+class PDFDocument(pages, metadata=None, percent_raw_pages_to_keep=25, max_raw_pages=18, num_end_pages_to_keep=2, clean_header_kwargs=None)[source]
+

Bases: BaseDocument

+

ELM web PDF document

+
+
Parameters:
+
    +
  • pages (iterable) – Iterable of strings, where each string is a page of a +document.

  • +
  • metadata (dict, optional) – Optional dict containing metadata for the document. +By default, None.

  • +
  • percent_raw_pages_to_keep (int, optional) – Percent of “raw” pages to keep. Useful for extracting info +from headers/footers of a doc, which are normally stripped +to form the “clean” text. By default, 25.

  • +
  • max_raw_pages (int, optional) – The max number of raw pages to keep. The number of raw pages +will never exceed the total of this value + +num_end_pages_to_keep. By default, 18.

  • +
  • num_end_pages_to_keep (int, optional) – Number of additional pages to keep from the end of the +document. This can be useful to extract more meta info. +The number of raw pages will never exceed the total of this +value + max_raw_pages. By default, 2.

  • +
  • clean_header_kwargs (dict, optional) – Optional dictionary of keyword-value pair arguments to pass +to the clean_headers() +function. By default, None.

  • +
+
+
+

Methods

+ + + +
+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + +

CLEAN_HEADER_KWARGS

Default clean_headers() arguments

FILE_EXTENSION

WRITE_KWARGS

empty

True if the document contains no pages.

num_raw_pages_to_keep

Number of raw pages to keep from PDF document

raw_pages

List of (a limited count of) raw pages

text

Cleaned text from document

+
+
+CLEAN_HEADER_KWARGS = {'char_thresh': 0.6, 'iheaders': [0, 1, 3, -3, -2, -1], 'page_thresh': 0.8, 'split_on': '\n'}
+

Default clean_headers() arguments

+
+ +
+
+property num_raw_pages_to_keep
+

Number of raw pages to keep from PDF document

+
+
Type:
+

int

+
+
+
+ +
+
+property empty
+

True if the document contains no pages.

+
+
Type:
+

bool

+
+
+
+ +
+
+property raw_pages
+

List of (a limited count of) raw pages

+
+
Type:
+

list

+
+
+
+ +
+
+property text
+

Cleaned text from document

+
+
Type:
+

str

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.document.html b/_autosummary/elm.web.document.html new file mode 100644 index 00000000..880dee49 --- /dev/null +++ b/_autosummary/elm.web.document.html @@ -0,0 +1,336 @@ + + + + + + + elm.web.document — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.document

+

ELM Web Document class definitions

+

Classes

+ + + + + + + + + + + + +

BaseDocument(pages[, metadata])

Base ELM web document representation.

HTMLDocument(pages[, metadata, ...])

ELM web HTML document

PDFDocument(pages[, metadata, ...])

ELM web PDF document

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.file_loader.AsyncFileLoader.html b/_autosummary/elm.web.file_loader.AsyncFileLoader.html new file mode 100644 index 00000000..45558ba6 --- /dev/null +++ b/_autosummary/elm.web.file_loader.AsyncFileLoader.html @@ -0,0 +1,428 @@ + + + + + + + elm.web.file_loader.AsyncFileLoader — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.file_loader.AsyncFileLoader

+
+
+class AsyncFileLoader(header_template=None, verify_ssl=True, aget_kwargs=None, pw_launch_kwargs=None, pdf_read_kwargs=None, html_read_kwargs=None, pdf_read_coroutine=None, html_read_coroutine=None, pdf_ocr_read_coroutine=None, file_cache_coroutine=None, browser_semaphore=None)[source]
+

Bases: object

+

Async web file (PDF or HTML) loader

+
+
Parameters:
+
    +
  • header_template (dict, optional) – Optional GET header template. If not specified, uses the +DEFAULT_HEADER_TEMPLATE defined for this class. +By default, None.

  • +
  • verify_ssl (bool, optional) – Option to use aiohttp’s default SSL check. If False, +SSL certificate validation is skipped. By default, True.

  • +
  • aget_kwargs (dict, optional) – Other kwargs to pass to aiohttp.ClientSession.get(). +By default, None.

  • +
  • pw_launch_kwargs (dict, optional) – Keyword-value argument pairs to pass to +async_playwright.chromium.launch() (only used when +reading HTML). By default, None.

  • +
  • pdf_read_kwargs (dict, optional) – Keyword-value argument pairs to pass to the +pdf_read_coroutine. By default, None.

  • +
  • html_read_kwargs (dict, optional) – Keyword-value argument pairs to pass to the +html_read_coroutine. By default, None.

  • +
  • pdf_read_coroutine (callable, optional) – PDF file read coroutine. Must be an async function. Should +accept PDF bytes as the first argument and kwargs as the +rest. Must return an elm.web.document.PDFDocument. +If None, a default function that runs in the main thread +is used. By default, None.

  • +
  • html_read_coroutine (callable, optional) – HTML file read coroutine. Must be an async function. Should +accept HTML text as the first argument and kwargs as the +rest. Must return an elm.web.document.HTMLDocument. +If None, a default function that runs in the main thread +is used. By default, None.

  • +
  • pdf_ocr_read_coroutine (callable, optional) – PDF OCR file read coroutine. Must be an async function. +Should accept PDF bytes as the first argument and kwargs as +the rest. Must return an elm.web.document.PDFDocument. +If None, PDF OCR parsing is not attempted, and any +scanned PDF URL’s will return a blank document. +By default, None.

  • +
  • file_cache_coroutine (callable, optional) – File caching coroutine. Can be used to cache files +downloaded by this class. Must accept an +Document instance as the first +argument and the file content to be written as the second +argument. If this method is not provided, no document +caching is performed. By default, None.

  • +
  • browser_semaphore (asyncio.Semaphore, optional) – Semaphore instance that can be used to limit the number of +playwright browsers open concurrently. If None, no +limits are applied. By default, None.

  • +
+
+
+

Methods

+ + + + + + + + + +

fetch(url)

Fetch a document for the given URL.

fetch_all(*urls)

Fetch documents for all requested URL's.

+

Attributes

+ + + + + + +

DEFAULT_HEADER_TEMPLATE

Default header

+
+
+DEFAULT_HEADER_TEMPLATE = {'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 'Accept-Language': 'en-US,en;q=0.5', 'Connection': 'keep-alive', 'DNT': '1', 'Referer': 'https://www.google.com/', 'Upgrade-Insecure-Requests': '1', 'User-Agent': ''}
+

Default header

+
+ +
+
+async fetch_all(*urls)[source]
+

Fetch documents for all requested URL’s.

+
+
Parameters:
+

*urls – Iterable of URL’s (as strings) to fetch.

+
+
Returns:
+

list – List of documents, one per requested URL.

+
+
+
+ +
+
+async fetch(url)[source]
+

Fetch a document for the given URL.

+
+
Parameters:
+

url (str) – URL for the document to pull down.

+
+
Returns:
+

elm.web.document.Document – Document instance containing text, if the fetch was +successful.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.file_loader.html b/_autosummary/elm.web.file_loader.html new file mode 100644 index 00000000..7fee34c0 --- /dev/null +++ b/_autosummary/elm.web.file_loader.html @@ -0,0 +1,330 @@ + + + + + + + elm.web.file_loader — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.file_loader

+

ELM Web file loader class.

+

Classes

+ + + + + + +

AsyncFileLoader([header_template, ...])

Async web file (PDF or HTML) loader

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.html b/_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.html new file mode 100644 index 00000000..8d9193f8 --- /dev/null +++ b/_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.html @@ -0,0 +1,381 @@ + + + + + + + elm.web.google_search.PlaywrightGoogleLinkSearch — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.google_search.PlaywrightGoogleLinkSearch

+
+
+class PlaywrightGoogleLinkSearch(**launch_kwargs)[source]
+

Bases: object

+

Search for top results on google and return their links

+
+
Parameters:
+

**launch_kwargs – Keyword arguments to be passed to +playwright.chromium.launch. For example, you can pass +headless=False, slow_mo=50 for a visualization of the +search.

+
+
+

Methods

+ + + + + + +

results(*queries[, num_results])

Retrieve links for the first num_results of each query.

+

Attributes

+ + + + + + +

EXPECTED_RESULTS_PER_PAGE

Number of results displayed per Google page.

+
+
+EXPECTED_RESULTS_PER_PAGE = 10
+

Number of results displayed per Google page.

+
+ +
+
+async results(*queries, num_results=10)[source]
+

Retrieve links for the first num_results of each query.

+

This function executes a google search for each input query and +returns a list of links corresponding to the top num_results.

+
+
Parameters:
+

num_results (int, optional) – Number of top results to retrieve for each query. Note that +this value can never exceed the number of results per page +(typically 10). If you pass in a larger value, it will be +reduced to the number of results per page. +By default, 10.

+
+
Returns:
+

list – List equal to the length of the input queries, where each +entry is another list containing the top num_results +links.

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.google_search.html b/_autosummary/elm.web.google_search.html new file mode 100644 index 00000000..20c5af84 --- /dev/null +++ b/_autosummary/elm.web.google_search.html @@ -0,0 +1,330 @@ + + + + + + + elm.web.google_search — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ + + + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.html b/_autosummary/elm.web.html new file mode 100644 index 00000000..61831398 --- /dev/null +++ b/_autosummary/elm.web.html @@ -0,0 +1,340 @@ + + + + + + + elm.web — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web

+

ELM Web scraping.

+ + + + + + + + + + + + + + + + + + +

elm.web.document

ELM Web Document class definitions

elm.web.file_loader

ELM Web file loader class.

elm.web.google_search

ELM Web Scraping - Google search.

elm.web.html_pw

ELM Web HTML loading with Playwright

elm.web.utilities

ELM Web Scraping utilities.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.html_pw.html b/_autosummary/elm.web.html_pw.html new file mode 100644 index 00000000..2a2c1919 --- /dev/null +++ b/_autosummary/elm.web.html_pw.html @@ -0,0 +1,331 @@ + + + + + + + elm.web.html_pw — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.html_pw

+

ELM Web HTML loading with Playwright

+

We use Playwright so that javascript text is rendered before we scrape.

+

Functions

+ + + + + + +

load_html_with_pw(url[, browser_semaphore])

Extract HTML from URL using Playwright.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.html_pw.load_html_with_pw.html b/_autosummary/elm.web.html_pw.load_html_with_pw.html new file mode 100644 index 00000000..e52149ce --- /dev/null +++ b/_autosummary/elm.web.html_pw.load_html_with_pw.html @@ -0,0 +1,343 @@ + + + + + + + elm.web.html_pw.load_html_with_pw — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.html_pw.load_html_with_pw

+
+
+async load_html_with_pw(url, browser_semaphore=None, **pw_launch_kwargs)[source]
+

Extract HTML from URL using Playwright.

+
+
Parameters:
+
    +
  • url (str) – URL to pull HTML for.

  • +
  • browser_semaphore (asyncio.Semaphore, optional) – Semaphore instance that can be used to limit the number of +playwright browsers open concurrently. If None, no limits +are applied. By default, None.

  • +
  • **pw_launch_kwargs – Keyword-value argument pairs to pass to +async_playwright.chromium.launch().

  • +
+
+
Returns:
+

str – HTML from page.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.utilities.clean_search_query.html b/_autosummary/elm.web.utilities.clean_search_query.html new file mode 100644 index 00000000..7f8c5557 --- /dev/null +++ b/_autosummary/elm.web.utilities.clean_search_query.html @@ -0,0 +1,340 @@ + + + + + + + elm.web.utilities.clean_search_query — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.utilities.clean_search_query

+
+
+clean_search_query(query)[source]
+

Check if the first character is a digit and remove it if so.

+

Some search tools (e.g., Google) will fail to return results if the +query has a leading digit: 1. “LangCh…”

+

This function will take all the text after the first double quote +(”) if a digit is detected at the beginning of the string.

+
+
Parameters:
+

query (str) – Input query that may or may not contain a leading digit.

+
+
Returns:
+

str – Cleaned query.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.utilities.compute_fn_from_url.html b/_autosummary/elm.web.utilities.compute_fn_from_url.html new file mode 100644 index 00000000..585ec8e5 --- /dev/null +++ b/_autosummary/elm.web.utilities.compute_fn_from_url.html @@ -0,0 +1,343 @@ + + + + + + + elm.web.utilities.compute_fn_from_url — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.utilities.compute_fn_from_url

+
+
+compute_fn_from_url(url, make_unique=False)[source]
+

Compute a unique file name from URL string.

+

File name will always be 128 characters or less, unless the +make_unique argument is set to true. In that case, the max +length is 164 (a UUID is tagged onto the filename).

+
+
Parameters:
+
    +
  • url (str) – Input URL to convert into filename.

  • +
  • make_unique (bool, optional) – Option to add a UUID at the end of the file name to make it +unique. By default, False.

  • +
+
+
Returns:
+

str – Valid filename representation of the URL.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.utilities.html b/_autosummary/elm.web.utilities.html new file mode 100644 index 00000000..a8868b8b --- /dev/null +++ b/_autosummary/elm.web.utilities.html @@ -0,0 +1,336 @@ + + + + + + + elm.web.utilities — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.utilities

+

ELM Web Scraping utilities.

+

Functions

+ + + + + + + + + + + + +

clean_search_query(query)

Check if the first character is a digit and remove it if so.

compute_fn_from_url(url[, make_unique])

Compute a unique file name from URL string.

write_url_doc_to_file(doc, file_content, out_dir)

Write a file pulled from URL to disk.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.web.utilities.write_url_doc_to_file.html b/_autosummary/elm.web.utilities.write_url_doc_to_file.html new file mode 100644 index 00000000..6b87f40b --- /dev/null +++ b/_autosummary/elm.web.utilities.write_url_doc_to_file.html @@ -0,0 +1,346 @@ + + + + + + + elm.web.utilities.write_url_doc_to_file — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.web.utilities.write_url_doc_to_file

+
+
+write_url_doc_to_file(doc, file_content, out_dir, make_name_unique=False)[source]
+

Write a file pulled from URL to disk.

+
+
Parameters:
+
    +
  • doc (elm.web.document.Document) – Document containing meta information about the file. Must have a +“source” key in the metadata dict containing the URL, which +will be converted to a file name using +compute_fn_from_url().

  • +
  • file_content (str | bytes) – File content, typically string text for HTML files and bytes +for PDF file.

  • +
  • out_dir (path-like) – Path to directory where file should be stored.

  • +
  • make_name_unique (bool, optional) – Option to make file name unique by adding a UUID at the end of +the file name. By default, False.

  • +
+
+
Returns:
+

Path – Path to output file.

+
+
+
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.wizard.EnergyWizard.html b/_autosummary/elm.wizard.EnergyWizard.html new file mode 100644 index 00000000..6f6deefc --- /dev/null +++ b/_autosummary/elm.wizard.EnergyWizard.html @@ -0,0 +1,801 @@ + + + + + + + elm.wizard.EnergyWizard — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.wizard.EnergyWizard

+
+
+class EnergyWizard(corpus, model=None, token_budget=3500, ref_col=None)[source]
+

Bases: ApiBase

+

Interface to ask OpenAI LLMs about energy research.

+
+
Parameters:
+
    +
  • corpus (pd.DataFrame) – Corpus of text in dataframe format. Must have columns “text” and +“embedding”.

  • +
  • model (str) – GPT model name, default is the DEFAULT_MODEL global var

  • +
  • token_budget (int) – Number of tokens that can be embedded in the prompt. Note that the +default budget for GPT-3.5-Turbo is 4096, but you want to subtract +some tokens to account for the response budget.

  • +
  • ref_col (None | str) – Optional column label in the corpus that provides a reference text +string for each chunk of text.

  • +
+
+
+

Methods

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +

call_api(url, headers, request_json)

Make an asynchronous OpenAI API call.

call_api_async(url, headers, all_request_jsons)

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

chat(query[, debug, stream, temperature, ...])

Answers a query by doing a semantic search of relevant text with embeddings and then sending engineered query to the LLM.

clear()

Clear chat history and reduce messages to just the initial model role message.

cosine_dist(query_embedding)

Compute the cosine distance of the query embedding array vs.

count_tokens(text, model)

Return the number of tokens in a string.

engineer_query(query[, token_budget, ...])

Engineer a query for GPT using the corpus of information

generic_async_query(queries[, model_role, ...])

Run a number of generic single queries asynchronously (not conversational)

generic_query(query[, model_role, temperature])

Ask a generic single query without conversation

get_embedding(text)

Get the 1D array (list) embedding of a text string.

make_ref_list(idx)

Make a reference list

preflight_corpus(corpus[, required])

Run preflight checks on the text corpus.

rank_strings(query[, top_n])

Returns a list of strings and relatednesses, sorted from most related to least.

+

Attributes

+ + + + + + + + + + + + + + + + + + + + + + + + + + + +

DEFAULT_MODEL

Default model to do pdf text cleaning.

EMBEDDING_MODEL

Default model to do text embeddings.

EMBEDDING_URL

OpenAI embedding API URL

HEADERS

OpenAI API Headers

MODEL_INSTRUCTION

Prefix to the engineered prompt

MODEL_ROLE

High level model role, somewhat redundant to MODEL_INSTRUCTION

URL

OpenAI API URL to be used with environment variable OPENAI_API_KEY.

all_messages_txt

Get a string printout of the full conversation with the LLM

+
+
+MODEL_ROLE = 'You parse through articles to answer questions.'
+

High level model role, somewhat redundant to MODEL_INSTRUCTION

+
+ +
+
+MODEL_INSTRUCTION = 'Use the information below to answer the subsequent question. If the answer cannot be found in the text, write "I could not find an answer."'
+

Prefix to the engineered prompt

+
+ +
+
+static preflight_corpus(corpus, required=('text', 'embedding'))[source]
+

Run preflight checks on the text corpus.

+
+
Parameters:
+
    +
  • corpus (pd.DataFrame) – Corpus of text in dataframe format. Must have columns “text” and +“embedding”.

  • +
  • required (list | tuple) – Column names required to be in the corpus df

  • +
+
+
Returns:
+

corpus (pd.DataFrame) – Corpus of text in dataframe format. Must have columns “text” and +“embedding”.

+
+
+
+ +
+
+cosine_dist(query_embedding)[source]
+

Compute the cosine distance of the query embedding array vs. all of +the embedding arrays of the full text corpus

+
+
Parameters:
+

query_embedding (np.ndarray) – 1D array of the numerical embedding of the request query.

+
+
Returns:
+

out (np.ndarray) – 1D array with length equal to the number of entries in the text +corpus. Each value is a distance score where smaller is closer

+
+
+
+ +
+
+rank_strings(query, top_n=100)[source]
+

Returns a list of strings and relatednesses, sorted from most +related to least.

+
+
Parameters:
+
    +
  • query (str) – Question being asked of GPT

  • +
  • top_n (int) – Number of top results to return.

  • +
+
+
Returns:
+

    +
  • strings (np.ndarray) – 1D array of related strings

  • +
  • score (np.ndarray) – 1D array of float scores of strings

  • +
  • idx (np.ndarray) – 1D array of indices in the text corpus corresponding to the +ranked strings/scores outputs.

  • +
+

+
+
+
+ +
+
+engineer_query(query, token_budget=None, new_info_threshold=0.7, convo=False)[source]
+

Engineer a query for GPT using the corpus of information

+
+
Parameters:
+
    +
  • query (str) – Question being asked of GPT

  • +
  • token_budget (int) – Option to override the class init token budget.

  • +
  • new_info_threshold (float) – New text added to the engineered query must contain at least this +much new information. This helps prevent (for example) the table of +contents being added multiple times.

  • +
  • convo (bool) – Flag to perform semantic search with full conversation history +(True) or just the single query (False). Call EnergyWizard.clear() +to reset the chat history.

  • +
+
+
Returns:
+

    +
  • message (str) – Engineered question to GPT including information from corpus and +the original query

  • +
  • references (list) – The list of references (strs) used in the engineered prompt is +returned here

  • +
+

+
+
+
+ +
+
+DEFAULT_MODEL = 'gpt-3.5-turbo'
+

Default model to do pdf text cleaning.

+
+ +
+
+EMBEDDING_MODEL = 'text-embedding-ada-002'
+

Default model to do text embeddings.

+
+ +
+
+EMBEDDING_URL = 'https://api.openai.com/v1/embeddings'
+

OpenAI embedding API URL

+
+ +
+
+HEADERS = {'Authorization': 'Bearer None', 'Content-Type': 'application/json', 'api-key': 'None'}
+

OpenAI API Headers

+
+ +
+
+URL = 'https://api.openai.com/v1/chat/completions'
+

OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use +an Azure API endpoint to trigger Azure usage along with environment +variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and +AZURE_OPENAI_ENDPOINT

+
+ +
+
+property all_messages_txt
+

Get a string printout of the full conversation with the LLM

+
+
Returns:
+

str

+
+
+
+ +
+
+async static call_api(url, headers, request_json)
+

Make an asyncronous OpenAI API call.

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • request_json (dict) –

    +
    +
    API data input, typically looks like this for chat completion:
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
    +
  • +
+
+
Returns:
+

out (dict) – API response in json format

+
+
+
+ +
+
+async call_api_async(url, headers, all_request_jsons, ignore_error=None, rate_limit=40000.0)
+

Use GPT to clean raw pdf text in parallel calls to the OpenAI API.

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await PDFtoTXT.clean_txt_async()

+
+
Parameters:
+
    +
  • url (str) –

    +
    +
    OpenAI API url, typically either:

    https://api.openai.com/v1/embeddings +https://api.openai.com/v1/chat/completions

    +
    +
    +
  • +
  • headers (dict) –

    +
    +
    OpenAI API headers, typically:
    +
    {“Content-Type”: “application/json”,

    “Authorization”: f”Bearer {openai.api_key}”}

    +
    +
    +
    +
    +
  • +
  • all_request_jsons (list) – List of API data input, one entry typically looks like this for +chat completion:

    +
    +
    +
    {“model”: “gpt-3.5-turbo”,
    +
    “messages”: [{“role”: “system”, “content”: “You do this…”},

    {“role”: “user”, “content”: “Do this: {}”}],

    +
    +
    +

    “temperature”: 0.0}

    +
    +
    +
    +
  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

out (list) – List of API outputs where each list entry is a GPT answer from the +corresponding message in the all_request_jsons input.

+
+
+
+ +
+
+clear()
+

Clear chat history and reduce messages to just the initial model +role message.

+
+ +
+
+static count_tokens(text, model)
+

Return the number of tokens in a string.

+
+
Parameters:
+
    +
  • text (str) – Text string to get number of tokens for

  • +
  • model (str) – specification of OpenAI model to use (e.g., “gpt-3.5-turbo”)

  • +
+
+
Returns:
+

n (int) – Number of tokens in text

+
+
+
+ +
+
+async generic_async_query(queries, model_role=None, temperature=0, ignore_error=None, rate_limit=40000.0)
+

Run a number of generic single queries asynchronously +(not conversational)

+

NOTE: you need to call this using the await command in ipython or +jupyter, e.g.: out = await Summary.run_async()

+
+
Parameters:
+
    +
  • query (list) – Questions to ask ChatGPT (list of strings)

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • ignore_error (None | callable) – Optional callable to parse API error string. If the callable +returns True, the error will be ignored, the API call will not be +tried again, and the output will be an empty string.

  • +
  • rate_limit (float) – OpenAI API rate limit (tokens / minute). Note that the +gpt-3.5-turbo limit is 90k as of 4/2023, but we’re using a large +factor of safety (~1/2) because we can only count the tokens on the +input side and assume the output is about the same count.

  • +
+
+
Returns:
+

response (list) – Model responses with same length as query input.

+
+
+
+ +
+
+generic_query(query, model_role=None, temperature=0)
+

Ask a generic single query without conversation

+
+
Parameters:
+
    +
  • query (str) – Question to ask ChatGPT

  • +
  • model_role (str | None) – Role for the model to take, e.g.: “You are a research assistant”. +This defaults to self.MODEL_ROLE

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
+
+
Returns:
+

response (str) – Model response

+
+
+
+ +
+
+classmethod get_embedding(text)
+

Get the 1D array (list) embedding of a text string.

+
+
Parameters:
+

text (str) – Text to embed

+
+
Returns:
+

embedding (list) – List of float that represents the numerical embedding of the text

+
+
+
+ +
+
+make_ref_list(idx)[source]
+

Make a reference list

+
+
Parameters:
+

used_index (np.ndarray) – Indices of the used text from the text corpus

+
+
Returns:
+

ref_list (list) – A list of references (strs) used.

+
+
+
+ +
+
+chat(query, debug=True, stream=True, temperature=0, convo=False, token_budget=None, new_info_threshold=0.7, print_references=False, return_chat_obj=False)[source]
+

Answers a query by doing a semantic search of relevant text with +embeddings and then sending engineered query to the LLM.

+
+
Parameters:
+
    +
  • query (str) – Question being asked of EnergyWizard

  • +
  • debug (bool) – Flag to return extra diagnostics on the engineered question.

  • +
  • stream (bool) – Flag to print subsequent chunks of the response in a streaming +fashion

  • +
  • temperature (float) – GPT model temperature, a measure of response entropy from 0 to 1. 0 +is more reliable and nearly deterministic; 1 will give the model +more creative freedom and may not return as factual of results.

  • +
  • convo (bool) – Flag to perform semantic search with full conversation history +(True) or just the single query (False). Call EnergyWizard.clear() +to reset the chat history.

  • +
  • token_budget (int) – Option to override the class init token budget.

  • +
  • new_info_threshold (float) – New text added to the engineered query must contain at least this +much new information. This helps prevent (for example) the table of +contents being added multiple times.

  • +
  • print_references (bool) – Flag to print references if EnergyWizard is initialized with a +valid ref_col.

  • +
  • return_chat_obj (bool) – Flag to only return the ChatCompletion from OpenAI API.

  • +
+
+
Returns:
+

    +
  • response (str) – GPT output / answer.

  • +
  • query (str) – If debug is True, the engineered query asked of GPT will also be +returned here

  • +
  • references (list) – If debug is True, the list of references (strs) used in the +engineered prompt is returned here

  • +
+

+
+
+
+ +
+ +
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_autosummary/elm.wizard.html b/_autosummary/elm.wizard.html new file mode 100644 index 00000000..74240fea --- /dev/null +++ b/_autosummary/elm.wizard.html @@ -0,0 +1,329 @@ + + + + + + + elm.wizard — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm.wizard

+

ELM energy wizard

+

Classes

+ + + + + + +

EnergyWizard(corpus[, model, token_budget, ...])

Interface to ask OpenAI LLMs about energy research.

+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_cli/cli.html b/_cli/cli.html new file mode 100644 index 00000000..3a489ff9 --- /dev/null +++ b/_cli/cli.html @@ -0,0 +1,327 @@ + + + + + + + Command Line Interfaces (CLIs) — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Command Line Interfaces (CLIs)

+
+ +
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_cli/elm.html b/_cli/elm.html new file mode 100644 index 00000000..52f3d71d --- /dev/null +++ b/_cli/elm.html @@ -0,0 +1,349 @@ + + + + + + + elm — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

elm

+

ELM ordinances command line interface.

+
elm [OPTIONS] COMMAND [ARGS]...
+
+
+

Options

+
+
+--version
+

Show the version and exit.

+
+ +
+

ords

+

Download and extract ordinances for a list of counties.

+
elm ords [OPTIONS]
+
+
+

Options

+
+
+-c, --config <config>
+

Required Path to ordinance configuration JSON file. This file should contain any/all the arguments to pass to elm.ords.process.process_counties_with_openai().

+
+ +
+
+-v, --verbose
+

Flag to show logging on the terminal. Default is not to show any logs on the terminal.

+
+ +
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/base.html b/_modules/elm/base.html new file mode 100644 index 00000000..461f6968 --- /dev/null +++ b/_modules/elm/base.html @@ -0,0 +1,913 @@ + + + + + + elm.base — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.base

+# -*- coding: utf-8 -*-
+"""
+ELM abstract class for API calls
+"""
+from abc import ABC
+import os
+import numpy as np
+import asyncio
+import aiohttp
+import openai
+import requests
+import tiktoken
+import time
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class ApiBase(ABC): + """Class to parse text from a PDF document.""" + + DEFAULT_MODEL = 'gpt-3.5-turbo' + """Default model to do pdf text cleaning.""" + + EMBEDDING_MODEL = 'text-embedding-ada-002' + """Default model to do text embeddings.""" + + EMBEDDING_URL = 'https://api.openai.com/v1/embeddings' + """OpenAI embedding API URL""" + + URL = 'https://api.openai.com/v1/chat/completions' + """OpenAI API URL to be used with environment variable OPENAI_API_KEY. Use + an Azure API endpoint to trigger Azure usage along with environment + variables AZURE_OPENAI_KEY, AZURE_OPENAI_VERSION, and + AZURE_OPENAI_ENDPOINT""" + + HEADERS = {"Content-Type": "application/json", + "Authorization": f"Bearer {openai.api_key}", + "api-key": f"{openai.api_key}", + } + """OpenAI API Headers""" + + MODEL_ROLE = "You are a research assistant that answers questions." + """High level model role""" + + def __init__(self, model=None): + """ + Parameters + ---------- + model : None | str + Optional specification of OpenAI model to use. Default is + cls.DEFAULT_MODEL + """ + self.model = model or self.DEFAULT_MODEL + self.api_queue = None + self.messages = [] + self.clear() + + if 'openai.azure.com' in self.URL.lower(): + key = os.environ.get("AZURE_OPENAI_KEY") + version = os.environ.get("AZURE_OPENAI_VERSION") + endpoint = os.environ.get("AZURE_OPENAI_ENDPOINT") + assert key is not None, "Must set AZURE_OPENAI_KEY!" + assert version is not None, "Must set AZURE_OPENAI_VERSION!" + assert endpoint is not None, "Must set AZURE_OPENAI_ENDPOINT!" + self._client = openai.AzureOpenAI(api_key=key, + api_version=version, + azure_endpoint=endpoint) + else: + key = os.environ.get("OPENAI_API_KEY") + assert key is not None, "Must set OPENAI_API_KEY!" 
+ self._client = openai.OpenAI(api_key=key) + + @property + def all_messages_txt(self): + """Get a string printout of the full conversation with the LLM + + Returns + ------- + str + """ + messages = [f"{msg['role'].upper()}: {msg['content']}" + for msg in self.messages] + messages = '\n\n'.join(messages) + return messages + +
+[docs] + def clear(self): + """Clear chat history and reduce messages to just the initial model + role message.""" + self.messages = [{"role": "system", "content": self.MODEL_ROLE}]
+ + +
+[docs] + @staticmethod + async def call_api(url, headers, request_json): + """Make an asyncronous OpenAI API call. + + Parameters + ---------- + url : str + OpenAI API url, typically either: + https://api.openai.com/v1/embeddings + https://api.openai.com/v1/chat/completions + headers : dict + OpenAI API headers, typically: + {"Content-Type": "application/json", + "Authorization": f"Bearer {openai.api_key}"} + request_json : dict + API data input, typically looks like this for chat completion: + {"model": "gpt-3.5-turbo", + "messages": [{"role": "system", "content": "You do this..."}, + {"role": "user", "content": "Do this: {}"}], + "temperature": 0.0} + + Returns + ------- + out : dict + API response in json format + """ + + out = None + kwargs = dict(url=url, headers=headers, json=request_json) + try: + async with aiohttp.ClientSession() as session: + async with session.post(**kwargs) as response: + out = await response.json() + + except Exception as e: + logger.debug(f'Error in OpenAI API call from ' + f'`aiohttp.ClientSession().post(**kwargs)` with ' + f'kwargs: {kwargs}') + logger.exception('Error in OpenAI API call! Turn on debug logging ' + 'to see full query that caused error.') + out = {'error': str(e)} + + return out
+ + +
+[docs] + async def call_api_async(self, url, headers, all_request_jsons, + ignore_error=None, rate_limit=40e3): + """Use GPT to clean raw pdf text in parallel calls to the OpenAI API. + + NOTE: you need to call this using the await command in ipython or + jupyter, e.g.: `out = await PDFtoTXT.clean_txt_async()` + + Parameters + ---------- + url : str + OpenAI API url, typically either: + https://api.openai.com/v1/embeddings + https://api.openai.com/v1/chat/completions + headers : dict + OpenAI API headers, typically: + {"Content-Type": "application/json", + "Authorization": f"Bearer {openai.api_key}"} + all_request_jsons : list + List of API data input, one entry typically looks like this for + chat completion: + {"model": "gpt-3.5-turbo", + "messages": [{"role": "system", "content": "You do this..."}, + {"role": "user", "content": "Do this: {}"}], + "temperature": 0.0} + ignore_error : None | callable + Optional callable to parse API error string. If the callable + returns True, the error will be ignored, the API call will not be + tried again, and the output will be an empty string. + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + gpt-3.5-turbo limit is 90k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + + Returns + ------- + out : list + List of API outputs where each list entry is a GPT answer from the + corresponding message in the all_request_jsons input. + """ + self.api_queue = ApiQueue(url, headers, all_request_jsons, + ignore_error=ignore_error, + rate_limit=rate_limit) + out = await self.api_queue.run() + return out
+ + +
+[docs] + def chat(self, query, temperature=0): + """Have a continuous chat with the LLM including context from previous + chat() calls stored as attributes in this class. + + Parameters + ---------- + query : str + Question to ask ChatGPT + temperature : float + GPT model temperature, a measure of response entropy from 0 to 1. 0 + is more reliable and nearly deterministic; 1 will give the model + more creative freedom and may not return as factual of results. + + Returns + ------- + response : str + Model response + """ + + self.messages.append({"role": "user", "content": query}) + + kwargs = dict(model=self.model, + messages=self.messages, + temperature=temperature, + stream=False) + + response = self._client.chat.completions.create(**kwargs) + response = response.choices[0].message.content + self.messages.append({'role': 'assistant', 'content': response}) + + return response
+ + +
+[docs] + def generic_query(self, query, model_role=None, temperature=0): + """Ask a generic single query without conversation + + Parameters + ---------- + query : str + Question to ask ChatGPT + model_role : str | None + Role for the model to take, e.g.: "You are a research assistant". + This defaults to self.MODEL_ROLE + temperature : float + GPT model temperature, a measure of response entropy from 0 to 1. 0 + is more reliable and nearly deterministic; 1 will give the model + more creative freedom and may not return as factual of results. + + Returns + ------- + response : str + Model response + """ + + model_role = model_role or self.MODEL_ROLE + messages = [{"role": "system", "content": model_role}, + {"role": "user", "content": query}] + kwargs = dict(model=self.model, + messages=messages, + temperature=temperature, + stream=False) + + response = self._client.chat.completions.create(**kwargs) + response = response.choices[0].message.content + return response
+ + +
+[docs] + async def generic_async_query(self, queries, model_role=None, + temperature=0, ignore_error=None, + rate_limit=40e3): + """Run a number of generic single queries asynchronously + (not conversational) + + NOTE: you need to call this using the await command in ipython or + jupyter, e.g.: `out = await Summary.run_async()` + + Parameters + ---------- + query : list + Questions to ask ChatGPT (list of strings) + model_role : str | None + Role for the model to take, e.g.: "You are a research assistant". + This defaults to self.MODEL_ROLE + temperature : float + GPT model temperature, a measure of response entropy from 0 to 1. 0 + is more reliable and nearly deterministic; 1 will give the model + more creative freedom and may not return as factual of results. + ignore_error : None | callable + Optional callable to parse API error string. If the callable + returns True, the error will be ignored, the API call will not be + tried again, and the output will be an empty string. + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + gpt-3.5-turbo limit is 90k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + + Returns + ------- + response : list + Model responses with same length as query input. 
+ """ + + model_role = model_role or self.MODEL_ROLE + all_request_jsons = [] + for msg in queries: + msg = [{'role': 'system', 'content': self.MODEL_ROLE}, + {'role': 'user', 'content': msg}] + req = {"model": self.model, "messages": msg, + "temperature": temperature} + all_request_jsons.append(req) + + self.api_queue = ApiQueue(self.URL, self.HEADERS, all_request_jsons, + ignore_error=ignore_error, + rate_limit=rate_limit) + out = await self.api_queue.run() + + for i, response in enumerate(out): + choice = response.get('choices', [{'message': {'content': ''}}])[0] + message = choice.get('message', {'content': ''}) + content = message.get('content', '') + if not any(content): + logger.error(f'Received no output for query {i + 1}!') + else: + out[i] = content + + return out
+ + +
+[docs] + @classmethod + def get_embedding(cls, text): + """Get the 1D array (list) embedding of a text string. + + Parameters + ---------- + text : str + Text to embed + + Returns + ------- + embedding : list + List of float that represents the numerical embedding of the text + """ + kwargs = dict(url=cls.EMBEDDING_URL, + headers=cls.HEADERS, + json={'model': cls.EMBEDDING_MODEL, + 'input': text}) + + out = requests.post(**kwargs) + embedding = out.json() + + try: + embedding = embedding["data"][0]["embedding"] + except Exception as exc: + msg = ('Embedding request failed: {} {}' + .format(out.reason, embedding)) + logger.error(msg) + raise RuntimeError(msg) from exc + + return embedding
+ + +
+[docs] + @staticmethod + def count_tokens(text, model): + """Return the number of tokens in a string. + + Parameters + ---------- + text : str + Text string to get number of tokens for + model : str + specification of OpenAI model to use (e.g., "gpt-3.5-turbo") + + Returns + ------- + n : int + Number of tokens in text + """ + + # Optional mappings for weird azure names to tiktoken/openai names + tokenizer_aliases = {'gpt-35-turbo': 'gpt-3.5-turbo', + 'gpt-4-32k': 'gpt-4-32k-0314' + } + + token_model = tokenizer_aliases.get(model, model) + encoding = tiktoken.encoding_for_model(token_model) + + return len(encoding.encode(text))
+
+ + + +
+[docs] +class ApiQueue: + """Class to manage the parallel API queue and submission""" + + def __init__(self, url, headers, request_jsons, ignore_error=None, + rate_limit=40e3, max_retries=10): + """ + Parameters + ---------- + url : str + OpenAI API url, typically either: + https://api.openai.com/v1/embeddings + https://api.openai.com/v1/chat/completions + headers : dict + OpenAI API headers, typically: + {"Content-Type": "application/json", + "Authorization": f"Bearer {openai.api_key}"} + all_request_jsons : list + List of API data input, one entry typically looks like this for + chat completion: + {"model": "gpt-3.5-turbo", + "messages": [{"role": "system", "content": "You do this..."}, + {"role": "user", "content": "Do this: {}"}], + "temperature": 0.0} + ignore_error : None | callable + Optional callable to parse API error string. If the callable + returns True, the error will be ignored, the API call will not be + tried again, and the output will be an empty string. + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + gpt-3.5-turbo limit is 90k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + max_retries : int + Number of times to retry an API call with an error response before + raising an error. 
+ """ + + self.url = url + self.headers = headers + self.request_jsons = request_jsons + self.ignore_error = ignore_error + self.rate_limit = rate_limit + self.max_retries = max_retries + self.api_jobs = None + self.todo = None + self.out = None + self.errors = None + self.tries = None + self._retry = False + self._tsub = 0 + self._reset() + self.job_names = [f'job_{str(ijob).zfill(4)}' + for ijob in range(len(request_jsons))] + + def _reset(self): + self.api_jobs = {} + self.todo = [True] * len(self) + self.out = [None] * len(self) + self.errors = [None] * len(self) + self.tries = np.zeros(len(self), dtype=int) + self._retry = False + self._tsub = 0 + + def __len__(self): + """Number of API calls to submit""" + return len(self.request_jsons) + + @property + def waiting_on(self): + """Get a list of async jobs that are being waited on.""" + return [job for ijob, job in self.api_jobs.items() if self.todo[ijob]] + +
+[docs] + def submit_jobs(self): + """Submit a subset jobs asynchronously and hold jobs in the `api_jobs` + attribute. Break when the `rate_limit` is exceeded.""" + + token_count = 0 + t_elap = (time.time() - self._tsub) / 60 + avail_tokens = self.rate_limit * t_elap + avail_tokens = min(self.rate_limit, avail_tokens) + + for ijob, itodo in enumerate(self.todo): + if (ijob not in self.api_jobs + and itodo + and token_count < avail_tokens): + request = self.request_jsons[ijob] + model = request['model'] + tokens = ApiBase.count_tokens(str(request), model) + + if tokens > self.rate_limit: + msg = ('Job index #{} with has {} tokens which ' + 'is greater than the rate limit of {}!' + .format(ijob, tokens, self.rate_limit)) + logger.error(msg) + raise RuntimeError(msg) + + elif tokens < avail_tokens: + token_count += tokens + task = asyncio.create_task(ApiBase.call_api(self.url, + self.headers, + request), + name=self.job_names[ijob]) + self.api_jobs[ijob] = task + self.tries[ijob] += 1 + self._tsub = time.time() + + logger.debug('Submitted "{}" ({} out of {}). ' + 'Token count: {} ' + '(rate limit is {}). ' + 'Attempts: {}' + .format(self.job_names[ijob], + ijob + 1, len(self), token_count, + self.rate_limit, + self.tries[ijob])) + + elif token_count >= avail_tokens: + token_count = 0 + break
+ + +
+[docs] + async def collect_jobs(self): + """Collect asyncronous API calls and API outputs. Store outputs in the + `out` attribute.""" + + if not any(self.waiting_on): + return + + complete, _ = await asyncio.wait(self.waiting_on, + return_when=asyncio.FIRST_COMPLETED) + + for job in complete: + job_name = job.get_name() + ijob = self.job_names.index(job_name) + task_out = job.result() + + if 'error' in task_out: + msg = ('Received API error for task #{0} ' + '(see `ApiQueue.errors[{1}]` and ' + '`ApiQueue.request_jsons[{1}]` for more details). ' + 'Error message: {2}'.format(ijob + 1, ijob, task_out)) + self.errors[ijob] = 'Error: {}'.format(task_out) + + if (self.ignore_error is not None + and self.ignore_error(str(task_out))): + msg += ' Ignoring error and moving on.' + dummy = {'choices': [{'message': {'content': ''}}]} + self.out[ijob] = dummy + self.todo[ijob] = False + else: + del self.api_jobs[ijob] + msg += ' Retrying query.' + self._retry = True + logger.error(msg) + + else: + self.out[ijob] = task_out + self.todo[ijob] = False + + n_complete = len(self) - sum(self.todo) + logger.debug('Finished {} API calls, {} left' + .format(n_complete, sum(self.todo)))
+ + +
+[docs] + async def run(self): + """Run all asyncronous API calls. + + Returns + ------- + out : list + List of API call outputs with same ordering as `request_jsons` + input. + """ + + self._reset() + logger.debug('Submitting async API calls...') + + i = 0 + while any(self.todo): + i += 1 + self._retry = False + self.submit_jobs() + await self.collect_jobs() + + if any(self.tries > self.max_retries): + msg = (f'Hit {self.max_retries} retries on API queries. ' + 'Stopping. See `ApiQueue.errors` for more ' + 'details on error response') + logger.error(msg) + raise RuntimeError(msg) + elif self._retry: + time.sleep(10) + elif i > 1e4: + raise RuntimeError('Hit 1e4 iterations. What are you doing?') + elif any(self.todo): + time.sleep(5) + + return self.out
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/chunk.html b/_modules/elm/chunk.html new file mode 100644 index 00000000..77f203f6 --- /dev/null +++ b/_modules/elm/chunk.html @@ -0,0 +1,594 @@ + + + + + + elm.chunk — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.chunk

+# -*- coding: utf-8 -*-
+"""
+Utility to break text up into overlapping chunks.
+"""
+import copy
+from elm.base import ApiBase
+
+
+
+[docs] +class Chunker(ApiBase): + """ + Class to break text up into overlapping chunks + + NOTE: very large paragraphs that exceed the tokens per chunk will not be + split up and will still be padded with overlap. + """ + + def __init__(self, text, tag=None, tokens_per_chunk=500, overlap=1, + split_on='\n\n'): + """ + Parameters + ---------- + text : str + Single body of text to break up. Works well if this is a single + document with empty lines between paragraphs. + tag : None | str + Optional reference tag to include at the beginning of each text + chunk + tokens_per_chunk : float + Nominal token count per text chunk. Overlap paragraphs will exceed + this. + overlap : int + Number of paragraphs to overlap between chunks + split_on : str + Sub string to split text into paragraphs. + """ + + super().__init__() + + self._split_on = split_on + self._idc = 0 # iter index for chunk + self.text = self.clean_paragraphs(text) + self.tag = tag + self.tokens_per_chunk = tokens_per_chunk + self.overlap = overlap + self._paragraphs = None + self._ptokens = None + self._ctokens = None + self._chunks = self.chunk_text() + + def __getitem__(self, i): + """Get a chunk index + + Returns + ------- + str + """ + return self.chunks[i] + + def __iter__(self): + self._idc = 0 + return self + + def __next__(self): + """Iterator returns one of the text chunks at a time + + Returns + ------- + str + """ + + if self._idc >= len(self): + raise StopIteration + + out = self.chunks[self._idc] + self._idc += 1 + return out + + def __len__(self): + """Number of text chunks + + Return + ------ + int + """ + return len(self.chunks) + + @property + def chunks(self): + """List of overlapping text chunks (strings). + + Returns + ------- + list + """ + return self._chunks + + @property + def paragraphs(self): + """Get a list of paragraphs in the text demarkated by an empty line. 
+ + Returns + ------- + list + """ + if self._paragraphs is None: + self._paragraphs = self.text.split(self._split_on) + self._paragraphs = [p for p in self._paragraphs + if self.is_good_paragraph(p)] + return self._paragraphs + +
+[docs] + @staticmethod + def clean_paragraphs(text): + """Clean up double line breaks to make sure paragraphs can be detected + in the text.""" + previous_len = len(text) + while True: + text = text.replace('\n ', '\n') + if len(text) == previous_len: + break + else: + previous_len = len(text) + return text
+ + +
+[docs] + @staticmethod + def is_good_paragraph(paragraph): + """Basic tests to make sure the paragraph is useful text.""" + if '.....' in paragraph: + return False + elif paragraph.strip().isnumeric(): + return False + else: + return True
+ + + @property + def paragraph_tokens(self): + """Number of tokens per paragraph. + + Returns + ------- + list + """ + if self._ptokens is None: + self._ptokens = [self.count_tokens(p, self.model) + for p in self.paragraphs] + return self._ptokens + + @property + def chunk_tokens(self): + """Number of tokens per chunk. + + Returns + ------- + list + """ + if self._ctokens is None: + self._ctokens = [self.count_tokens(c, self.model) + for c in self.chunks] + return self._ctokens + +
+[docs] + def merge_chunks(self, chunks_input): + """Merge chunks until they reach the token limit per chunk. + + Parameters + ---------- + chunks_input : list + List of list of integers: [[0, 1], [2], [3, 4]] where nested lists + are chunks and the integers are paragraph indices + + Returns + ------- + chunks : list + List of list of integers: [[0, 1], [2], [3, 4]] where nested lists + are chunks and the integers are paragraph indices + """ + + chunks = copy.deepcopy(chunks_input) + + for i in range(len(chunks) - 1): + chunk0 = chunks[i] + chunk1 = chunks[i + 1] + if chunk0 is not None and chunk1 is not None: + tcount0 = sum(self.paragraph_tokens[j] for j in chunk0) + tcount1 = sum(self.paragraph_tokens[j] for j in chunk1) + if tcount0 + tcount1 < self.tokens_per_chunk: + chunk0 += chunk1 + chunks[i] = chunk0 + chunks[i + 1] = None + + chunks = [c for c in chunks if c is not None] + flat_chunks = [a for b in chunks for a in b] + + assert all(c in list(range(len(self.paragraphs))) for c in flat_chunks) + + return chunks
+ + +
+[docs] + def add_overlap(self, chunks_input): + """Add overlap on either side of a text chunk. This ignores token + limit. + + Parameters + ---------- + chunks_input : list + List of list of integers: [[0, 1], [2], [3, 4]] where nested lists + are chunks and the integers are paragraph indices + + Returns + ------- + chunks : list + List of list of integers: [[0, 1], [2], [3, 4]] where nested lists + are chunks and the integers are paragraph indices + """ + + if len(chunks_input) == 1 or self.overlap == 0: + return chunks_input + + chunks = copy.deepcopy(chunks_input) + + for i, chunk1 in enumerate(chunks_input): + + if i == 0: + chunk2 = chunks_input[i + 1] + chunk1 = chunk1 + chunk2[:self.overlap] + + elif i == len(chunks) - 1: + chunk0 = chunks_input[i - 1] + chunk1 = chunk0[-self.overlap:] + chunk1 + + else: + chunk0 = chunks_input[i - 1] + chunk2 = chunks_input[i + 1] + chunk1 = (chunk0[-self.overlap:] + + chunk1 + + chunk2[:self.overlap]) + + chunks[i] = chunk1 + + return chunks
+ + +
+[docs] + def chunk_text(self): + """Perform the text chunking operation + + Returns + ------- + chunks : list + List of strings where each string is an overlapping chunk of text + """ + + chunks_input = [[i] for i in range(len(self.paragraphs))] + while True: + chunks = self.merge_chunks(chunks_input) + if chunks == chunks_input: + break + else: + chunks_input = copy.deepcopy(chunks) + + chunks = self.add_overlap(chunks) + text_chunks = [] + for chunk in chunks: + paragraphs = [self.paragraphs[c] for c in chunk] + text_chunks.append(self._split_on.join(paragraphs)) + + if self.tag is not None: + text_chunks = [self.tag + '\n\n' + chunk for chunk in text_chunks] + + return text_chunks
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/embed.html b/_modules/elm/embed.html new file mode 100644 index 00000000..bcbb5c91 --- /dev/null +++ b/_modules/elm/embed.html @@ -0,0 +1,489 @@ + + + + + + elm.embed — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.embed

+# -*- coding: utf-8 -*-
+"""
+ELM text embedding
+"""
+import openai
+import re
+import os
+import logging
+
+from elm.base import ApiBase
+from elm.chunk import Chunker
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class ChunkAndEmbed(ApiBase): + """Class to chunk text data and create embeddings""" + + DEFAULT_MODEL = 'text-embedding-ada-002' + """Default model to do embeddings.""" + + def __init__(self, text, model=None, **chunk_kwargs): + """ + Parameters + ---------- + text : str + Single continuous piece of text to chunk up by paragraph and embed + or filepath to .txt file containing one piece of text. + model : None | str + Optional specification of OpenAI model to use. Default is + cls.DEFAULT_MODEL + chunk_kwargs : dict | None + kwargs for initialization of :class:`elm.chunk.Chunker` + """ + + super().__init__(model) + + self.text = text + + if os.path.isfile(text): + logger.info('Loading text file: {}'.format(text)) + with open(text, 'r') as f: + self.text = f.read() + + assert isinstance(self.text, str) + self.text = self.clean_tables(self.text) + + self.text_chunks = Chunker(self.text, **chunk_kwargs) + +
+[docs] + @staticmethod + def clean_tables(text): + """Make sure that table headers are in the same paragraph as the table + itself. Typically, tables are looked for with pipes and hyphens, which + is how GPT cleans tables in text.""" + + # looks for "Table N.", should expand to other formats with additional + # regex patterns later + table_regex = r"^Table [0-9]+." + + lines = text.split('\n') + for idx, line in enumerate(lines[:-2]): + next_line_1 = lines[idx + 1] + next_line_2 = lines[idx + 2] + match = re.search(table_regex, line) + cond1 = match is not None + cond2 = next_line_1.strip() == '' + cond3 = next_line_2.startswith('|') + + if all([cond1, cond2, cond3]): + lines[idx + 1] = line + lines[idx] = '' + + return '\n'.join(lines)
+ + +
+[docs] + def run(self, rate_limit=175e3): + """Run text embedding in serial + + Parameters + ---------- + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + embedding limit is 350k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + + Returns + ------- + embedding : list + List of 1D arrays representing the embeddings for all text chunks + """ + + logger.info('Embedding {} text chunks...' + .format(len(self.text_chunks))) + + embeddings = [] + for i, chunk in enumerate(self.text_chunks): + req = {"input": chunk, "model": self.model} + + if 'azure' in str(openai.api_type).lower(): + req['engine'] = self.model + + out = self.call_api(self.EMBEDDING_URL, self.HEADERS, req) + + try: + out = out['data'][0]['embedding'] + embeddings.append(out) + except Exception: + msg = ('Could not get embeddings for chunk {}, ' + 'received API response: {}'.format(i + 1, out)) + logger.error(msg) + embeddings.append(None) + + logger.info('Finished all embeddings.') + + return embeddings
+ + +
+[docs] + async def run_async(self, rate_limit=175e3): + """Run text embedding on chunks asynchronously + + NOTE: you need to call this using the await command in ipython or + jupyter, e.g.: `out = await ChunkAndEmbed.run_async()` + + Parameters + ---------- + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + embedding limit is 350k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + + Returns + ------- + embedding : list + List of 1D arrays representing the embeddings for all text chunks + """ + + logger.info('Embedding {} text chunks...' + .format(len(self.text_chunks))) + + all_request_jsons = [] + for chunk in self.text_chunks: + req = {"input": chunk, "model": self.model} + + if 'azure' in str(openai.api_type).lower(): + req['engine'] = self.model + + all_request_jsons.append(req) + + embeddings = await self.call_api_async(self.EMBEDDING_URL, + self.HEADERS, + all_request_jsons, + rate_limit=rate_limit) + + for i, chunk in enumerate(embeddings): + try: + embeddings[i] = chunk['data'][0]['embedding'] + except Exception: + msg = ('Could not get embeddings for chunk {}, ' + 'received API response: {}'.format(i + 1, chunk)) + logger.error(msg) + embeddings[i] = None + + logger.info('Finished all embeddings.') + + return embeddings
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/exceptions.html b/_modules/elm/exceptions.html new file mode 100644 index 00000000..20138502 --- /dev/null +++ b/_modules/elm/exceptions.html @@ -0,0 +1,327 @@ + + + + + + elm.exceptions — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.exceptions

+# -*- coding: utf-8 -*-
+"""Custom Exceptions and Errors for ELM. """
+
+
+
+[docs] +class ELMError(Exception): + """Generic ELM Error."""
+ + + +
+[docs] +class ELMRuntimeError(ELMError, RuntimeError): + """ELM RuntimeError."""
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/download.html b/_modules/elm/ords/download.html new file mode 100644 index 00000000..4d7939c7 --- /dev/null +++ b/_modules/elm/ords/download.html @@ -0,0 +1,523 @@ + + + + + + elm.ords.download — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.download

+# -*- coding: utf-8 -*-
+"""ELM Ordinance county file downloading logic"""
+import pprint
+import asyncio
+import logging
+from itertools import zip_longest, chain
+from contextlib import AsyncExitStack
+
+from elm.ords.llm import StructuredLLMCaller
+from elm.ords.extraction import check_for_ordinance_info
+from elm.ords.services.threaded import TempFileCache
+from elm.ords.validation.location import CountyValidator
+from elm.web.document import PDFDocument
+from elm.web.file_loader import AsyncFileLoader
+from elm.web.google_search import PlaywrightGoogleLinkSearch
+
+
+logger = logging.getLogger(__name__)
+QUESTION_TEMPLATES = [
+    '0. "wind energy conversion system zoning ordinances {location}"',
+    '1. "{location} wind WECS zoning ordinance"',
+    '2. "Where can I find the legal text for commercial wind energy '
+    'conversion system zoning ordinances in {location}?"',
+    '3. "What is the specific legal information regarding zoning '
+    'ordinances for commercial wind energy conversion systems in {location}?"',
+]
+
+
+async def _search_single(
+    location, question, browser_sem, num_results=10, **kwargs
+):
+    """Perform a single google search."""
+    if browser_sem is None:
+        browser_sem = AsyncExitStack()
+
+    search_engine = PlaywrightGoogleLinkSearch(**kwargs)
+    async with browser_sem:
+        return await search_engine.results(
+            question.format(location=location),
+            num_results=num_results,
+        )
+
+
+async def _find_urls(location, num_results=10, browser_sem=None, **kwargs):
+    """Parse google search output for URLs."""
+    searchers = [
+        asyncio.create_task(
+            _search_single(
+                location, q, browser_sem, num_results=num_results, **kwargs
+            ),
+            name=location,
+        )
+        for q in QUESTION_TEMPLATES
+    ]
+    return await asyncio.gather(*searchers)
+
+
+def _down_select_urls(search_results, num_urls=5):
+    """Select the top 5 URLs."""
+    all_urls = chain.from_iterable(
+        zip_longest(*[results[0] for results in search_results])
+    )
+    urls = set()
+    for url in all_urls:
+        if not url:
+            continue
+        urls.add(url)
+        if len(urls) == num_urls:
+            break
+    return urls
+
+
+async def _load_docs(urls, text_splitter, browser_semaphore=None, **kwargs):
+    """Load a document for each input URL."""
+    loader_kwargs = {
+        "html_read_kwargs": {"text_splitter": text_splitter},
+        "file_cache_coroutine": TempFileCache.call,
+        "browser_semaphore": browser_semaphore,
+    }
+    loader_kwargs.update(kwargs)
+    file_loader = AsyncFileLoader(**loader_kwargs)
+    docs = await file_loader.fetch_all(*urls)
+
+    logger.debug(
+        "Loaded the following number of pages for docs: %s",
+        pprint.PrettyPrinter().pformat(
+            {
+                doc.metadata.get("source", "Unknown"): len(doc.pages)
+                for doc in docs
+            }
+        ),
+    )
+    return [doc for doc in docs if not doc.empty]
+
+
+async def _down_select_docs_correct_location(
+    docs, location, county, state, **kwargs
+):
+    """Remove all documents not pertaining to the location."""
+    llm_caller = StructuredLLMCaller(**kwargs)
+    county_validator = CountyValidator(llm_caller)
+    searchers = [
+        asyncio.create_task(
+            county_validator.check(doc, county=county, state=state),
+            name=location,
+        )
+        for doc in docs
+    ]
+    output = await asyncio.gather(*searchers)
+    correct_loc_docs = [doc for doc, check in zip(docs, output) if check]
+    return sorted(
+        correct_loc_docs,
+        key=lambda doc: (not isinstance(doc, PDFDocument), len(doc.text)),
+    )
+
+
+async def _check_docs_for_ords(docs, text_splitter, **kwargs):
+    """Check documents to see if they contain ordinance info."""
+    ord_docs = []
+    for doc in docs:
+        doc = await check_for_ordinance_info(doc, text_splitter, **kwargs)
+        if doc.metadata["contains_ord_info"]:
+            ord_docs.append(doc)
+    return ord_docs
+
+
+def _parse_all_ord_docs(all_ord_docs):
+    """Parse a list of documents and get the result for the best match."""
+    if not all_ord_docs:
+        return None
+
+    return sorted(all_ord_docs, key=_ord_doc_sorting_key)[-1]
+
+
+def _ord_doc_sorting_key(doc):
+    """All text sorting key"""
+    year, month, day = doc.metadata.get("date", (-1, -1, -1))
+    return year, isinstance(doc, PDFDocument), -1 * len(doc.text), month, day
+
+
+
+[docs] +async def download_county_ordinance( + location, + text_splitter, + num_urls=5, + file_loader_kwargs=None, + browser_semaphore=None, + **kwargs +): + """Download the ordinance document for a single county. + + Parameters + ---------- + location : elm.ords.utilities.location.Location + Location objects representing the county. + text_splitter : obj, optional + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. Langchain's text splitters should work for this + input. + num_urls : int, optional + Number of unique Google search result URL's to check for + ordinance document. By default, ``5``. + file_loader_kwargs : dict, optional + Dictionary of keyword-argument pairs to initialize + :class:`elm.web.file_loader.AsyncFileLoader` with. The + "pw_launch_kwargs" key in these will also be used to initialize + the :class:`elm.web.google_search.PlaywrightGoogleLinkSearch` + used for the google URL search. By default, ``None``. + browser_semaphore : asyncio.Semaphore, optional + Semaphore instance that can be used to limit the number of + playwright browsers open concurrently. If ``None``, no limits + are applied. By default, ``None``. + **kwargs + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. + + Returns + ------- + elm.web.document.BaseDocument | None + Document instance for the downloaded document, or ``None`` if no + document was found. 
+ """ + file_loader_kwargs = file_loader_kwargs or {} + pw_launch_kwargs = file_loader_kwargs.get("pw_launch_kwargs", {}) + urls = await _find_urls( + location.full_name, + num_results=10, + browser_sem=browser_semaphore, + **pw_launch_kwargs + ) + urls = _down_select_urls(urls, num_urls=num_urls) + logger.debug("Downloading documents for URLS: \n\t-%s", "\n\t-".join(urls)) + docs = await _load_docs( + urls, text_splitter, browser_semaphore, **file_loader_kwargs + ) + docs = await _down_select_docs_correct_location( + docs, + location=location.full_name, + county=location.name, + state=location.state, + **kwargs + ) + docs = await _check_docs_for_ords(docs, text_splitter, **kwargs) + logger.info( + "Found %d potential ordinance documents for %s", + len(docs), + location.full_name, + ) + return _parse_all_ord_docs(docs)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/apply.html b/_modules/elm/ords/extraction/apply.html new file mode 100644 index 00000000..2d80f829 --- /dev/null +++ b/_modules/elm/ords/extraction/apply.html @@ -0,0 +1,633 @@ + + + + + + elm.ords.extraction.apply — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.apply

+# -*- coding: utf-8 -*-
+"""ELM Ordinance function to apply ordinance extraction on a document """
+import logging
+from warnings import warn
+
+from elm.ords.llm import LLMCaller, StructuredLLMCaller
+from elm.ords.extraction.date import DateExtractor
+from elm.ords.extraction.ordinance import (
+    OrdinanceValidator,
+    OrdinanceExtractor,
+)
+from elm.ords.extraction.parse import StructuredOrdinanceParser
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +async def check_for_ordinance_info(doc, text_splitter, **kwargs): + """Parse a single document for ordinance information. + + Parameters + ---------- + doc : elm.web.document.BaseDocument + A document potentially containing ordinance information. Note + that if the document's metadata contains the + ``"contains_ord_info"`` key, it will not be processed. To force + a document to be processed by this function, remove that key + from the documents metadata. + text_splitter : obj + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. Langchain's text splitters should work for this + input. + **kwargs + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. + + Returns + ------- + elm.web.document.BaseDocument + Document that has been parsed for ordinance text. The results of + the parsing are stored in the documents metadata. In particular, + the metadata will contain a ``"contains_ord_info"`` key that + will be set to ``True`` if ordinance info was found in the text, + and ``False`` otherwise. If ``True``, the metadata will also + contain a ``"date"`` key containing the most recent date that + the ordinance was enacted (or a tuple of `None` if not found), + and an ``"ordinance_text"`` key containing the ordinance text + snippet. Note that the snippet may contain other info as well, + but should encapsulate all of the ordinance text. + """ + if "contains_ord_info" in doc.metadata: + return doc + + llm_caller = StructuredLLMCaller(**kwargs) + chunks = text_splitter.split_text(doc.text) + validator = OrdinanceValidator(llm_caller, chunks) + doc.metadata["contains_ord_info"] = await validator.parse() + if doc.metadata["contains_ord_info"]: + doc.metadata["date"] = await DateExtractor(llm_caller).parse(doc) + doc.metadata["ordinance_text"] = validator.ordinance_text + + return doc
+ + + +
+[docs] +async def extract_ordinance_text_with_llm(doc, text_splitter, extractor): + """Extract ordinance text from document using LLM. + + Parameters + ---------- + doc : elm.web.document.BaseDocument + A document known to contain ordinance information. This means it + must contain an ``"ordinance_text"`` key in the metadata. You + can run + :func:`~elm.ords.extraction.apply.check_for_ordinance_info` + to have this attribute populated automatically for documents + that are found to contain ordinance data. Note that if the + document's metadata does not contain the ``"ordinance_text"`` + key, you will get an error. + text_splitter : obj + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. Langchain's text splitters should work for this + input. + extractor : elm.ords.extraction.ordinance.OrdinanceExtractor + Instance of `~elm.ords.extraction.ordinance.OrdinanceExtractor` + used for ordinance text extraction. + + Returns + ------- + elm.web.document.BaseDocument + Document that has been parsed for ordinance text. The results of + the extraction are stored in the document's metadata. In + particular, the metadata will contain a + ``"cleaned_ordinance_text"`` key that will contain the cleaned + ordinance text. + """ + text_chunks = text_splitter.split_text(doc.metadata["ordinance_text"]) + ordinance_text = await extractor.check_for_restrictions(text_chunks) + doc.metadata["restrictions_ordinance_text"] = ordinance_text + + text_chunks = text_splitter.split_text(ordinance_text) + ordinance_text = await extractor.check_for_correct_size(text_chunks) + doc.metadata["cleaned_ordinance_text"] = ordinance_text + + return doc
+ + + +
+[docs] +async def extract_ordinance_text_with_ngram_validation( + doc, + text_splitter, + n=4, + num_extraction_attempts=3, + ngram_fraction_threshold=0.95, + **kwargs, +): + """Extract ordinance text for a single document with known ord info. + + This extraction includes an "ngram" check, which attempts to detect + wether or not the cleaned text was extracted from the original + ordinance text. The processing will attempt to re-extract the text + if the validation does not pass a certain threshold until the + maximum number of attempts is reached. If the text still does not + pass validation at this point, there is a good chance that the LLM + hallucinated parts of the output text, so caution should be taken. + + Parameters + ---------- + doc : elm.web.document.BaseDocument + A document known to contain ordinance information. This means it + must contain an ``"ordinance_text"`` key in the metadata. You + can run + :func:`~elm.ords.extraction.apply.check_for_ordinance_info` + to have this attribute populated automatically for documents + that are found to contain ordinance data. Note that if the + document's metadata does not contain the ``"ordinance_text"`` + key, it will not be processed. + text_splitter : obj + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. Langchain's text splitters should work for this + input. + n : int, optional + Number of words to include per ngram for the ngram validation, + which helps ensure that the LLM did not hallucinate. + By default, ``4``. + num_extraction_attempts : int, optional + Number of extraction attempts before returning text that did not + pass the ngram check. If the processing exceeds this value, + there is a good chance that the LLM hallucinated parts of the + output text. Cannot be negative or 0. By default, ``3``. 
+ ngram_fraction_threshold : float, optional + Fraction of ngrams in the cleaned text that are also found in + the original ordinance text for the extraction to be considered + successful. Should be a value between 0 and 1 (inclusive). + By default, ``0.95``. + **kwargs + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. + + Returns + ------- + elm.web.document.BaseDocument + Document that has been parsed for ordinance text. The results of + the extraction are stored in the document's metadata. In + particular, the metadata will contain a + ``"cleaned_ordinance_text"`` key that will contain the cleaned + ordinance text. + """ + if not doc.metadata.get("ordinance_text"): + msg = ( + "Input document has no 'ordinance_text' key or string does not " + "contain information. Please run `check_for_ordinance_info` " + "prior to calling this method." + ) + logger.warning(msg) + warn(msg, UserWarning) + return doc + + llm_caller = LLMCaller(**kwargs) + extractor = OrdinanceExtractor(llm_caller) + + doc = await _extract_with_ngram_check( + doc, + text_splitter, + extractor, + n=max(1, n), + num_tries=max(1, num_extraction_attempts), + ngram_fraction_threshold=max(0, min(1, ngram_fraction_threshold)), + ) + + return doc
+ + + +async def _extract_with_ngram_check( + doc, + text_splitter, + extractor, + n=4, + num_tries=3, + ngram_fraction_threshold=0.95, +): + """Extract ordinance info from doc and validate using ngrams.""" + from elm.ords.extraction.ngrams import sentence_ngram_containment + + source = doc.metadata.get("source", "Unknown") + og_text = doc.metadata["ordinance_text"] + if not og_text: + msg = ( + "Document missing original ordinance text! No extraction " + "performed (Document source: %s)", + source, + ) + logger.warning(msg) + warn(msg, UserWarning) + return doc + + best_score = 0 + best_summary = "" + for attempt in range(num_tries): + doc = await extract_ordinance_text_with_llm( + doc, text_splitter, extractor + ) + cleaned_text = doc.metadata["cleaned_ordinance_text"] + if not cleaned_text: + logger.debug( + "No cleaned text found after extraction on attempt %d " + "for document with source %s. Retrying...", + attempt, + source, + ) + continue + + ngram_frac = sentence_ngram_containment( + original=og_text, test=cleaned_text, n=n + ) + if ngram_frac >= ngram_fraction_threshold: + logger.debug( + "Document extraction passed ngram check on attempt %d " + "with score %.2f (Document source: %s)", + attempt + 1, + ngram_frac, + source, + ) + break + + if ngram_frac > best_score: + best_score = ngram_frac + best_summary = cleaned_text + + logger.debug( + "Document extraction failed ngram check on attempt %d " + "with score %.2f (Document source: %s). Retrying...", + attempt + 1, + ngram_frac, + source, + ) + else: + doc.metadata["cleaned_ordinance_text"] = best_summary + msg = ( + f"Ngram check failed after {num_tries}. LLM hallucination in " + "cleaned ordinance text is extremely likely! Proceed with " + f"caution!! (Document source: {best_score})" + ) + logger.warning(msg) + warn(msg, UserWarning) + + return doc + + +
+[docs] +async def extract_ordinance_values(doc, **kwargs): + """Extract ordinance values for a single document with known ord text. + + Parameters + ---------- + doc : elm.web.document.BaseDocument + A document known to contain ordinance text. This means it must + contain an ``"cleaned_ordinance_text"`` key in the metadata. You + can run + :func:`~elm.ords.extraction.apply.extract_ordinance_text` + to have this attribute populated automatically for documents + that are found to contain ordinance data. Note that if the + document's metadata does not contain the + ``"cleaned_ordinance_text"`` key, it will not be processed. + **kwargs + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. + + Returns + ------- + elm.web.document.BaseDocument + Document that has been parsed for ordinance values. The results + of the extraction are stored in the document's metadata. In + particular, the metadata will contain an ``"ordinance_values"`` + key that will contain the DataFame with ordinance values. + """ + if not doc.metadata.get("cleaned_ordinance_text"): + msg = ( + "Input document has no 'cleaned_ordinance_text' key or string " + "does not contain info. Please run `extract_ordinance_text` " + "prior to calling this method." + ) + logger.warning(msg) + warn(msg, UserWarning) + return doc + + parser = StructuredOrdinanceParser(**kwargs) + text = doc.metadata["cleaned_ordinance_text"] + doc.metadata["ordinance_values"] = await parser.parse(text) + return doc
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/date.html b/_modules/elm/ords/extraction/date.html new file mode 100644 index 00000000..cd910e54 --- /dev/null +++ b/_modules/elm/ords/extraction/date.html @@ -0,0 +1,422 @@ + + + + + + elm.ords.extraction.date — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.date

+# -*- coding: utf-8 -*-
+"""ELM Ordinance date extraction logic."""
+import logging
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class DateExtractor: + """Helper class to extract date info from document.""" + + SYSTEM_MESSAGE = ( + "You are a legal scholar that reads ordinance text and extracts " + "structured date information. Return your answer in JSON format (not " + "markdown). Your JSON file must include exactly four keys. The first " + "key is 'explanation', which contains a short summary of the most " + "relevant date information you found in the text. The second key is " + "'year', which should contain an integer value that represents the " + "latest year this ordinance was enacted/updated, or null if that " + "information cannot be found in the text. The third key is 'month', " + "which should contain an integer value that represents the latest " + "month of the year this ordinance was enacted/updated, or null if " + "that information cannot be found in the text. The fourth key is " + "'day', which should contain an integer value that represents the " + "latest day of the month this ordinance was enacted/updated, or null " + "if that information cannot be found in the text." + ) + + def __init__(self, structured_llm_caller): + """ + + Parameters + ---------- + structured_llm_caller : elm.ords.llm.StructuredLLMCaller + StructuredLLMCaller instance. Used for structured validation + queries. + """ + self.slc = structured_llm_caller + +
+[docs] + async def parse(self, doc): + """Extract date (year, month, day) from doc. + + Parameters + ---------- + doc : elm.web.document.BaseDocument + Document with a `raw_pages` attribute. + + Returns + ------- + tuple + 3-tuple containing year, month, day, or ``None`` if any of + those are not found. + """ + all_years = [] + if not doc.raw_pages: + return None, None, None + + for text in doc.raw_pages: + if not text: + continue + + response = await self.slc.call( + sys_msg=self.SYSTEM_MESSAGE, + content=f"Please extract the date for this ordinance:\n{text}", + usage_sub_label="date_extraction", + ) + if not response: + continue + all_years.append(response) + + return _parse_date(all_years)
+
+ + + +def _parse_date(json_list): + """Parse all date elements.""" + year = _parse_date_element( + json_list, + key="year", + max_len=4, + min_val=2000, + max_val=float("inf"), + ) + month = _parse_date_element( + json_list, key="month", max_len=2, min_val=1, max_val=12 + ) + day = _parse_date_element( + json_list, key="day", max_len=2, min_val=1, max_val=31 + ) + + return year, month, day + + +def _parse_date_element(json_list, key, max_len, min_val, max_val): + """Parse out a single date element.""" + date_elements = [info.get(key) for info in json_list] + logger.debug(f"{key=}, {date_elements=}") + date_elements = [ + int(y) + for y in date_elements + if y is not None + and len(str(y)) <= max_len + and (min_val <= int(y) <= max_val) + ] + if not date_elements: + return -1 * float("inf") + return max(date_elements) +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/features.html b/_modules/elm/ords/extraction/features.html new file mode 100644 index 00000000..39cd942b --- /dev/null +++ b/_modules/elm/ords/extraction/features.html @@ -0,0 +1,411 @@ + + + + + + elm.ords.extraction.features — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.features

+# -*- coding: utf-8 -*-
+"""ELM Ordinance mutually-exclusive features class."""
+
+
+
+[docs] +class SetbackFeatures: + """Utility class to get mutually-exclusive feature descriptions.""" + + DEFAULT_FEATURE_DESCRIPTIONS = { + "struct": [ + "occupied dwellings", + "buildings", + "structures", + "residences", + ], + "pline": ["property lines", "parcels", "subdivisions"], + "roads": ["roads"], # , "rights-of-way"], + "rail": ["railroads"], + "trans": [ + "overhead electrical transmission lines", + "overhead utility lines", + "utility easements", + "utility lines", + "power lines", + "electrical lines", + "transmission lines", + ], + "water": ["lakes", "reservoirs", "streams", "rivers", "wetlands"], + } + FEATURES_AS_IGNORE = { + "struct": "structures", + "pline": "property lines", + "roads": "roads", + "rail": "railroads", + "trans": "transmission lines", + "water": "wetlands", + } + FEATURE_CLARIFICATIONS = { + "struct": "", + "pline": "", + "roads": "Roads may also be labeled as rights-of-way. ", + "rail": "", + "trans": "", + "water": "", + } + + def __init__(self): + self._validate_descriptions() + + def __iter__(self): + for feature_id in self.DEFAULT_FEATURE_DESCRIPTIONS: + feature, ignore = self._keep_and_ignore(feature_id) + clarification = self.FEATURE_CLARIFICATIONS.get(feature_id, "") + yield { + "feature_id": feature_id, + "feature": feature, + "ignore_features": ignore, + "feature_clarifications": clarification, + } + + def _validate_descriptions(self): + """Ensure all features have at least one description.""" + features_missing_descriptors = set() + for feature, descriptions in self.DEFAULT_FEATURE_DESCRIPTIONS.items(): + if len(descriptions) < 1: + features_missing_descriptors.add(feature) + + if any(features_missing_descriptors): + raise ValueError( + f"The following features are missing descriptors: " + f"{features_missing_descriptors}" + ) + + def _keep_and_ignore(self, feature_id): + """Get the keep and ignore phrases for a feature.""" + keep_keywords = self.DEFAULT_FEATURE_DESCRIPTIONS[feature_id] + ignore = [ + keyword + for 
feat_id, keyword in self.FEATURES_AS_IGNORE.items() + if feat_id != feature_id + ] + + keep_phrase = _join_keywords(keep_keywords, final_sep=", and/or ") + ignore_phrase = _join_keywords(ignore, final_sep=", and ") + + return keep_phrase, ignore_phrase
+ + + +def _join_keywords(keywords, final_sep): + """Join a list of keywords/descriptions.""" + if len(keywords) < 1: + return "" + + if len(keywords) == 1: + return keywords[0] + + comma_separated = ", ".join(keywords[:-1]) + return final_sep.join([comma_separated, keywords[-1]]) +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/graphs.html b/_modules/elm/ords/extraction/graphs.html new file mode 100644 index 00000000..b7101be4 --- /dev/null +++ b/_modules/elm/ords/extraction/graphs.html @@ -0,0 +1,822 @@ + + + + + + elm.ords.extraction.graphs — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.graphs

+# -*- coding: utf-8 -*-
+"""ELM Ordinance Decision Tree Graph setup functions."""
+import networkx as nx
+
+
+_SECTION_PROMPT = (
+    'The value of the "section" key should be a string representing the '
+    "title of the section (including numerical labels), if it's given, "
+    "and `null` otherwise."
+)
+_COMMENT_PROMPT = (
+    'The value of the "comment" key should be a one-sentence explanation '
+    "of how you determined the value, if you think it is necessary "
+    "(`null` otherwise)."
+)
+EXTRACT_ORIGINAL_TEXT_PROMPT = (
+    "Can you extract the raw text with original formatting "
+    "that states how close I can site {wes_type} to {feature}? "
+)
+
+
+def _setup_graph_no_nodes(**kwargs):
+    return nx.DiGraph(
+        SECTION_PROMPT=_SECTION_PROMPT,
+        COMMENT_PROMPT=_COMMENT_PROMPT,
+        **kwargs
+    )
+
+
+
+[docs] +def llm_response_starts_with_yes(response): + """Check if LLM response begins with "yes" (case-insensitive) + + Parameters + ---------- + response : str + LLM response string. + + Returns + ------- + bool + `True` if LLM response begins with "Yes". + """ + return response.lower().startswith("yes")
+ + + +
+[docs] +def llm_response_starts_with_no(response): + """Check if LLM response begins with "no" (case-insensitive) + + Parameters + ---------- + response : str + LLM response string. + + Returns + ------- + bool + `True` if LLM response begins with "No". + """ + return response.lower().startswith("no")
+ + + +
+[docs] +def llm_response_does_not_start_with_no(response): + """Check if LLM response does not start with "no" (case-insensitive) + + Parameters + ---------- + response : str + LLM response string. + + Returns + ------- + bool + `True` if LLM response does not begin with "No". + """ + return not llm_response_starts_with_no(response)
+ + + +
+[docs] +def setup_graph_wes_types(**kwargs): + """Setup Graph to get the largest turbine size in the ordinance text. + + Parameters + ---------- + **kwargs + Keyword-value pairs to add to graph. + + Returns + ------- + nx.DiGraph + Graph instance that can be used to initialize an + `elm.tree.DecisionTree`. + """ + G = _setup_graph_no_nodes(**kwargs) + + G.add_node( + "init", + prompt=( + "Does the following text distinguish between multiple " + "turbine sizes? Distinctions are often made as 'small' vs 'large' " + "wind energy conversion systems or actual MW values. " + "Begin your response with either 'Yes' or 'No' and explain your " + "answer." + '\n\n"""\n{text}\n"""' + ), + ) + + G.add_edge("init", "get_text", condition=llm_response_starts_with_yes) + G.add_node( + "get_text", + prompt=( + "What are the different turbine sizes this text mentions? " + "List them in order of increasing size." + ), + ) + G.add_edge("get_text", "final") + G.add_node( + "final", + prompt=( + "Respond based on our entire conversation so far. Return your " + "answer in JSON format (not markdown). Your JSON file must " + 'include exactly two keys. The keys are "largest_wes_type" and ' + '"explanation". The value of the "largest_wes_type" key should ' + "be a string that labels the largest wind energy conversion " + 'system mentioned in the text. The value of the "explanation" ' + "key should be a string containing a short explanation for your " + "choice." + ), + ) + return G
+ + + +
+[docs] +def setup_base_graph(**kwargs): + """Setup Graph to get setback ordinance text for a particular feature. + + Parameters + ---------- + **kwargs + Keyword-value pairs to add to graph. + + Returns + ------- + nx.DiGraph + Graph instance that can be used to initialize an + `elm.tree.DecisionTree`. + """ + G = _setup_graph_no_nodes(**kwargs) + + G.add_node( + "init", + prompt=( + "Is there text in the following legal document that describes " + "how close I can site or how far I have to setback " + "{wes_type} to {feature}? {feature_clarifications}" + "Pay extra attention to clarifying text found in parentheses " + "and footnotes. Begin your response with either 'Yes' or 'No' " + "and explain your answer." + '\n\n"""\n{text}\n"""' + ), + ) + + G.add_edge( + "init", "get_text", condition=llm_response_does_not_start_with_no + ) + G.add_node("get_text", prompt=EXTRACT_ORIGINAL_TEXT_PROMPT) + + return G
+ + + +
+[docs] +def setup_participating_owner(**kwargs): + """Setup Graph to check for participating vs non-participating owner + setbacks for a feature. + + Parameters + ---------- + **kwargs + Keyword-value pairs to add to graph. + + Returns + ------- + nx.DiGraph + Graph instance that can be used to initialize an + `elm.tree.DecisionTree`. + """ + G = _setup_graph_no_nodes(**kwargs) + + G.add_node( + "init", + prompt=( + "Does the ordinance for {feature} setbacks explicitly specify " + "a value that applies to participating owners? Occupying owners " + "are not participating owners unless explicitly mentioned in the " + "text. Justify your answer by quoting the raw text directly." + ), + ) + G.add_edge("init", "non_part") + G.add_node( + "non_part", + prompt=( + "Does the ordinance for {feature} setbacks explicitly specify " + "a value that applies to non-participating owners? Non-occupying " + "owners are not non-participating owners unless explicitly " + "mentioned in the text. Justify your answer by quoting the raw " + "text directly." + ), + ) + G.add_edge("non_part", "final") + G.add_node( + "final", + prompt=( + "Now we are ready to extract structured data. Respond based on " + "our entire conversation so far. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly two " + 'keys. The keys are "participating" and "non-participating". The ' + 'value of the "participating" key should be a string containing ' + "the raw text with original formatting from the ordinance that " + "applies to participating owners or `null` if there was no such " + 'text. The value of the "non-participating" key should be a ' + "string containing the raw text with original formatting from the " + "ordinance that applies to non-participating owners or simply the " + "full ordinance if the text did not make the distinction between " + "participating and non-participating owners." + ), + ) + return G
+ + + +
+[docs] +def setup_multiplier(**kwargs): + """Setup Graph to extract a setbacks multiplier values for a feature. + + Parameters + ---------- + **kwargs + Keyword-value pairs to add to graph. + + Returns + ------- + nx.DiGraph + Graph instance that can be used to initialize an + `elm.tree.DecisionTree`. + """ + G = _setup_graph_no_nodes(**kwargs) + + G.add_node( + "init", + prompt=( + "We will attempt to extract structured data for this ordinance. " + "Let's think step by step. Does the text mention a multiplier " + "that should be applied to a turbine dimension (e.g. height, " + "rotor diameter, etc) to compute the setback distance from " + "{feature}? Ignore any text related to {ignore_features}. " + "Remember that 1 is a valid multiplier, and treat any mention of " + "'fall zone' as a system height multiplier of 1. Begin your " + "response with either 'Yes' or 'No' and explain your answer." + ), + ) + G.add_edge("init", "no_multiplier", condition=llm_response_starts_with_no) + G.add_node( + "no_multiplier", + prompt=( + "Does the ordinance give the setback from {feature} as a fixed " + "distance value? Explain yourself." + ), + ) + G.add_edge("no_multiplier", "out_static") + G.add_node( + "out_static", + prompt=( + "Now we are ready to extract structured data. Respond based on " + "our entire conversation so far. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly " + 'four keys. The keys are "fixed_value", "units", "section", ' + '"comment". The value of the "fixed_value" key should be a ' + "numerical value corresponding to the setback distance value " + "from {feature} or `null` if there was no such value. The value " + 'of the "units" key should be a string corresponding to the units ' + "of the setback distance value from {feature} or `null` if there " + 'was no such value. 
{SECTION_PROMPT} The value of the "comment" ' + "key should be a one-sentence explanation of how you determined " + "the value, or a short description of the ordinance itself if no " + "multiplier or static setback value was found." + ), + ) + G.add_edge("init", "mult_single", condition=llm_response_starts_with_yes) + + G.add_node( + "mult_single", + prompt=( + "Are multiple values given for the multiplier used to " + "compute the setback distance value from {feature}? If so, " + "select and state the largest one. Otherwise, repeat the single " + "multiplier value that was given in the text. " + ), + ) + G.add_edge("mult_single", "mult_type") + G.add_node( + "mult_type", + prompt=( + "What should the multiplier be applied to? Common acronyms " + "include RD for rotor diameter and HH for hub height. Remember " + "that system/total height is the tip-hight of the turbine. " + "Select a value from the following list and explain yourself: " + "['tip-height-multiplier', 'hub-height-multiplier', " + "'rotor-diameter-multiplier]" + ), + ) + + G.add_edge("mult_type", "adder") + G.add_node( + "adder", + prompt=( + "Does the ordinance include a static distance value that " + "should be added to the result of the multiplication? Do not " + "confuse this value with static setback requirements. Ignore text " + "with clauses such as 'no lesser than', 'no greater than', " + "'the lesser of', or 'the greater of'. Begin your response with " + "either 'Yes' or 'No' and explain your answer, stating the adder " + "value if it exists." + ), + ) + G.add_edge("adder", "out_mult", condition=llm_response_starts_with_no) + G.add_edge("adder", "adder_eq", condition=llm_response_starts_with_yes) + + G.add_node( + "adder_eq", + prompt=( + "We are only interested in adders that satisfy the following " + "equation: 'multiplier * turbine_dimension + <adder>'. Does the " + "adder value you identified satisfy this equation? Begin your " + "response with either 'Yes' or 'No' and explain your answer." 
+ ), + ) + G.add_edge("adder_eq", "out_mult", condition=llm_response_starts_with_no) + G.add_edge( + "adder_eq", + "conversion", + condition=llm_response_starts_with_yes, + ) + G.add_node( + "conversion", + prompt=( + "If the adder value is not given in feet, convert " + "it to feet (remember that there are 3.28084 feet in one meter " + "and 5280 feet in one mile). Show your work step-by-step " + "if you had to perform a conversion." + ), + ) + G.add_edge("conversion", "out_mult") + + G.add_node( + "out_mult", + prompt=( + "Now we are ready to extract structured data. Respond based on " + "our entire conversation so far. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly five " + 'keys. The keys are "mult_value", "mult_type", "adder", ' + '"section", "comment". The value of the "mult_value" key should ' + "be a numerical value corresponding to the multiplier value we " + 'determined earlier. The value of the "mult_type" key should be ' + "a string corresponding to the dimension that the multiplier " + "should be applied to, as we determined earlier. The value of " + 'the "adder" key should be a numerical value corresponding to ' + "the static value to be added to the total setback distance after " + "multiplication, as we determined earlier, or `null` if there is " + "no such value. {SECTION_PROMPT} {COMMENT_PROMPT}" + ), + ) + + return G
+ + + +
+[docs] +def setup_conditional(**kwargs): + """Setup Graph to extract min/max setback values (after mult) for a + feature. These are typically given within the context of + 'the greater of' or 'the lesser of' clauses. + + Parameters + ---------- + **kwargs + Keyword-value pairs to add to graph. + + Returns + ------- + nx.DiGraph + Graph instance that can be used to initialize an + `elm.tree.DecisionTree`. + """ + G = _setup_graph_no_nodes(**kwargs) + + G.add_node( + "init", + prompt=( + "We will attempt to extract structured data for this ordinance. " + "Let's think step by step. Does the setback from {feature} " + "mention a minimum or maximum static setback distance regardless " + "of the outcome of the multiplier calculation? This is often " + "phrased as 'the greater of' or 'the lesser of'. Do not confuse " + "this value with static values to be added to multiplicative " + "setbacks. Begin your response with either 'Yes' or 'No' and " + "explain your answer." + ), + ) + + G.add_edge("init", "conversions", condition=llm_response_starts_with_yes) + G.add_node( + "conversions", + prompt=( + "Tell me the minimum and/or maximum setback distances, " + "converting to feet if necessary (remember that there are " + "3.28084 feet in one meter and 5280 feet in one mile). " + "Explain your answer and show your work if you had to perform " + "a conversion." + ), + ) + + G.add_edge("conversions", "out_condition") + G.add_node( + "out_condition", + prompt=( + "Now we are ready to extract structured data. Respond based " + "on our entire conversation so far. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly two " + 'keys. The keys are "min_dist" and "max_dist". The value of the ' + '"min_dist" key should be a numerical value corresponding to the ' + "minimum setback value from {feature} we determined earlier, or " + '`null` if no such value exists. 
The value of the "max_dist" key ' + "should be a numerical value corresponding to the maximum setback " + "value from {feature} we determined earlier, or `null` if no such " + "value exists." + ), + ) + + return G
+ + + +
+[docs] +def setup_graph_extra_restriction(**kwargs): + """Setup Graph to extract non-setback ordinance values from text. + + Parameters + ---------- + **kwargs + Keyword-value pairs to add to graph. + + Returns + ------- + nx.DiGraph + Graph instance that can be used to initialize an + `elm.tree.DecisionTree`. + """ + G = _setup_graph_no_nodes(**kwargs) + + G.add_node( + "init", + prompt=( + "We will attempt to extract structured data for this " + "ordinance. Let's think step by step. Does the following text " + "explicitly limit the {restriction} allowed for {wes_type}? " + "Do not infer based on other restrictions; if this particular " + "restriction is not explicitly mentioned then say 'No'. Pay extra " + "attention to clarifying text found in parentheses and footnotes. " + "Begin your response with either 'Yes' or 'No' and explain " + "your answer." + '\n\n"""\n{text}\n"""' + ), + ) + G.add_edge("init", "final", condition=llm_response_starts_with_yes) + + G.add_node( + "final", + prompt=( + "Now we are ready to extract structured data. Respond based " + "on our entire conversation so far. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly four " + 'keys. The keys are "value", "units", "section", "comment". The ' + 'value of the "value" key should be a numerical value ' + "corresponding to the {restriction} allowed for {wes_type}, or " + "`null` if the text does not mention such a restriction. Use our " + 'conversation to fill out this value. The value of the "units" ' + "key should be a string corresponding to the units for the " + "{restriction} allowed for {wes_type} by the text below, or " + "`null` if the text does not mention such a restriction. Make " + 'sure to include any "per XXX" clauses in the units. ' + "{SECTION_PROMPT} {COMMENT_PROMPT}" + ), + ) + return G
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/ngrams.html b/_modules/elm/ords/extraction/ngrams.html new file mode 100644 index 00000000..c4dc7245 --- /dev/null +++ b/_modules/elm/ords/extraction/ngrams.html @@ -0,0 +1,407 @@ + + + + + + elm.ords.extraction.ngrams — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.ngrams

+# -*- coding: utf-8 -*-
+"""ELM Ordinance ngram text validation
+
+This check helps validate that the LLM extracted text from the original
+document and did not make it up itself.
+"""
+import nltk
+from nltk.tokenize import word_tokenize
+from nltk.tokenize import sent_tokenize
+from nltk.corpus import stopwords
+from nltk.util import ngrams
+
+
+nltk.download("punkt")
+nltk.download("stopwords")
+STOP_WORDS = set(stopwords.words("english"))
+PUNCTUATIONS = {'"', ".", "(", ")", ",", "?", ";", ":", "''", "``"}
+
+
+def _check_word(word):
+    """``True`` if a word is not a stop word or a punctuation."""
+    return word not in STOP_WORDS and word not in PUNCTUATIONS
+
+
+def _filtered_words(sentence):
+    """Filter out common words and punctuations."""
+    return [
+        word.casefold()
+        for word in word_tokenize(sentence)
+        if _check_word(word.casefold())
+    ]
+
+
+
+[docs] +def convert_text_to_sentence_ngrams(text, n): + """Convert input text to a list of ngrams. + + The text is first split byu sentence, after which each sentence is + converted into ngrams. The ngrams for all sentences are combined and + returned. + + Parameters + ---------- + text : str + Input text containing one or more sentences. + n : int + Number of words to include per ngram. + + Returns + ------- + list + List of tuples, where each tuple is an ngram from the original + text. + """ + all_ngrams = [] + sentences = sent_tokenize(text) + for sentence in sentences: + words = _filtered_words(sentence) + all_ngrams += list(ngrams(words, n)) + return all_ngrams
+ + + +
+[docs] +def sentence_ngram_containment(original, test, n): + """Fraction of sentence ngrams from the test text found in the original. + + Parameters + ---------- + original : str + Original (superset) text. Ngrams from the `test` text will be + checked against this text. + test : str + Test (sub) text. Ngrams from this text will be searched for in + the original text, and the fraction of these ngrams that are + found in the original text will be returned. + n : int + Number of words to include per ngram. + + Returns + ------- + float + Fraction of ngrams from the `test` input that were found in the + `original` text. Always returns ``True`` if test has no ngrams. + """ + ngrams_test = convert_text_to_sentence_ngrams(test, n) + num_test_ngrams = len(ngrams_test) + if not num_test_ngrams: + return True + + ngrams_original = set(convert_text_to_sentence_ngrams(original, n)) + num_ngrams_found = sum(t in ngrams_original for t in ngrams_test) + return num_ngrams_found / num_test_ngrams
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/ordinance.html b/_modules/elm/ords/extraction/ordinance.html new file mode 100644 index 00000000..05fc4958 --- /dev/null +++ b/_modules/elm/ords/extraction/ordinance.html @@ -0,0 +1,657 @@ + + + + + + elm.ords.extraction.ordinance — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for elm.ords.extraction.ordinance

+# -*- coding: utf-8 -*-
+"""ELM Ordinance document content Validation logic
+
+These are primarily used to validate that a legal document applies to a
+particular technology (e.g. Large Wind Energy Conversion Systems).
+"""
+import asyncio
+import logging
+
+from elm import ApiBase
+from elm.ords.validation.content import (
+    ValidationWithMemory,
+    possibly_mentions_wind,
+)
+from elm.ords.utilities.parsing import merge_overlapping_texts
+
+
+logger = logging.getLogger(__name__)
+
+
+RESTRICTIONS = """- buildings / structures / residences
+- property lines / parcels / subdivisions
+- roads / rights-of-way
+- railroads
+- overhead electrical transmission wires
+- bodies of water including wetlands, lakes, reservoirs, streams, and rivers
+- natural, wildlife, and environmental conservation areas
+- noise restrictions
+- shadow flicker restrictions
+- density restrictions
+- turbine height restrictions
+- minimum/maximum lot size
+"""
+
+
+
+[docs] +class OrdinanceValidator(ValidationWithMemory): + """Check document text for wind ordinances.""" + + IS_LEGAL_TEXT_PROMPT = ( + "You extract structured data from text. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly three " + "keys. The first key is 'summary', which is a string that provides a " + "short summary of the text. The second key is 'type', which is a " + "string that best represent the type of document this text belongs " + "to. The third key is '{key}', which is a boolean that is set to " + "True if the type of the text (as you previously determined) is a " + "legally-binding statute or code and False if the text is an excerpt " + "from other non-legal text such as a news article, survey, summary, " + "application, public notice, etc." + ) + + CONTAINS_ORD_PROMPT = ( + "You extract structured data from text. Return your answer in JSON " + "format (not markdown). Your JSON file must include exactly three " + "keys. The first key is 'wind_reqs', which is a string that " + "summarizes the setbacks or other geospatial siting requirements (if " + "any) given in the text for a wind turbine. The second key is 'reqs', " + "which lists the quantitative values from the text excerpt that can " + "be used to compute setbacks or other geospatial siting requirements " + "for a wind turbine/tower (empty list if none exist in the text). The " + "last key is '{key}', which is a boolean that is set to True if the " + "text excerpt provides enough quantitative info to compute setbacks " + "or other geospatial siting requirements for a wind turbine/tower " + "and False otherwise. Geospatial siting is impacted by any of the " + f"following:\n{RESTRICTIONS}" + ) + + IS_UTILITY_SCALE_PROMPT = ( + "You are a legal scholar that reads ordinance text and determines " + "wether it applies to large wind energy systems. 
Large wind energy " + "systems (WES) may also be referred to as wind turbines, wind energy " + "conversion systems (WECS), wind energy facilities (WEF), wind energy " + "turbines (WET), large wind energy turbines (LWET), utility-scale " + "wind energy turbines (UWET), commercial wind energy systems, or " + "similar. Your client is a commercial wind developer that does not " + "care about ordinances related to private, micro, small, or medium " + "sized wind energy systems. Ignore any text related to such systems. " + "Return your answer in JSON format (not markdown). Your JSON file " + "must include exactly two keys. The first key is 'summary' which " + "contains a string that summarizes the types of wind energy systems " + "the text applies to (if any). The second key is '{key}', which is a " + "boolean that is set to True if any part of the text excerpt is " + "applicable to the large wind energy conversion systems that the " + "client is interested in and False otherwise." + ) + + def __init__(self, structured_llm_caller, text_chunks, num_to_recall=2): + """ + + Parameters + ---------- + structured_llm_caller : elm.ords.llm.StructuredLLMCaller + StructuredLLMCaller instance. Used for structured validation + queries. + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. This validator may refer to previous text chunks to + answer validation questions. + num_to_recall : int, optional + Number of chunks to check for each validation call. This + includes the original chunk! For example, if + `num_to_recall=2`, the validator will first check the chunk + at the requested index, and then the previous chunk as well. + By default, ``2``. 
+ """ + super().__init__( + structured_llm_caller=structured_llm_caller, + text_chunks=text_chunks, + num_to_recall=num_to_recall, + ) + self._legal_text_mem = [] + self._wind_mention_mem = [] + self._ordinance_chunks = [] + + @property + def is_legal_text(self): + """bool: ``True`` if text was found to be from a legal source.""" + if not self._legal_text_mem: + return False + return sum(self._legal_text_mem) >= 0.5 * len(self._legal_text_mem) + + @property + def ordinance_text(self): + """str: Combined ordinance text from the individual chunks.""" + inds_to_grab = set() + for info in self._ordinance_chunks: + inds_to_grab |= { + info["ind"] + x for x in range(1 - self.num_to_recall, 2) + } + + text = [ + self.text_chunks[ind] + for ind in sorted(inds_to_grab) + if 0 <= ind < len(self.text_chunks) + ] + return merge_overlapping_texts(text) + +
+[docs] + async def parse(self, min_chunks_to_process=3): + """Parse text chunks and look for ordinance text. + + Parameters + ---------- + min_chunks_to_process : int, optional + Minimum number of chunks to process before checking if + document resembles legal text and ignoring chunks that don't + pass the wind heuristic. By default, ``3``. + + Returns + ------- + bool + ``True`` if any ordinance text was found in the chunks. + """ + for ind, text in enumerate(self.text_chunks): + self._wind_mention_mem.append(possibly_mentions_wind(text)) + if ind >= min_chunks_to_process: + if not self.is_legal_text: + return False + + # fmt: off + if not any(self._wind_mention_mem[-self.num_to_recall:]): + continue + + logger.debug("Processing text at ind %d", ind) + logger.debug("Text:\n%s", text) + + if ind < min_chunks_to_process: + is_legal_text = await self.parse_from_ind( + ind, self.IS_LEGAL_TEXT_PROMPT, key="legal_text" + ) + self._legal_text_mem.append(is_legal_text) + if not is_legal_text: + logger.debug("Text at ind %d is not legal text", ind) + continue + + contains_ord_info = await self.parse_from_ind( + ind, self.CONTAINS_ORD_PROMPT, key="contains_ord_info" + ) + if not contains_ord_info: + logger.debug( + "Text at ind %d does not contain ordinance info", ind + ) + continue + + is_utility_scale = await self.parse_from_ind( + ind, self.IS_UTILITY_SCALE_PROMPT, key="x" + ) + if not is_utility_scale: + logger.debug( + "Text at ind %d is not for utility-scale WECS", ind + ) + continue + + self._ordinance_chunks.append({"text": text, "ind": ind}) + logger.debug("Added text at ind %d to ordinances", ind) + # mask, since we got a good result + self._wind_mention_mem[-1] = False + + return bool(self._ordinance_chunks)
+
+ + + +
+[docs] +class OrdinanceExtractor: + """Extract succinct ordinance text from input""" + + SYSTEM_MESSAGE = ( + "You extract one or more direct excerpts from a given text based on " + "the user's request. Maintain all original formatting and characters " + "without any paraphrasing. If the relevant text is inside of a " + "space-delimited table, return the entire table with the original " + "space-delimited formatting. Never paraphrase! Only return portions " + "of the original text directly." + ) + MODEL_INSTRUCTIONS_RESTRICTIONS = ( + "Extract one or more direct text excerpts related to the restrictions " + "of large wind energy systems with respect to any of the following:\n" + f"{RESTRICTIONS}" + "Include section headers (if any) for the text excerpts. Also include " + "any text excerpts that define what kind of large wind energy " + "conversion system the restriction applies to. If there is no text " + "related to siting restrictions of large wind systems, simply say: " + '"No relevant text."' + ) + MODEL_INSTRUCTIONS_SIZE = ( + "Extract one or more direct text excerpts pertaining to large wind " + "energy systems. Large wind energy systems (WES) may also be referred " + "to as wind turbines, wind energy conversion systems (WECS), wind " + "energy facilities (WEF), wind energy turbines (WET), large wind " + "energy turbines (LWET), utility-scale wind energy turbines (UWET), " + "or similar. Do not return any text excerpts that only apply to " + "private, micro, small, or medium sized wind energy systems. Include " + "section headers (if any) for the text excerpts. Also include any " + "text excerpts that define what kind of large wind energy conversion " + "system the restriction applies to. If there is no text pertaining to " + "large wind systems, simply say: " + '"No relevant text."' + ) + + def __init__(self, llm_caller): + """ + + Parameters + ---------- + llm_caller : elm.ords.llm.LLMCaller + LLM Caller instance used to extract ordinance info with. 
+ """ + self.llm_caller = llm_caller + + async def _process(self, text_chunks, instructions, valid_chunk): + """Perform extraction processing.""" + logger.info( + "Extracting ordinance text from %d text chunks asynchronously...", + len(text_chunks), + ) + outer_task_name = asyncio.current_task().get_name() + summaries = [ + asyncio.create_task( + self.llm_caller.call( + sys_msg=self.SYSTEM_MESSAGE, + content=f"Text:\n{chunk}\n{instructions}", + usage_sub_label="document_ordinance_summary", + ), + name=outer_task_name, + ) + for chunk in text_chunks + ] + summary_chunks = await asyncio.gather(*summaries) + summary_chunks = [ + chunk for chunk in summary_chunks if valid_chunk(chunk) + ] + + text_summary = "\n".join(summary_chunks) + logger.debug( + "Final summary contains %d tokens", + ApiBase.count_tokens( + text_summary, + model=self.llm_caller.kwargs.get("model", "gpt-4"), + ), + ) + return text_summary + +
+[docs] + async def check_for_restrictions(self, text_chunks): + """Extract restriction ordinance text from input text chunks. + + Parameters + ---------- + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. + + Returns + ------- + str + Ordinance text extracted from text chunks. + """ + return await self._process( + text_chunks=text_chunks, + instructions=self.MODEL_INSTRUCTIONS_RESTRICTIONS, + valid_chunk=_valid_chunk_not_short, + )
+ + +
+[docs] + async def check_for_correct_size(self, text_chunks): + """Extract ordinance text from input text chunks for large WES. + + Parameters + ---------- + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. + + Returns + ------- + str + Ordinance text extracted from text chunks. + """ + return await self._process( + text_chunks=text_chunks, + instructions=self.MODEL_INSTRUCTIONS_SIZE, + valid_chunk=_valid_chunk, + )
+
+ + + +def _valid_chunk(chunk): + """True if chunk has content.""" + return chunk and "no relevant text" not in chunk.lower() + + +def _valid_chunk_not_short(chunk): + """True if chunk has content and is not too short.""" + return _valid_chunk(chunk) and len(chunk) > 20 +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/parse.html b/_modules/elm/ords/extraction/parse.html new file mode 100644 index 00000000..f9eed32a --- /dev/null +++ b/_modules/elm/ords/extraction/parse.html @@ -0,0 +1,645 @@ + + + + + + elm.ords.extraction.parse — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.parse

+# -*- coding: utf-8 -*-
+"""ELM Ordinance structured parsing class."""
+import asyncio
+import logging
+from copy import deepcopy
+from itertools import chain
+
+import pandas as pd
+
+from elm.ords.llm.calling import BaseLLMCaller, ChatLLMCaller
+from elm.ords.utilities import llm_response_as_json
+from elm.ords.extraction.tree import AsyncDecisionTree
+from elm.ords.extraction.features import SetbackFeatures
+from elm.ords.extraction.graphs import (
+    EXTRACT_ORIGINAL_TEXT_PROMPT,
+    setup_graph_wes_types,
+    setup_base_graph,
+    setup_multiplier,
+    setup_conditional,
+    setup_participating_owner,
+    setup_graph_extra_restriction,
+    llm_response_starts_with_yes,
+)
+
+
+logger = logging.getLogger(__name__)
+
+DEFAULT_SYSTEM_MESSAGE = (
+    "You are a legal scholar explaining legal ordinances to a wind "
+    "energy developer."
+)
+SETBACKS_SYSTEM_MESSAGE = (
+    f"{DEFAULT_SYSTEM_MESSAGE} "
+    "For the duration of this conversation, only focus on "
+    "ordinances relating to setbacks from {feature} for {wes_type}. Ignore "
+    "all text that pertains to private, micro, small, or medium sized wind "
+    "energy systems."
+)
+RESTRICTIONS_SYSTEM_MESSAGE = (
+    f"{DEFAULT_SYSTEM_MESSAGE} "
+    "For the duration of this conversation, only focus on "
+    "ordinances relating to {restriction} for {wes_type}. Ignore "
+    "all text that pertains to private, micro, small, or medium sized wind "
+    "energy systems."
+)
+EXTRA_RESTRICTIONS_TO_CHECK = {
+    "noise": "maximum noise level",
+    "max height": "maximum turbine height",
+    "min lot size": "minimum lot size",
+    "shadow flicker": "maximum shadow flicker",
+    "density": "maximum turbine spacing",
+}
+
+
+def _setup_async_decision_tree(graph_setup_func, **kwargs):
+    """Setup Async Decision tree dor ordinance extraction."""
+    G = graph_setup_func(**kwargs)
+    tree = AsyncDecisionTree(G)
+    assert len(tree.chat_llm_caller.messages) == 1
+    return tree
+
+
+def _found_ord(messages):
+    """Check if ordinance was found based on messages from the base graph.
+    IMPORTANT: This function may break if the base graph structure changes.
+    Always update the hardcoded values to match the base graph message
+    containing the LLM response about ordinance content.
+    """
+    if len(messages) < 3:
+        return False
+    return llm_response_starts_with_yes(messages[2].get("content", ""))
+
+
+async def _run_async_tree(tree, response_as_json=True):
+    """Run Async Decision Tree and return output as dict."""
+    try:
+        response = await tree.async_run()
+    except RuntimeError:
+        logger.error(
+            "    - NOTE: This is not necessarily an error and may just mean "
+            "that the text does not have the requested data."
+        )
+        response = None
+
+    if response_as_json:
+        return llm_response_as_json(response) if response else {}
+
+    return response
+
+
+async def _run_async_tree_with_bm(tree, base_messages):
+    """Run Async Decision Tree from base messages and return dict output."""
+    tree.chat_llm_caller.messages = base_messages
+    assert len(tree.chat_llm_caller.messages) == len(base_messages)
+    return await _run_async_tree(tree)
+
+
+def _empty_output(feature):
+    """Empty output for a feature (not found in text)."""
+    if feature in {"struct", "pline"}:
+        return [
+            {"feature": f"{feature} (participating)"},
+            {"feature": f"{feature} (non-participating)"},
+        ]
+    return [{"feature": feature}]
+
+
+
+[docs] +class StructuredOrdinanceParser(BaseLLMCaller): + """LLM ordinance document structured data scraping utility.""" + + def _init_chat_llm_caller(self, system_message): + """Initialize a ChatLLMCaller instance for the DecisionTree""" + return ChatLLMCaller( + self.llm_service, + system_message=system_message, + usage_tracker=self.usage_tracker, + **self.kwargs, + ) + +
+[docs] + async def parse(self, text): + """Parse text and extract structure ordinance data. + + Parameters + ---------- + text : str + Ordinance text which may or may not contain setbacks for one + or more features (property lines, structure, roads, etc.). + Text can also contain other supported regulations (noise, + shadow-flicker, etc,) which will be extracted as well. + + Returns + ------- + pd.DataFrame + DataFrame containing parsed-out ordinance values. + """ + largest_wes_type = await self._check_wind_turbine_type(text) + logger.info("Largest WES type found in text: %s", largest_wes_type) + + outer_task_name = asyncio.current_task().get_name() + feature_parsers = [ + asyncio.create_task( + self._parse_setback_feature( + text, feature_kwargs, largest_wes_type + ), + name=outer_task_name, + ) + for feature_kwargs in SetbackFeatures() + ] + extras_parsers = [ + asyncio.create_task( + self._parse_extra_restriction( + text, feature, r_text, largest_wes_type + ), + name=outer_task_name, + ) + for feature, r_text in EXTRA_RESTRICTIONS_TO_CHECK.items() + ] + outputs = await asyncio.gather(*(feature_parsers + extras_parsers)) + + return pd.DataFrame(chain.from_iterable(outputs))
+ + + async def _check_wind_turbine_type(self, text): + """Get the largest turbine size mentioned in the text.""" + logger.debug("Checking turbine_types") + tree = _setup_async_decision_tree( + setup_graph_wes_types, + text=text, + chat_llm_caller=self._init_chat_llm_caller(DEFAULT_SYSTEM_MESSAGE), + ) + dtree_wes_types_out = await _run_async_tree(tree) + + largest_wes_type = ( + dtree_wes_types_out.get("largest_wes_type") + or "large wind energy systems" + ) + return largest_wes_type + + async def _parse_extra_restriction( + self, text, feature, restriction_text, largest_wes_type + ): + """Parse a non-setback restriction from the text.""" + logger.debug("Parsing extra feature %r", feature) + system_message = RESTRICTIONS_SYSTEM_MESSAGE.format( + restriction=restriction_text, wes_type=largest_wes_type + ) + tree = _setup_async_decision_tree( + setup_graph_extra_restriction, + wes_type=largest_wes_type, + restriction=restriction_text, + text=text, + chat_llm_caller=self._init_chat_llm_caller(system_message), + ) + info = await _run_async_tree(tree) + info.update({"feature": feature}) + return [info] + + async def _parse_setback_feature( + self, text, feature_kwargs, largest_wes_type + ): + """Parse values for a setback feature.""" + feature = feature_kwargs["feature_id"] + feature_kwargs["wes_type"] = largest_wes_type + logger.debug("Parsing feature %r", feature) + + base_messages = await self._base_messages(text, **feature_kwargs) + if not _found_ord(base_messages): + logger.debug("Failed `_found_ord` check for feature %r", feature) + return _empty_output(feature) + + if feature not in {"struct", "pline"}: + output = {"feature": feature} + output.update( + await self._extract_setback_values( + text, + base_messages=base_messages, + **feature_kwargs, + ) + ) + return [output] + + return await self._extract_setback_values_for_p_or_np( + text, base_messages, **feature_kwargs + ) + + async def _base_messages(self, text, **feature_kwargs): + """Get base messages for 
setback feature parsing.""" + system_message = SETBACKS_SYSTEM_MESSAGE.format( + feature=feature_kwargs["feature"], + wes_type=feature_kwargs["wes_type"], + ) + tree = _setup_async_decision_tree( + setup_base_graph, + text=text, + chat_llm_caller=self._init_chat_llm_caller(system_message), + **feature_kwargs, + ) + await _run_async_tree(tree, response_as_json=False) + return deepcopy(tree.chat_llm_caller.messages) + + async def _extract_setback_values_for_p_or_np( + self, text, base_messages, **feature_kwargs + ): + """Extract setback values for participating/non-participating ords.""" + logger.debug("Checking participating vs non-participating") + dtree_participating_out = await self._run_setback_graph( + setup_participating_owner, + text, + base_messages=deepcopy(base_messages), + **feature_kwargs, + ) + outer_task_name = asyncio.current_task().get_name() + p_or_np_parsers = [ + asyncio.create_task( + self._parse_p_or_np_text( + key, sub_text, base_messages, **feature_kwargs + ), + name=outer_task_name, + ) + for key, sub_text in dtree_participating_out.items() + ] + return await asyncio.gather(*p_or_np_parsers) + + async def _parse_p_or_np_text( + self, key, sub_text, base_messages, **feature_kwargs + ): + """Parse participating/non-participating sub-text for ord values.""" + feature = feature_kwargs["feature_id"] + out_feat_name = f"{feature} ({key})" + output = {"feature": out_feat_name} + if not sub_text: + return output + + feature = feature_kwargs["feature"] + feature = f"{key} {feature}" + feature_kwargs["feature"] = feature + + base_messages = deepcopy(base_messages) + base_messages[-2]["content"] = EXTRACT_ORIGINAL_TEXT_PROMPT.format( + feature=feature, wes_type=feature_kwargs["wes_type"] + ) + base_messages[-1]["content"] = sub_text + + values = await self._extract_setback_values( + sub_text, + base_messages=base_messages, + **feature_kwargs, + ) + output.update(values) + return output + + async def _extract_setback_values(self, text, **kwargs): + 
"""Extract setback values for a particular feature from input text.""" + dtree_out = await self._run_setback_graph( + setup_multiplier, text, **kwargs + ) + + if dtree_out.get("mult_value") is None: + return dtree_out + + dtree_con_out = await self._run_setback_graph( + setup_conditional, text, **kwargs + ) + dtree_out.update(dtree_con_out) + return dtree_out + + async def _run_setback_graph( + self, + graphs_setup_func, + text, + feature, + wes_type, + base_messages=None, + **kwargs, + ): + """Generic function to run async tree for ordinance extraction.""" + system_message = SETBACKS_SYSTEM_MESSAGE.format( + feature=feature, wes_type=wes_type + ) + tree = _setup_async_decision_tree( + graphs_setup_func, + feature=feature, + text=text, + chat_llm_caller=self._init_chat_llm_caller(system_message), + **kwargs, + ) + if base_messages: + return await _run_async_tree_with_bm(tree, base_messages) + return await _run_async_tree(tree)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/extraction/tree.html b/_modules/elm/ords/extraction/tree.html new file mode 100644 index 00000000..1858efd4 --- /dev/null +++ b/_modules/elm/ords/extraction/tree.html @@ -0,0 +1,454 @@ + + + + + + elm.ords.extraction.tree — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.extraction.tree

+# -*- coding: utf-8 -*-
+"""ELM Ordinance async decision tree."""
+import networkx as nx
+import logging
+
+from elm.tree import DecisionTree
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class AsyncDecisionTree(DecisionTree): + """Async class to traverse a directed graph of LLM prompts. Nodes are + prompts and edges are transitions between prompts based on conditions + being met in the LLM response.""" + + def __init__(self, graph): + """Async class to traverse a directed graph of LLM prompts. Nodes are + prompts and edges are transitions between prompts based on conditions + being met in the LLM response. + + Parameters + ---------- + graph : nx.DiGraph + Directed acyclic graph where nodes are LLM prompts and edges are + logical transitions based on the response. Must have high-level + graph attribute "chat_llm_caller" which is a ChatLLMCaller + instance. Nodes should have attribute "prompt" which can have + {format} named arguments that will be filled from the high-level + graph attributes. Edges can have attribute "condition" that is a + callable to be executed on the LLM response text. An edge from a + node without a condition acts as an "else" statement if no other + edge conditions are satisfied. A single edge from node to node + does not need a condition. + """ + self._g = graph + self._history = [] + assert isinstance(self.graph, nx.DiGraph) + assert "chat_llm_caller" in self.graph.graph + + @property + def chat_llm_caller(self): + """elm.ords.llm.ChatLLMCaller: ChatLLMCaller instance for this tree.""" + return self.graph.graph["chat_llm_caller"] + + @property + def messages(self): + """Get a list of the conversation messages with the LLM. + + Returns + ------- + list + """ + return self.chat_llm_caller.messages + + @property + def all_messages_txt(self): + """Get a printout of the full conversation with the LLM + + Returns + ------- + str + """ + messages = [ + f"{msg['role'].upper()}: {msg['content']}" for msg in self.messages + ] + messages = "\n\n".join(messages) + return messages + +
+[docs] + async def async_call_node(self, node0): + """Call the LLM with the prompt from the input node and search the + successor edges for a valid transition condition + + Parameters + ---------- + node0 : str + Name of node being executed. + + Returns + ------- + out : str + Next node or LLM response if at a leaf node. + """ + prompt = self._prepare_graph_call(node0) + out = await self.chat_llm_caller.call(prompt, usage_sub_label="dtree") + logger.debug( + "Chat GPT prompt:\n%s\nChat GPT response:\n%s", prompt, out + ) + return self._parse_graph_output(node0, out)
+ + +
+[docs] + async def async_run(self, node0="init"): + """Traverse the decision tree starting at the input node. + + Parameters + ---------- + node0 : str + Name of starting node in the graph. This is typically called "init" + + Returns + ------- + out : str + Final response from LLM at the leaf node. + """ + + self._history = [] + + while True: + try: + out = await self.async_call_node(node0) + except Exception as e: + logger.debug( + "Error traversing trees, here's the full " + "conversation printout:\n%s", + self.all_messages_txt, + ) + last_message = self.messages[-1]["content"] + msg = ( + "Ran into an exception when traversing tree. " + "Last message from LLM is printed below. " + "See debug logs for more detail. " + "\nLast message: \n" + '"""\n%s\n"""' + ) + logger.error(msg, last_message) + logger.exception(e) + raise RuntimeError(msg % last_message) from e + if out in self.graph: + node0 = out + else: + break + + logger.info("Output: %s", out) + + return out
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/llm/calling.html b/_modules/elm/ords/llm/calling.html new file mode 100644 index 00000000..d852e563 --- /dev/null +++ b/_modules/elm/ords/llm/calling.html @@ -0,0 +1,514 @@ + + + + + + elm.ords.llm.calling — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.llm.calling

+# -*- coding: utf-8 -*-
+"""ELM Ordinances LLM Calling classes."""
+import logging
+
+from elm.ords.utilities import llm_response_as_json
+
+
+logger = logging.getLogger(__name__)
+_JSON_INSTRUCTIONS = "Return your answer in JSON format"
+
+
+
+[docs] +class BaseLLMCaller: + """Class to support LLM calling functionality.""" + + def __init__(self, llm_service, usage_tracker=None, **kwargs): + """ + + Parameters + ---------- + llm_service : elm.ords.services.base.Service + LLM service used for queries. + usage_tracker : elm.ords.services.usage.UsageTracker, optional + Optional tracker instance to monitor token usage during + LLM calls. By default, ``None``. + **kwargs + Keyword arguments to be passed to the underlying service + processing function (i.e. `llm_service.call(**kwargs)`). + Should *not* contain the following keys: + + - usage_tracker + - usage_sub_label + - messages + + These arguments are provided by this caller object. + """ + self.llm_service = llm_service + self.usage_tracker = usage_tracker + self.kwargs = kwargs
+ + + +
+[docs] +class LLMCaller(BaseLLMCaller): + """Simple LLM caller, with no memory and no parsing utilities.""" + +
+[docs] + async def call(self, sys_msg, content, usage_sub_label="default"): + """Call LLM. + + Parameters + ---------- + sys_msg : str + The LLM system message. + content : str + Your chat message for the LLM. + usage_sub_label : str, optional + Label to store token usage under. By default, ``"default"``. + + Returns + ------- + str | None + The LLM response, as a string, or ``None`` if something went + wrong during the call. + """ + response = await self.llm_service.call( + usage_tracker=self.usage_tracker, + usage_sub_label=usage_sub_label, + messages=[ + {"role": "system", "content": sys_msg}, + {"role": "user", "content": content}, + ], + **self.kwargs, + ) + return response
+
+ + + +
+[docs] +class ChatLLMCaller(BaseLLMCaller): + """Class to support chat-like LLM calling functionality.""" + + def __init__( + self, llm_service, system_message, usage_tracker=None, **kwargs + ): + """ + + Parameters + ---------- + llm_service : elm.ords.services.base.Service + LLM service used for queries. + system_message : str + System message to use for chat with LLM. + usage_tracker : elm.ords.services.usage.UsageTracker, optional + Optional tracker instance to monitor token usage during + LLM calls. By default, ``None``. + **kwargs + Keyword arguments to be passed to the underlying service + processing function (i.e. `llm_service.call(**kwargs)`). + Should *not* contain the following keys: + + - usage_tracker + - usage_sub_label + - messages + + These arguments are provided by this caller object. + """ + super().__init__(llm_service, usage_tracker, **kwargs) + self.messages = [{"role": "system", "content": system_message}] + +
+[docs] + async def call(self, content, usage_sub_label="chat"): + """Chat with the LLM. + + Parameters + ---------- + content : str + Your chat message for the LLM. + usage_sub_label : str, optional + Label to store token usage under. By default, ``"chat"``. + + Returns + ------- + str | None + The LLM response, as a string, or ``None`` if something went + wrong during the call. + """ + self.messages.append({"role": "user", "content": content}) + + response = await self.llm_service.call( + usage_tracker=self.usage_tracker, + usage_sub_label=usage_sub_label, + messages=self.messages, + **self.kwargs, + ) + if response is None: + self.messages = self.messages[:-1] + return None + + self.messages.append({"role": "assistant", "content": response}) + return response
+
+ + + +
+[docs] +class StructuredLLMCaller(BaseLLMCaller): + """Class to support structured (JSON) LLM calling functionality.""" + +
+[docs] + async def call(self, sys_msg, content, usage_sub_label="default"): + """Call LLM for structured data retrieval. + + Parameters + ---------- + sys_msg : str + The LLM system message. If this text does not contain the + instruction text "Return your answer in JSON format", it + will be added. + content : str + LLM call content (typically some text to extract info from). + usage_sub_label : str, optional + Label to store token usage under. By default, ``"default"``. + + Returns + ------- + dict + Dictionary containing the LLM-extracted features. Dictionary + may be empty if there was an error during the LLM call. + """ + sys_msg = _add_json_instructions_if_needed(sys_msg) + + response = await self.llm_service.call( + usage_tracker=self.usage_tracker, + usage_sub_label=usage_sub_label, + messages=[ + {"role": "system", "content": sys_msg}, + {"role": "user", "content": content}, + ], + **self.kwargs, + ) + return llm_response_as_json(response) if response else {}
+
+ + + +def _add_json_instructions_if_needed(system_message): + """Add JSON instruction to system message if needed.""" + if _JSON_INSTRUCTIONS.casefold() not in system_message.casefold(): + logger.debug( + "JSON instructions not found in system message. Adding..." + ) + system_message = f"{system_message} {_JSON_INSTRUCTIONS}." + logger.debug("New system message:\n%s", system_message) + return system_message +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/process.html b/_modules/elm/ords/process.html new file mode 100644 index 00000000..b779219a --- /dev/null +++ b/_modules/elm/ords/process.html @@ -0,0 +1,1035 @@ + + + + + + elm.ords.process — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.process

+# -*- coding: utf-8 -*-
+"""ELM Ordinance full processing logic"""
+import os
+import time
+import json
+import asyncio
+import logging
+from datetime import datetime, timedelta
+from pathlib import Path
+from functools import partial
+
+import openai
+import pandas as pd
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+
+from elm import ApiBase
+from elm.ords.download import download_county_ordinance
+from elm.ords.extraction import (
+    extract_ordinance_text_with_ngram_validation,
+    extract_ordinance_values,
+)
+from elm.ords.services.usage import UsageTracker
+from elm.ords.services.openai import OpenAIService, usage_from_response
+from elm.ords.services.provider import RunningAsyncServices
+from elm.ords.services.threaded import (
+    TempFileCache,
+    FileMover,
+    CleanedFileWriter,
+    OrdDBFileWriter,
+    UsageUpdater,
+)
+from elm.ords.services.cpu import PDFLoader, read_pdf_doc, read_pdf_doc_ocr
+from elm.ords.utilities import (
+    RTS_SEPARATORS,
+    load_all_county_info,
+    load_counties_from_fp,
+)
+from elm.ords.utilities.location import County
+from elm.ords.utilities.queued_logging import (
+    LocationFileLog,
+    LogListener,
+    NoLocationFilter,
+)
+
+logger = logging.getLogger(__name__)
+
+
+OUT_COLS = [
+    "county",
+    "state",
+    "FIPS",
+    "feature",
+    "fixed_value",
+    "mult_value",
+    "mult_type",
+    "adder",
+    "min_dist",
+    "max_dist",
+    "value",
+    "units",
+    "ord_year",
+    "last_updated",
+    "section",
+    "source",
+    "comment",
+]
+
+CHECK_COLS = [
+    "fixed_value",
+    "mult_value",
+    "adder",
+    "min_dist",
+    "max_dist",
+    "value",
+]
+
+
+
+[docs] +async def process_counties_with_openai( + out_dir, + county_fp=None, + model="gpt-4", + azure_api_key=None, + azure_version=None, + azure_endpoint=None, + llm_call_kwargs=None, + llm_service_rate_limit=4000, + text_splitter_chunk_size=3000, + text_splitter_chunk_overlap=300, + num_urls_to_check_per_county=5, + max_num_concurrent_browsers=10, + file_loader_kwargs=None, + pytesseract_exe_fp=None, + td_kwargs=None, + tpe_kwargs=None, + ppe_kwargs=None, + log_dir=None, + clean_dir=None, + county_ords_dir=None, + county_dbs_dir=None, + log_level="INFO", +): + """Download and extract ordinances for a list of counties. + + Parameters + ---------- + out_dir : path-like + Path to output directory. This directory will be created if it + does not exist. This directory will contain the structured + ordinance output CSV as well as all of the scraped ordinance + documents (PDFs and HTML text files). Usage information and + default options for log/clean directories will also be stored + here. + county_fp : path-like, optional + Path to CSV file containing a list of counties to extract + ordinance information for. This CSV should have "County" and + "State" columns that contains the county and state names. + By default, ``None``, which runs the extraction for all known + counties (this is untested and not currently recommended). + model : str, optional + Name of LLM model to perform scraping. By default, ``"gpt-4"``. + azure_api_key : str, optional + Azure OpenAI API key. By default, ``None``, which pulls the key + from the environment variable ``AZURE_OPENAI_API_KEY`` instead. + azure_version : str, optional + Azure OpenAI API version. By default, ``None``, which pulls the + version from the environment variable ``AZURE_OPENAI_VERSION`` + instead. + azure_endpoint : str, optional + Azure OpenAI API endpoint. By default, ``None``, which pulls the + endpoint from the environment variable ``AZURE_OPENAI_ENDPOINT`` + instead. 
+ llm_call_kwargs : dict, optional + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. By default, ``None``. + llm_service_rate_limit : int, optional + Token rate limit of LLm service being used (OpenAI). + By default, ``4000``. + text_splitter_chunk_size : int, optional + Chunk size input to + `langchain.text_splitter.RecursiveCharacterTextSplitter`. + By default, ``3000``. + text_splitter_chunk_overlap : int, optional + Chunk overlap input to + `langchain.text_splitter.RecursiveCharacterTextSplitter`. + By default, ``300``. + num_urls_to_check_per_county : int, optional + Number of unique Google search result URL's to check for + ordinance document. By default, ``5``. + max_num_concurrent_browsers : int, optional + Number of unique concurrent browser instances to open when + performing Google search. Setting this number too high on a + machine with limited processing can lead to increased timeouts + and therefore decreased quality of Google search results. + By default, ``10``. + pytesseract_exe_fp : path-like, optional + Path to pytesseract executable. If this option is specified, OCR + parsing for PDf files will be enabled via pytesseract. + By default, ``None``. + td_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`tempfile.TemporaryDirectory`. The temporary directory is + used to store files downloaded from the web that are still being + parsed for ordinance information. By default, ``None``. + tpe_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ThreadPoolExecutor`. The thread pool + executor is used to run I/O intensive tasks like writing to a + log file. By default, ``None``. + ppe_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ProcessPoolExecutor`. The process + pool executor is used to run CPU intensive tasks like loading + a PDF file. By default, ``None``. 
+ log_dir : path-like, optional + Path to directory for log files. This directory will be created + if it does not exist. By default, ``None``, which + creates a ``logs`` folder in the output directory for the + county-specific log files. + clean_dir : path-like, optional + Path to directory for cleaned ordinance text output. This + directory will be created if it does not exist. By default, + ``None``, which creates a ``clean`` folder in the output + directory for the cleaned ordinance text files. + county_ords_dir : path-like, optional + Path to directory for individual county ordinance file outputs. + This directory will be created if it does not exist. + By default, ``None``, which creates a ``county_ord_files`` + folder in the output directory. + county_dbs_dir : path-like, optional + Path to directory for individual county ordinance database + outputs. This directory will be created if it does not exist. + By default, ``None``, which creates a ``county_dbs`` folder in + the output directory. + log_level : str, optional + Log level to set for county retrieval and parsing loggers. + By default, ``"INFO"``. + + Returns + ------- + pd.DataFrame + DataFrame of parsed ordinance information. This file will also + be stored in the output directory under "wind_db.csv". 
+ """ + start_time = time.time() + log_listener = LogListener(["elm"], level=log_level) + dirs = _setup_folders( + out_dir, + log_dir=log_dir, + clean_dir=clean_dir, + cod=county_ords_dir, + cdd=county_dbs_dir, + ) + out_dir, log_dir, clean_dir, county_ords_dir, county_dbs_dir = dirs + async with log_listener as ll: + _setup_main_logging(log_dir, log_level, ll) + db = await _process_with_logs( + out_dir, + log_dir, + clean_dir, + county_ords_dir, + county_dbs_dir, + ll, + county_fp=county_fp, + model=model, + azure_api_key=azure_api_key, + azure_version=azure_version, + azure_endpoint=azure_endpoint, + llm_call_kwargs=llm_call_kwargs, + llm_service_rate_limit=llm_service_rate_limit, + text_splitter_chunk_size=text_splitter_chunk_size, + text_splitter_chunk_overlap=text_splitter_chunk_overlap, + num_urls_to_check_per_county=num_urls_to_check_per_county, + max_num_concurrent_browsers=max_num_concurrent_browsers, + file_loader_kwargs=file_loader_kwargs, + pytesseract_exe_fp=pytesseract_exe_fp, + td_kwargs=td_kwargs, + tpe_kwargs=tpe_kwargs, + ppe_kwargs=ppe_kwargs, + log_level=log_level, + ) + _record_total_time(out_dir / "usage.json", time.time() - start_time) + return db
+ + + +async def _process_with_logs( + out_dir, + log_dir, + clean_dir, + county_ords_dir, + county_dbs_dir, + log_listener, + county_fp=None, + model="gpt-4", + azure_api_key=None, + azure_version=None, + azure_endpoint=None, + llm_call_kwargs=None, + llm_service_rate_limit=4000, + text_splitter_chunk_size=3000, + text_splitter_chunk_overlap=300, + num_urls_to_check_per_county=5, + max_num_concurrent_browsers=10, + file_loader_kwargs=None, + pytesseract_exe_fp=None, + td_kwargs=None, + tpe_kwargs=None, + ppe_kwargs=None, + log_level="INFO", +): + """Process counties with logging enabled.""" + counties = _load_counties_to_process(county_fp) + azure_api_key, azure_version, azure_endpoint = _validate_api_params( + azure_api_key, azure_version, azure_endpoint + ) + + tpe_kwargs = _configure_thread_pool_kwargs(tpe_kwargs) + file_loader_kwargs = _configure_file_loader_kwargs(file_loader_kwargs) + if pytesseract_exe_fp is not None: + _setup_pytesseract(pytesseract_exe_fp) + file_loader_kwargs.update({"pdf_ocr_read_coroutine": read_pdf_doc_ocr}) + + text_splitter = RecursiveCharacterTextSplitter( + RTS_SEPARATORS, + chunk_size=text_splitter_chunk_size, + chunk_overlap=text_splitter_chunk_overlap, + length_function=partial(ApiBase.count_tokens, model=model), + ) + client = openai.AsyncAzureOpenAI( + api_key=azure_api_key, + api_version=azure_version, + azure_endpoint=azure_endpoint, + ) + + services = [ + OpenAIService(client, rate_limit=llm_service_rate_limit), + TempFileCache(td_kwargs=td_kwargs, tpe_kwargs=tpe_kwargs), + FileMover(county_ords_dir, tpe_kwargs=tpe_kwargs), + CleanedFileWriter(clean_dir, tpe_kwargs=tpe_kwargs), + OrdDBFileWriter(county_dbs_dir, tpe_kwargs=tpe_kwargs), + UsageUpdater(out_dir / "usage.json", tpe_kwargs=tpe_kwargs), + PDFLoader(**(ppe_kwargs or {})), + ] + + browser_semaphore = ( + asyncio.Semaphore(max_num_concurrent_browsers) + if max_num_concurrent_browsers + else None + ) + + async with RunningAsyncServices(services): + tasks = [] + 
trackers = [] + for __, row in counties.iterrows(): + county, state, fips = row[["County", "State", "FIPS"]] + location = County(county.strip(), state=state.strip(), fips=fips) + usage_tracker = UsageTracker( + location.full_name, usage_from_response + ) + trackers.append(usage_tracker) + task = asyncio.create_task( + download_docs_for_county_with_logging( + log_listener, + log_dir, + location, + text_splitter, + num_urls=num_urls_to_check_per_county, + file_loader_kwargs=file_loader_kwargs, + browser_semaphore=browser_semaphore, + level=log_level, + llm_service=OpenAIService, + usage_tracker=usage_tracker, + model=model, + **(llm_call_kwargs or {}), + ), + name=location.full_name, + ) + tasks.append(task) + docs = await asyncio.gather(*tasks) + + db = _docs_to_db(docs) + db.to_csv(out_dir / "wind_db.csv", index=False) + return db + + +def _setup_main_logging(log_dir, level, listener): + """Setup main logger for catching exceptions during execution.""" + handler = logging.FileHandler(log_dir / "main.log", encoding="utf-8") + handler.setLevel(level) + handler.addFilter(NoLocationFilter()) + listener.addHandler(handler) + + +def _setup_folders( + out_dir, + log_dir=None, + clean_dir=None, + cod=None, + cdd=None, +): + """Setup output directory folders.""" + out_dir = Path(out_dir) + out_folders = [ + out_dir, + Path(log_dir) if log_dir else out_dir / "logs", + Path(clean_dir) if clean_dir else out_dir / "clean", + Path(cod) if cod else out_dir / "county_ord_files", + Path(cdd) if cdd else out_dir / "county_dbs", + ] + for folder in out_folders: + folder.mkdir(exist_ok=True, parents=True) + return out_folders + + +def _load_counties_to_process(county_fp): + """Load the counties to retrieve documents for.""" + if county_fp is None: + logger.info("No `county_fp` input! 
Loading all counties") + return load_all_county_info() + return load_counties_from_fp(county_fp) + + +def _validate_api_params(azure_api_key, azure_version, azure_endpoint): + """Validate OpenAI API parameters.""" + azure_api_key = azure_api_key or os.environ.get("AZURE_OPENAI_API_KEY") + azure_version = azure_version or os.environ.get("AZURE_OPENAI_VERSION") + azure_endpoint = azure_endpoint or os.environ.get("AZURE_OPENAI_ENDPOINT") + assert azure_api_key is not None, "Must set AZURE_OPENAI_API_KEY!" + assert azure_version is not None, "Must set AZURE_OPENAI_VERSION!" + assert azure_endpoint is not None, "Must set AZURE_OPENAI_ENDPOINT!" + return azure_api_key, azure_version, azure_endpoint + + +def _configure_thread_pool_kwargs(tpe_kwargs): + """Set thread pool workers to 5 if user didn't specify.""" + tpe_kwargs = tpe_kwargs or {} + tpe_kwargs.setdefault("max_workers", 5) + return tpe_kwargs + + +def _configure_file_loader_kwargs(file_loader_kwargs): + """Add PDF reading coroutine to kwargs.""" + file_loader_kwargs = file_loader_kwargs or {} + file_loader_kwargs.update({"pdf_read_coroutine": read_pdf_doc}) + return file_loader_kwargs + + +
+[docs] +async def download_docs_for_county_with_logging( + listener, + log_dir, + county, + text_splitter, + num_urls=5, + file_loader_kwargs=None, + browser_semaphore=None, + level="INFO", + **kwargs, +): + """Retrieve ordinance document for a single county with async logs. + + Parameters + ---------- + listener : elm.ords.utilities.queued_logging.LogListener + Active ``LogListener`` instance that can be passed to + :class:`elm.ords.utilities.queued_logging.LocationFileLog`. + log_dir : path-like + Path to output directory to contain log file. + county : elm.ords.utilities.location.Location + County to retrieve ordinance document for. + text_splitter : obj, optional + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. Langchain's text splitters should work for this + input. + num_urls : int, optional + Number of unique Google search result URL's to check for + ordinance document. By default, ``5``. + file_loader_kwargs : dict, optional + Dictionary of keyword-argument pairs to initialize + :class:`elm.web.file_loader.AsyncFileLoader` with. The + "pw_launch_kwargs" key in these will also be used to initialize + the :class:`elm.web.google_search.PlaywrightGoogleLinkSearch` + used for the google URL search. By default, ``None``. + browser_semaphore : asyncio.Semaphore, optional + Semaphore instance that can be used to limit the number of + playwright browsers open concurrently. If ``None``, no limits + are applied. By default, ``None``. + level : str, optional + Log level to set for retrieval logger. By default, ``"INFO"``. + **kwargs + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. + + Returns + ------- + elm.web.document.BaseDocument | None + Document instance for the ordinance document, or ``None`` if no + document was found. Extracted ordinance information is stored in + the document's ``metadata`` attribute. 
+ """ + with LocationFileLog( + listener, log_dir, location=county.full_name, level=level + ): + task = asyncio.create_task( + download_doc_for_county( + county, + text_splitter, + num_urls=num_urls, + file_loader_kwargs=file_loader_kwargs, + browser_semaphore=browser_semaphore, + **kwargs, + ), + name=county.full_name, + ) + try: + doc, *__ = await asyncio.gather(task) + except KeyboardInterrupt: + raise + except Exception as e: + logger.error( + "Encountered error while processing %s:", county.full_name + ) + logger.exception(e) + doc = None + + return doc
+ + + +
+[docs] +async def download_doc_for_county( + county, + text_splitter, + num_urls=5, + file_loader_kwargs=None, + browser_semaphore=None, + **kwargs, +): + """Download and parse ordinance document for a single county. + + Parameters + ---------- + county : elm.ords.utilities.location.Location + County to retrieve ordinance document for. + text_splitter : obj, optional + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. Langchain's text splitters should work for this + input. + num_urls : int, optional + Number of unique Google search result URL's to check for + ordinance document. By default, ``5``. + file_loader_kwargs : dict, optional + Dictionary of keyword-argument pairs to initialize + :class:`elm.web.file_loader.AsyncFileLoader` with. The + "pw_launch_kwargs" key in these will also be used to initialize + the :class:`elm.web.google_search.PlaywrightGoogleLinkSearch` + used for the google URL search. By default, ``None``. + browser_semaphore : asyncio.Semaphore, optional + Semaphore instance that can be used to limit the number of + playwright browsers open concurrently. If ``None``, no limits + are applied. By default, ``None``. + **kwargs + Keyword-value pairs used to initialize an + `elm.ords.llm.LLMCaller` instance. + + Returns + ------- + elm.web.document.BaseDocument | None + Document instance for the ordinance document, or ``None`` if no + document was found. Extracted ordinance information is stored in + the document's ``metadata`` attribute. 
+ """ + start_time = time.time() + doc = await download_county_ordinance( + county, + text_splitter, + num_urls=num_urls, + file_loader_kwargs=file_loader_kwargs, + browser_semaphore=browser_semaphore, + **kwargs, + ) + if doc is None: + await _record_time_and_usage(start_time, **kwargs) + return None + + doc.metadata["location"] = county + doc.metadata["location_name"] = county.full_name + await _record_usage(**kwargs) + + doc = await extract_ordinance_text_with_ngram_validation( + doc, text_splitter, **kwargs + ) + await _record_usage(**kwargs) + + doc = await _write_cleaned_text(doc) + doc = await extract_ordinance_values(doc, **kwargs) + + ord_count = _num_ords_in_doc(doc) + if ord_count > 0: + doc = await _move_file_to_out_dir(doc) + doc = await _write_ord_db(doc) + logger.info( + "%d ordinance value(s) found for %s. Outputs are here: '%s'", + ord_count, + county.full_name, + doc.metadata["ord_db_fp"], + ) + else: + logger.info("No ordinances found for %s.", county.full_name) + + await _record_time_and_usage(start_time, **kwargs) + return doc
+ + + +async def _record_usage(**kwargs): + """Dump usage to file if tracker found in kwargs.""" + usage_tracker = kwargs.get("usage_tracker") + if usage_tracker: + await UsageUpdater.call(usage_tracker) + + +async def _record_time_and_usage(start_time, **kwargs): + """Add elapsed time before updating usage to file.""" + seconds_elapsed = time.time() - start_time + usage_tracker = kwargs.get("usage_tracker") + if usage_tracker: + usage_tracker["total_time_seconds"] = seconds_elapsed + usage_tracker["total_time"] = str(timedelta(seconds=seconds_elapsed)) + await UsageUpdater.call(usage_tracker) + + +async def _move_file_to_out_dir(doc): + """Move PDF or HTML text file to output directory.""" + out_fp = await FileMover.call(doc) + doc.metadata["out_fp"] = out_fp + return doc + + +async def _write_cleaned_text(doc): + """Write cleaned text to `clean_dir`.""" + out_fp = await CleanedFileWriter.call(doc) + doc.metadata["cleaned_fp"] = out_fp + return doc + + +async def _write_ord_db(doc): + """Write cleaned text to `county_dbs_dir`.""" + out_fp = await OrdDBFileWriter.call(doc) + doc.metadata["ord_db_fp"] = out_fp + return doc + + +def _setup_pytesseract(exe_fp): + """Set the pytesseract command.""" + import pytesseract + + logger.debug("Setting `tesseract_cmd` to %s", exe_fp) + pytesseract.pytesseract.tesseract_cmd = exe_fp + + +def _record_total_time(fp, seconds_elapsed): + """Dump usage to an existing file.""" + if not Path(fp).exists(): + usage_info = {} + else: + with open(fp, "r") as fh: + usage_info = json.load(fh) + + total_time_str = str(timedelta(seconds=seconds_elapsed)) + usage_info["total_time_seconds"] = seconds_elapsed + usage_info["total_time"] = total_time_str + + with open(fp, "w") as fh: + json.dump(usage_info, fh, indent=4) + + logger.info("Total processing time: %s", total_time_str) + + +def _num_ords_in_doc(doc): + """Check if doc contains any scraped ordinance values.""" + if doc is None: + return 0 + + if "ordinance_values" not in doc.metadata: + 
return 0 + + ord_vals = doc.metadata["ordinance_values"] + if ord_vals.empty: + return 0 + + check_cols = [col for col in CHECK_COLS if col in ord_vals] + if not check_cols: + return 0 + + return (~ord_vals[check_cols].isna()).values.sum(axis=1).sum() + + +def _docs_to_db(docs): + """Convert list of docs to output database.""" + db = [] + for doc in docs: + if doc is None or isinstance(doc, Exception): + continue + + if _num_ords_in_doc(doc) == 0: + continue + + results = _db_results(doc) + results = _formatted_db(results) + db.append(results) + + if not db: + return pd.DataFrame(columns=OUT_COLS) + + db = pd.concat(db) + db = _empirical_adjustments(db) + return _formatted_db(db) + + +def _db_results(doc): + """Extract results from doc metadata to DataFrame.""" + results = doc.metadata.get("ordinance_values") + if results is None: + return None + + results["source"] = doc.metadata.get("source") + year = doc.metadata.get("date", (None, None, None))[0] + results["ord_year"] = year if year is not None and year > 0 else None + results["last_updated"] = datetime.now().strftime("%m/%d/%Y") + + location = doc.metadata["location"] + results["FIPS"] = location.fips + results["county"] = location.name + results["state"] = location.state + return results + + +def _empirical_adjustments(db): + """Post-processing adjustments based on empirical observations. + + Current adjustments include: + + - Limit adder to max of 250 ft. + - Chat GPT likes to report large values here, but in + practice all values manually observed in ordinance documents + are below 250 ft. + + """ + if "adder" in db.columns: + db.loc[db["adder"] > 250, "adder"] = None + return db + + +def _formatted_db(db): + """Format DataFrame for output.""" + out_cols = [col for col in OUT_COLS if col in db.columns] + return db[out_cols] +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/base.html b/_modules/elm/ords/services/base.html new file mode 100644 index 00000000..6b529f42 --- /dev/null +++ b/_modules/elm/ords/services/base.html @@ -0,0 +1,458 @@ + + + + + + elm.ords.services.base — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.base

+# -*- coding: utf-8 -*-
+"""ELM abstract Service class."""
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+
+from elm.ords.services.queues import get_service_queue
+from elm.ords.utilities.exceptions import ELMOrdsNotInitializedError
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class Service(ABC): + """Abstract base class for a Service that can be queued to run.""" + + MAX_CONCURRENT_JOBS = 10_000 + """Max number of concurrent job submissions.""" + + @classmethod + def _queue(cls): + """Get queue for class.""" + queue = get_service_queue(cls.__name__) + if queue is None: + raise ELMOrdsNotInitializedError("Must initialize the queue!") + return queue + +
+[docs] + @classmethod + async def call(cls, *args, **kwargs): + """Call the service. + + Parameters + ---------- + *args, **kwargs + Positional and keyword arguments to be passed to the + underlying service processing function. + + Returns + ------- + obj + A response object from the underlying service. + """ + fut = asyncio.Future() + outer_task_name = asyncio.current_task().get_name() + await cls._queue().put((fut, outer_task_name, args, kwargs)) + return await fut
+ + + @property + def name(self): + """str: Service name used to pull the correct queue object.""" + return self.__class__.__name__ + +
+[docs] + async def process_using_futures(self, fut, *args, **kwargs): + """Process a call to the service. + + Parameters + ---------- + fut : asyncio.Future + A future object that should get the result of the processing + operation. If the processing function returns ``answer``, + this method should call ``fut.set_result(answer)``. + **kwargs + Keyword arguments to be passed to the + underlying processing function. + """ + + try: + response = await self.process(*args, **kwargs) + except Exception as e: + fut.set_exception(e) + return + + fut.set_result(response)
+ + +
+[docs] + def acquire_resources(self): + """Use this method to allocate resources, if needed"""
+ + +
+[docs] + def release_resources(self): + """Use this method to clean up resources, if needed"""
+ + + @property + @abstractmethod + def can_process(self): + """Check if process function can be called. + + This should be a fast-running method that returns a boolean + indicating wether or not the service can accept more + processing calls. + """ + +
+[docs] + @abstractmethod + async def process(self, *args, **kwargs): + """Process a call to the service. + + Parameters + ---------- + *args, **kwargs + Positional and keyword arguments to be passed to the + underlying processing function. + """
+
+ + + +
+[docs] +class RateLimitedService(Service): + """Abstract Base Class representing a rate-limited service (e.g. OpenAI)""" + + def __init__(self, rate_limit, rate_tracker): + """ + + Parameters + ---------- + rate_limit : int | float + Max usage per duration of the rate tracker. For example, + if the rate tracker is set to compute the total over + minute-long intervals, this value should be the max usage + per minute. + rate_tracker : `elm.ords.utilities.usage.TimeBoundedUsageTracker` + A TimeBoundedUsageTracker instance. This will be used to + track usage per time interval and compare to `rate_limit`. + """ + self.rate_limit = rate_limit + self.rate_tracker = rate_tracker + + @property + def can_process(self): + """Check if usage is under the rate limit.""" + return self.rate_tracker.total < self.rate_limit
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/cpu.html b/_modules/elm/ords/services/cpu.html new file mode 100644 index 00000000..dec5a6ee --- /dev/null +++ b/_modules/elm/ords/services/cpu.html @@ -0,0 +1,474 @@ + + + + + + elm.ords.services.cpu — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.cpu

+# -*- coding: utf-8 -*-
+"""ELM Ordinance CPU-bound services"""
+import asyncio
+from functools import partial
+from concurrent.futures import ProcessPoolExecutor
+
+from elm.ords.services.base import Service
+from elm.web.document import PDFDocument
+from elm.utilities.parse import read_pdf, read_pdf_ocr
+
+
+
+[docs] +class ProcessPoolService(Service): + """Service that contains a ProcessPoolExecutor instance""" + + def __init__(self, **kwargs): + """ + + Parameters + ---------- + **kwargs + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ProcessPoolExecutor`. + By default, ``None``. + """ + self._ppe_kwargs = kwargs or {} + self.pool = None + +
+[docs] + def acquire_resources(self): + """Open thread pool and temp directory""" + self.pool = ProcessPoolExecutor(**self._ppe_kwargs)
+ + +
+[docs] + def release_resources(self): + """Shutdown thread pool and cleanup temp directory""" + self.pool.shutdown(wait=True, cancel_futures=True)
+
+ + + +
+[docs] +class PDFLoader(ProcessPoolService): + """Class to load PDFs in a ProcessPoolExecutor.""" + + @property + def can_process(self): + """bool: Always ``True`` (limiting is handled by asyncio)""" + return True + +
+[docs] + async def process(self, fn, pdf_bytes, **kwargs): + """Write URL doc to file asynchronously. + + Parameters + ---------- + doc : elm.web.document.Document + Document containing meta information about the file. Must + have a "source" key in the `metadata` dict containing the + URL, which will be converted to a file name using + :func:`compute_fn_from_url`. + file_content : str | bytes + File content, typically string text for HTML files and bytes + for PDF file. + make_name_unique : bool, optional + Option to make file name unique by adding a UUID at the end + of the file name. By default, ``False``. + + Returns + ------- + Path + Path to output file. + """ + loop = asyncio.get_running_loop() + result = await loop.run_in_executor( + self.pool, partial(fn, pdf_bytes, **kwargs) + ) + return result
+
+ + + +def _read_pdf(pdf_bytes, **kwargs): + """Utility function so that pdftotext.PDF doesn't have to be pickled.""" + pages = read_pdf(pdf_bytes, verbose=False) + return PDFDocument(pages, **kwargs) + + +def _read_pdf_ocr(pdf_bytes, tesseract_cmd, **kwargs): + """Utility function that mimics `_read_pdf`.""" + if tesseract_cmd: + _configure_pytesseract(tesseract_cmd) + + pages = read_pdf_ocr(pdf_bytes, verbose=True) + return PDFDocument(pages, **kwargs) + + +def _configure_pytesseract(tesseract_cmd): + """Set the tesseract_cmd""" + import pytesseract + + pytesseract.pytesseract.tesseract_cmd = tesseract_cmd + + +
+[docs] +async def read_pdf_doc(pdf_bytes, **kwargs): + """Read PDF file from bytes in a Process Pool. + + Parameters + ---------- + pdf_bytes : bytes + Bytes containing PDF file. + **kwargs + Keyword-value arguments to pass to + :class:`elm.web.document.PDFDocument` initializer. + + Returns + ------- + elm.web.document.PDFDocument + PDFDocument instances with pages loaded as text. + """ + return await PDFLoader.call(_read_pdf, pdf_bytes, **kwargs)
+ + + +
+[docs] +async def read_pdf_doc_ocr(pdf_bytes, **kwargs): + """Read PDF file from bytes using OCR (pytesseract) in a Process Pool. + + Note that Pytesseract must be set up properly for this method to + work. In particular, the `pytesseract.pytesseract.tesseract_cmd` + attribute must be set to point to the pytesseract exe. + + Parameters + ---------- + pdf_bytes : bytes + Bytes containing PDF file. + **kwargs + Keyword-value arguments to pass to + :class:`elm.web.document.PDFDocument` initializer. + + Returns + ------- + elm.web.document.PDFDocument + PDFDocument instances with pages loaded as text. + """ + import pytesseract + + return await PDFLoader.call( + _read_pdf_ocr, + pdf_bytes, + tesseract_cmd=pytesseract.pytesseract.tesseract_cmd, + **kwargs + )
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/openai.html b/_modules/elm/ords/services/openai.html new file mode 100644 index 00000000..eb127b56 --- /dev/null +++ b/_modules/elm/ords/services/openai.html @@ -0,0 +1,492 @@ + + + + + + elm.ords.services.openai — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.openai

+# -*- coding: utf-8 -*-
+"""ELM Ordinances OpenAI service amd utils."""
+import logging
+
+import openai
+
+from elm.base import ApiBase
+from elm.ords.services.base import RateLimitedService
+from elm.ords.services.usage import TimeBoundedUsageTracker
+from elm.utilities.retry import async_retry_with_exponential_backoff
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +def usage_from_response(current_usage, response): + """OpenAI usage parser. + + Parameters + ---------- + current_usage : dict + Dictionary containing current usage information. For OpenAI + trackers, this may contain the keys ``"requests"``, + ``"prompt_tokens"``, and ``"response_tokens"`` if there is + already existing tracking information. Empty dictionaries are + allowed, in which case the three keys above will be added to + this input. + response : openai.Completion + OpenAI Completion object. Must contain a ``usage`` attribute + that + + Returns + ------- + dict + Dictionary with updated usage statistics. + """ + current_usage["requests"] = current_usage.get("requests", 0) + 1 + current_usage["prompt_tokens"] = ( + current_usage.get("prompt_tokens", 0) + response.usage.prompt_tokens + ) + current_usage["response_tokens"] = ( + current_usage.get("response_tokens", 0) + + response.usage.completion_tokens + ) + return current_usage
+ + + +
+[docs] +def count_tokens(messages, model): + """Count the number of tokens in an outgoing set of messages. + + Parameters + ---------- + messages : list + A list of message objects, where the latter is represented + using a dictionary. Each message dictionary must have a + "content" key containing the string to count tokens for. + model : str + The OpenAI model being used. This input will be passed to + :func:`tiktoken.encoding_for_model`. + + Returns + ------- + int + Total number of tokens in the set of messages outgoing to + OpenAI. + + References + ---------- + https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb + """ + message_total = sum( + ApiBase.count_tokens(message["content"], model=model) + 4 + for message in messages + ) + return message_total + 3
+ + + +
+[docs] +class OpenAIService(RateLimitedService): + """OpenAI Chat GPT query service""" + + def __init__(self, client, rate_limit=1e3, rate_tracker=None): + """ + + Parameters + ---------- + client : openai.AsyncOpenAI | openai.AsyncAzureOpenAI + Async OpenAI client instance. Must have an async + `client.chat.completions.create` method. + rate_limit : int | float, optional + Token rate limit (typically per minute, but the time + interval is ultimately controlled by the `rate_tracker` + instance). By default, ``1e3``. + rate_tracker : TimeBoundedUsageTracker, optional + A TimeBoundedUsageTracker instance. This will be used to + track usage per time interval and compare to `rate_limit`. + If ``None``, a `TimeBoundedUsageTracker` instance is created + with default parameters. By default, ``None``. + """ + super().__init__(rate_limit, rate_tracker or TimeBoundedUsageTracker()) + self.client = client + +
+[docs] + async def process( + self, usage_tracker=None, usage_sub_label="default", *, model, **kwargs + ): + """Process a call to OpenAI Chat GPT. + + Note that this method automatically retries queries (with + backoff) if a rate limit error is throw by the API. + + Parameters + ---------- + model : str + OpenAI GPT model to query. + usage_tracker : `elm.ords.services.usage.UsageTracker`, optional + UsageTracker instance. Providing this input will update your + tracker with this call's token usage info. + By default, ``None``. + usage_sub_label : str, optional + Optional label to categorize usage under. This can be used + to track usage related to certain categories. + By default, ``"default"``. + **kwargs + Keyword arguments to be passed to + `client.chat.completions.create`. + + Returns + ------- + str | None + Chat GPT response as a string, or ``None`` if the call + failed. + """ + self._record_prompt_tokens(model, kwargs) + response = await self._call_gpt(model=model, **kwargs) + self._record_completion_tokens(response) + self._record_usage(response, usage_tracker, usage_sub_label) + return _get_response_message(response)
+ + + def _record_prompt_tokens(self, model, kwargs): + """Add prompt token count to rate tracker""" + num_tokens = count_tokens(kwargs.get("messages", []), model) + self.rate_tracker.add(num_tokens) + + def _record_usage(self, response, usage_tracker, usage_sub_label): + """Record token usage for user""" + if usage_tracker is None: + return + usage_tracker.update_from_model(response, sub_label=usage_sub_label) + + def _record_completion_tokens(self, response): + """Add completion token count to rate tracker""" + if response is None: + return + self.rate_tracker.add(response.usage.completion_tokens) + + @async_retry_with_exponential_backoff() + async def _call_gpt(self, **kwargs): + """Query Chat GPT with user inputs""" + try: + return await self.client.chat.completions.create(**kwargs) + except openai.BadRequestError as e: + logger.error("Got 'BadRequestError':") + logger.exception(e)
+ + + +def _get_response_message(response): + """Get message as string from response object""" + if response is None: + return None + return response.choices[0].message.content +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/provider.html b/_modules/elm/ords/services/provider.html new file mode 100644 index 00000000..9c213f21 --- /dev/null +++ b/_modules/elm/ords/services/provider.html @@ -0,0 +1,466 @@ + + + + + + elm.ords.services.provider — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.provider

+# -*- coding: utf-8 -*-
+"""ELM service provider classes."""
+import asyncio
+import logging
+
+from elm.ords.services.queues import (
+    initialize_service_queue,
+    get_service_queue,
+    tear_down_service_queue,
+)
+from elm.ords.utilities.exceptions import ELMOrdsValueError
+
+
+logger = logging.getLogger(__name__)
+
+
+class _RunningProvider:
+    """A running provider for a single service."""
+
+    def __init__(self, service, queue):
+        """
+
+        Parameters
+        ----------
+        service : :class:`elm.ords.services.base.Service`
+            An instance of a single async service to run.
+        queue : :class:`asyncio.Queue`
+            Queue object for the running service.
+        """
+        self.service = service
+        self.queue = queue
+        self.jobs = set()
+
+    async def run(self):
+        """Run the service."""
+        while True:
+            await self.submit_jobs()
+            await self.collect_responses()
+
+    async def submit_jobs(self):
+        """Submit jobs from the queue to processing.
+
+        The service can limit the number of submissions at a time by
+        implementing the ``can_process`` property.
+
+        If the queue is non-empty, the function takes jobs from it
+        iteratively and submits them until the ``can_process`` property
+        of the service returns ``False``. A call to ``can_process`` is
+        submitted between every job pulled from the queue, so enure that
+        method is performant. If the queue is empty, this function does
+        one of two things:
+
+            1) If there are no jobs processing, it waits on the queue
+               to get more jobs and submits them as they come in
+               (assuming the service allows it)
+            2) If there are jobs processing, this function returns
+               without waiting on more jobs from the queue.
+
+        """
+        if not self.service.can_process or self._q_empty_but_still_processing:
+            return
+
+        while self.service.can_process and self._can_fit_jobs:
+            fut, outer_task_name, args, kwargs = await self.queue.get()
+            task = asyncio.create_task(
+                self.service.process_using_futures(fut, *args, **kwargs),
+                name=outer_task_name,
+            )
+            self.queue.task_done()
+            self.jobs.add(task)
+            await self._allow_service_to_update()
+
+        return
+
+    async def _allow_service_to_update(self):
+        """Switch contexts, allowing service to update if it can process"""
+        await asyncio.sleep(0)
+
+    @property
+    def _q_empty_but_still_processing(self):
+        """bool: Queue empty but jobs still running (don't await queue)"""
+        return self.queue.empty() and self.jobs
+
+    @property
+    def _can_fit_jobs(self):
+        """bool: Job tracker not full"""
+        return len(self.jobs) < self.service.MAX_CONCURRENT_JOBS
+
+    async def collect_responses(self):
+        """Collect responses from the service.
+
+        This call will block further submissions to the service until
+        at least one job finishes.
+        """
+        if not self.jobs:
+            return
+
+        complete, __ = await asyncio.wait(
+            self.jobs, return_when=asyncio.FIRST_COMPLETED
+        )
+
+        for job in complete:
+            self.jobs.remove(job)
+
+
+
+[docs] +class RunningAsyncServices: + """Async context manager for running services.""" + + def __init__(self, services): + """ + + Parameters + ---------- + services : iterable + An iterable of async services to run during program + execution. + """ + self.services = services + self.__providers = [] + self._validate_services() + + def _validate_services(self): + """Validate input services.""" + if len(self.services) < 1: + raise ELMOrdsValueError( + "Must provide at least one service to run!" + ) + + def _reset_providers(self): + """Reset running providers""" + for c in self.__providers: + c.cancel() + self.__providers = [] + + async def __aenter__(self): + for service in self.services: + logger.debug("Initializing Service: %s", service.name) + queue = initialize_service_queue(service.name) + service.acquire_resources() + task = asyncio.create_task(_RunningProvider(service, queue).run()) + self.__providers.append(task) + + async def __aexit__(self, exc_type, exc, tb): + try: + for service in self.services: + await get_service_queue(service.name).join() + service.release_resources() + finally: + self._reset_providers() + for service in self.services: + logger.debug("Tearing down Service: %s", service.name) + tear_down_service_queue(service.name)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/queues.html b/_modules/elm/ords/services/queues.html new file mode 100644 index 00000000..cbb4d8c4 --- /dev/null +++ b/_modules/elm/ords/services/queues.html @@ -0,0 +1,375 @@ + + + + + + elm.ords.services.queues — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.queues

+# -*- coding: utf-8 -*-
+"""Module for "singleton" QUERIES dictionary"""
+import asyncio
+
+
+_QUEUES = {}
+
+
+
+[docs] +def initialize_service_queue(service_name): + """Initialize an `asyncio.Queue()` for a service. + + Repeated calls to this function return the same queue + + Parameters + ---------- + service_name : str + Name of service to initialize queue for. + + Returns + ------- + asyncio.Queue() + Queue instance for this service. + """ + return _QUEUES.setdefault(service_name, asyncio.Queue())
+ + + +
+[docs] +def tear_down_service_queue(service_name): + """Remove the queue for a service. + + The queue does not have to exist, so repeated calls to this function + are OK. + + Parameters + ---------- + service_name : str + Name of service to delete queue for. + """ + _QUEUES.pop(service_name, None)
+ + + +
+[docs] +def get_service_queue(service_name): + """Retrieve the queue for a service. + + Parameters + ---------- + service_name : str + Name of service to retrieve queue for. + + Returns + ------- + asyncio.Queue() | None + Queue instance for this service, or `None` if the queue was not + initialized. + """ + return _QUEUES.get(service_name)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/threaded.html b/_modules/elm/ords/services/threaded.html new file mode 100644 index 00000000..f4260d0c --- /dev/null +++ b/_modules/elm/ords/services/threaded.html @@ -0,0 +1,648 @@ + + + + + + elm.ords.services.threaded — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.threaded

+# -*- coding: utf-8 -*-
+# pylint: disable=consider-using-with
+"""ELM Ordinance Threaded services"""
+import json
+import shutil
+import asyncio
+from pathlib import Path
+from functools import partial
+from abc import abstractmethod
+from tempfile import TemporaryDirectory
+from concurrent.futures import ThreadPoolExecutor
+
+from elm.ords.services.base import Service
+from elm.web.utilities import write_url_doc_to_file
+
+
+def _move_file(doc, out_dir):
+    """Move a file from a temp directory to an output directory."""
+    cached_fp = doc.metadata.get("cache_fn")
+    if cached_fp is None:
+        return
+
+    cached_fp = Path(cached_fp)
+    out_fn = doc.metadata.get("location_name", cached_fp.name)
+    if not out_fn.endswith(cached_fp.suffix):
+        out_fn = f"{out_fn}{cached_fp.suffix}"
+
+    out_fp = Path(out_dir) / out_fn
+    shutil.move(cached_fp, out_fp)
+    return out_fp
+
+
+def _write_cleaned_file(doc, out_dir):
+    """Write cleaned ordinance text to directory."""
+    cleaned_text = doc.metadata.get("cleaned_ordinance_text")
+    location_name = doc.metadata.get("location_name")
+
+    if cleaned_text is None or location_name is None:
+        return
+
+    out_fp = Path(out_dir) / f"{location_name} Summary.txt"
+    with open(out_fp, "w", encoding="utf-8") as fh:
+        fh.write(cleaned_text)
+    return out_fp
+
+
+def _write_ord_db(doc, out_dir):
+    """Write parsed ordinance database to directory."""
+    ord_db = doc.metadata.get("ordinance_values")
+    location_name = doc.metadata.get("location_name")
+
+    if ord_db is None or location_name is None:
+        return
+
+    out_fp = Path(out_dir) / f"{location_name} Ordinances.csv"
+    ord_db.to_csv(out_fp, index=False)
+    return out_fp
+
+
+_PROCESSING_FUNCTIONS = {
+    "move": _move_file,
+    "write_clean": _write_cleaned_file,
+    "write_db": _write_ord_db,
+}
+
+
+
+[docs] +class ThreadedService(Service): + """Service that contains a ThreadPoolExecutor instance""" + + def __init__(self, **kwargs): + """ + + Parameters + ---------- + **kwargs + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ThreadPoolExecutor`. + By default, ``None``. + """ + self._tpe_kwargs = kwargs or {} + self.pool = None + +
+[docs] + def acquire_resources(self): + """Open thread pool and temp directory""" + self.pool = ThreadPoolExecutor(**self._tpe_kwargs)
+ + +
+[docs] + def release_resources(self): + """Shutdown thread pool and cleanup temp directory""" + self.pool.shutdown(wait=True, cancel_futures=True)
+
+ + + +
+[docs] +class TempFileCache(ThreadedService): + """Service that locally caches files downloaded from the internet""" + + def __init__(self, td_kwargs=None, tpe_kwargs=None): + """ + + Parameters + ---------- + td_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`tempfile.TemporaryDirectory`. By default, ``None``. + tpe_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ThreadPoolExecutor`. + By default, ``None``. + """ + super().__init__(**(tpe_kwargs or {})) + self._td_kwargs = td_kwargs or {} + self._td = None + + @property + def can_process(self): + """bool: Always ``True`` (limiting is handled by asyncio)""" + return True + +
+[docs] + def acquire_resources(self): + """Open thread pool and temp directory""" + super().acquire_resources() + self._td = TemporaryDirectory(**self._td_kwargs)
+ + +
+[docs] + def release_resources(self): + """Shutdown thread pool and cleanup temp directory""" + self._td.cleanup() + super().release_resources()
+ + +
+[docs] + async def process(self, doc, file_content, make_name_unique=False): + """Write URL doc to file asynchronously. + + Parameters + ---------- + doc : elm.web.document.Document + Document containing meta information about the file. Must + have a "source" key in the `metadata` dict containing the + URL, which will be converted to a file name using + :func:`compute_fn_from_url`. + file_content : str | bytes + File content, typically string text for HTML files and bytes + for PDF file. + make_name_unique : bool, optional + Option to make file name unique by adding a UUID at the end + of the file name. By default, ``False``. + + Returns + ------- + Path + Path to output file. + """ + loop = asyncio.get_running_loop() + result = await loop.run_in_executor( + self.pool, + partial( + write_url_doc_to_file, + doc, + file_content, + self._td.name, + make_name_unique=make_name_unique, + ), + ) + return result
+
+ + + +
+[docs] +class StoreFileOnDisk(ThreadedService): + """Abstract service that manages the storage of a file on disk. + + Storage can occur due to creation or a move of a file. + """ + + def __init__(self, out_dir, tpe_kwargs=None): + """ + + Parameters + ---------- + out_dir : path-like + Path to output directory where file should be stored. + tpe_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ThreadPoolExecutor`. + By default, ``None``. + """ + super().__init__(**(tpe_kwargs or {})) + self.out_dir = out_dir + + @property + def can_process(self): + """bool: Always ``True`` (limiting is handled by asyncio)""" + return True + +
+[docs] + async def process(self, doc): + """Store file in out directory. + + Parameters + ---------- + doc : elm.web.document.Document + Document containing meta information about the file. Must + have relevant processing keys in the `metadata` dict, + otherwise the file may not be stored in the output + directory. + + Returns + ------- + Path | None + Path to output file, or `None` if no file was stored. + """ + return await _run_func_in_pool( + self.pool, + partial(_PROCESSING_FUNCTIONS[self._PROCESS], doc, self.out_dir), + )
+ + + @property + @abstractmethod + def _PROCESS(self): + """str: Key in `_PROCESSING_FUNCTIONS` that defines the doc func.""" + raise NotImplementedError
+ + + +
+[docs] +class FileMover(StoreFileOnDisk): + """Service that moves files to an output directory""" + + _PROCESS = "move"
+ + + +
+[docs] +class CleanedFileWriter(StoreFileOnDisk): + """Service that writes cleaned text to a file""" + + _PROCESS = "write_clean"
+ + + +
+[docs] +class OrdDBFileWriter(StoreFileOnDisk): + """Service that writes cleaned text to a file""" + + _PROCESS = "write_db"
+ + + +
+[docs] +class UsageUpdater(ThreadedService): + """Service that updates usage info from a tracker into a file.""" + + def __init__(self, usage_fp, tpe_kwargs=None): + """ + + Parameters + ---------- + usage_fp : path-like + Path to JSON file where usage should be tracked. + tpe_kwargs : dict, optional + Keyword-value argument pairs to pass to + :class:`concurrent.futures.ThreadPoolExecutor`. + By default, ``None``. + """ + super().__init__(**(tpe_kwargs or {})) + self.usage_fp = usage_fp + self._is_processing = False + + @property + def can_process(self): + """bool: ``True`` if file not currently being written to.``""" + return not self._is_processing + +
+[docs] + async def process(self, tracker): + """Add usage from tracker to file. + + Any existing usage info in the file will remain unchanged + EXCEPT for anything under the label of the input `tracker`, + all of which will be replaced with info from the tracker itself. + + Parameters + ---------- + tracker : elm.ods.services.usage.UsageTracker + A usage tracker instance that contains usage info to be + added to output file. + """ + self._is_processing = True + await _run_func_in_pool( + self.pool, partial(_dump_usage, self.usage_fp, tracker) + ) + self._is_processing = False
+
+ + + +async def _run_func_in_pool(pool, callable_fn): + """Run a callable in process pool""" + loop = asyncio.get_running_loop() + return await loop.run_in_executor(pool, callable_fn) + + +def _dump_usage(fp, tracker): + """Dump usage to an existing file.""" + if not Path(fp).exists(): + usage_info = {} + else: + with open(fp, "r") as fh: + usage_info = json.load(fh) + + tracker.add_to(usage_info) + with open(fp, "w") as fh: + json.dump(usage_info, fh, indent=4) +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/services/usage.html b/_modules/elm/ords/services/usage.html new file mode 100644 index 00000000..721ccad6 --- /dev/null +++ b/_modules/elm/ords/services/usage.html @@ -0,0 +1,499 @@ + + + + + + elm.ords.services.usage — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.services.usage

+# -*- coding: utf-8 -*-
+"""ELM Ordinances usage tracking utilities."""
+import time
+import logging
+from collections import UserDict, deque
+from functools import total_ordering
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +@total_ordering +class TimedEntry: + """An entry that performs comparisons based on time added, not value. + + Examples + -------- + >>> a = TimedEntry(100) + >>> a > 1000 + True + """ + + def __init__(self, value): + """ + + Parameters + ---------- + value : obj + Some value to store as an entry. + """ + self.value = value + self._time = time.monotonic() + + def __eq__(self, other): + return self._time == other + + def __lt__(self, other): + return self._time < other
+ + + +
+[docs] +class TimeBoundedUsageTracker: + """Track usage of a resource over time. + + This class wraps a double-ended queue, and any inputs older than + a certain time are dropped. Those values are also subtracted from + the running total. + + References + ---------- + https://stackoverflow.com/questions/51485656/efficient-time-bound-queue-in-python + """ + + def __init__(self, max_seconds=70): + """ + + Parameters + ---------- + max_seconds : int, optional + Maximum age in seconds of an element before it is dropped + from consideration. By default, ``65``. + """ + self.max_seconds = max_seconds + self._total = 0 + self._q = deque() + + @property + def total(self): + """float: Total value of all entries younger than `max_seconds`""" + self._discard_old_values() + return self._total + +
+[docs] + def add(self, value): + """Add a value to track. + + Parameters + ---------- + value : int | float + A new value to add to the queue. It's total will be added to + the running total, and it will live for `max_seconds` before + being discarded. + """ + self._q.append(TimedEntry(value)) + self._total += value
+ + + def _discard_old_values(self): + """Discard 'old' values from the queue""" + cutoff_time = time.monotonic() - self.max_seconds + try: + while self._q[0] < cutoff_time: + self._total -= self._q.popleft().value + except IndexError: + pass
+ + + +
+[docs] +class UsageTracker(UserDict): + """Rate or AIP usage tracker.""" + + def __init__(self, label, response_parser): + """ + + Parameters + ---------- + label : str + Top-level label to use when adding this usage information to + another dictionary. + response_parser : callable + A callable that takes the current usage info (in dictionary + format) and an LLm response as inputs, updates the usage + dictionary with usage info based on the response, and + returns the updated dictionary. See, for example, + :func:`elm.ords.services.openai.usage_from_response`. + """ + super().__init__() + self.label = label + self.response_parser = response_parser + +
+[docs] + def add_to(self, other): + """Add the contents of this usage information to another dict. + + The contents of this dictionary are stored under the `label` + key that this object was initialized with. + + Parameters + ---------- + other : dict + A dictionary to add the contents of this one to. + """ + other.update({self.label: {**self, "tracker_totals": self.totals}})
+ + + @property + def totals(self): + """Compute total usage across all sub-labels. + + Returns + ------- + dict + Dictionary containing usage information totaled across all + sub-labels. + """ + totals = {} + for report in self.values(): + try: + sub_label_report = report.items() + except AttributeError: + continue + + for tracked_value, count in sub_label_report: + totals[tracked_value] = totals.get(tracked_value, 0) + count + return totals + +
+[docs] + def update_from_model(self, response=None, sub_label="default"): + """Update usage from a model response. + + Parameters + ---------- + response : object, optional + Model call response, which either contains usage information + or can be used to infer/compute usage. If ``None``, no + update is made. + sub_label : str, optional + Optional label to categorize usage under. This can be used + to track usage related to certain categories. + By default, ``"default"``. + """ + if response is None: + return + + self[sub_label] = self.response_parser( + self.get(sub_label, {}), response + )
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/utilities/counties.html b/_modules/elm/ords/utilities/counties.html new file mode 100644 index 00000000..bca2f323 --- /dev/null +++ b/_modules/elm/ords/utilities/counties.html @@ -0,0 +1,452 @@ + + + + + + elm.ords.utilities.counties — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.utilities.counties

+# -*- coding: utf-8 -*-
+"""ELM Ordinance county info"""
+import os
+import logging
+from warnings import warn
+
+import pandas as pd
+
+from elm import ELM_DIR
+from elm.ords.utilities.exceptions import ELMOrdsValueError
+
+
+logger = logging.getLogger(__name__)
+_COUNTY_DATA_FP = os.path.join(ELM_DIR, "ords", "data", "conus_counties.csv")
+
+
+
+[docs] +def load_all_county_info(): + """Load DataFrame containing info like names and websites for all counties. + + Returns + ------- + pd.DataFrame + DataFrame containing county info like names, FIPS, websites, + etc. for all counties. + """ + county_info = pd.read_csv(_COUNTY_DATA_FP) + county_info = _convert_to_title(county_info, "County") + county_info = _convert_to_title(county_info, "State") + return county_info
+ + + +
+[docs] +def county_websites(county_info=None): + """Load mapping of county name and state to website. + + Parameters + ---------- + county_info : pd.DataFrame, optional + DataFrame containing county names and websites. If ``None``, + this info is loaded using :func:`load_county_info`. + By default, ``None``. + + Returns + ------- + dict + Dictionary where keys are tuples of (county, state) and keys are + the relevant website URL. Note that county and state names are + lowercase. + """ + if county_info is None: + county_info = load_all_county_info() + + return { + (row["County"].casefold(), row["State"].casefold()): row["Website"] + for __, row in county_info.iterrows() + }
+ + + +
+[docs] +def load_counties_from_fp(county_fp): + """Load county info base don counties in the input fp. + + Parameters + ---------- + county_fp : path-like + Path to csv file containing "County" and "State" columns that + define the counties for which info should be loaded. + + Returns + ------- + pd.DataFrame + DataFrame containing county info like names, FIPS, websites, + etc. for all requested counties (that were found). + """ + counties = pd.read_csv(county_fp) + _validate_county_input(counties) + + counties = _convert_to_title(counties, "County") + counties = _convert_to_title(counties, "State") + + all_county_info = load_all_county_info() + counties = counties.merge( + all_county_info, on=["County", "State"], how="left" + ) + + counties = _filter_not_found_counties(counties) + return _format_county_df_for_output(counties)
+ + + +def _validate_county_input(df): + """Throw error if user is missing required columns""" + expected_cols = ["County", "State"] + missing = [col for col in expected_cols if col not in df] + if missing: + msg = ( + "The following required columns were not found in the county " + f"input: {missing}" + ) + raise ELMOrdsValueError(msg) + + +def _filter_not_found_counties(df): + """Filter out counties with null FIPS codes.""" + _warn_about_missing_counties(df) + return df[~df.FIPS.isna()].copy() + + +def _warn_about_missing_counties(df): + """Throw warning about counties that were not found in the main list.""" + not_found_counties = df[df.FIPS.isna()] + if len(not_found_counties): + not_found_counties_str = not_found_counties[ + ["County", "State"] + ].to_markdown(index=False, tablefmt="psql") + msg = ( + "The following counties were not found! Please make sure to " + "use proper spelling and capitalization.\n" + f"{not_found_counties_str}" + ) + logger.warning(msg) + warn(msg) + + +def _format_county_df_for_output(df): + """Format county DataFrame for output.""" + out_cols = ["County", "State", "County Type", "FIPS", "Website"] + df.FIPS = df.FIPS.astype(int) + return df[out_cols].reset_index(drop=True) + + +def _convert_to_title(df, column): + """Convert the values of a DataFrame column to titles.""" + df[column] = df[column].str.strip().str.casefold().str.title() + return df +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/utilities/exceptions.html b/_modules/elm/ords/utilities/exceptions.html new file mode 100644 index 00000000..3ca4b59d --- /dev/null +++ b/_modules/elm/ords/utilities/exceptions.html @@ -0,0 +1,353 @@ + + + + + + elm.ords.utilities.exceptions — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for elm.ords.utilities.exceptions

+# -*- coding: utf-8 -*-
+"""Custom Exceptions and Errors for ELM Ordinances. """
+import logging
+
+from elm.exceptions import ELMError
+
+
+logger = logging.getLogger("elm")
+
+
+
+[docs] +class ELMOrdsError(ELMError): + """Generic ELM Ordinance Error.""" + + def __init__(self, *args, **kwargs): + """Init exception and broadcast message to logger.""" + super().__init__(*args, **kwargs) + if args: + logger.error(str(args[0]), stacklevel=2)
+ + + +
+[docs] +class ELMOrdsNotInitializedError(ELMOrdsError): + """ELM Ordinances not initialized error."""
+ + + +
+[docs] +class ELMOrdsValueError(ELMOrdsError, ValueError): + """ELM Ordinances ValueError."""
+ + + +
+[docs] +class ELMOrdsRuntimeError(ELMOrdsError, RuntimeError): + """ELM Ordinances RuntimeError."""
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/utilities/location.html b/_modules/elm/ords/utilities/location.html new file mode 100644 index 00000000..c1da7b91 --- /dev/null +++ b/_modules/elm/ords/utilities/location.html @@ -0,0 +1,389 @@ + + + + + + elm.ords.utilities.location — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.utilities.location

+# -*- coding: utf-8 -*-
+"""ELM Ordinance location specification utilities"""
+from abc import ABC, abstractmethod
+
+
+
+[docs] +class Location(ABC): + """Abstract location representation.""" + + def __init__(self, name): + """ + + Parameters + ---------- + name : str + Name of location. + """ + self.name = name + + @property + @abstractmethod + def full_name(self): + """str: Full name of location"""
+ + + +
+[docs] +class County(Location): + """Class representing a county""" + + def __init__(self, name, state, fips=None, is_parish=False): + """ + + Parameters + ---------- + name : str + Name of the county. + state : str + State containing the county. + fips : int | str, optional + Optional county FIPS code. By default, ``None``. + is_parish : bool, optional + Flag indicating wether or not this county is classified as + a parish. By default, ``False``. + """ + super().__init__(name) + self.state = state + self.fips = fips + self.is_parish = is_parish + + @property + def full_name(self): + """str: Full county name in format '{name} County, {state}'""" + loc_id = "Parish" if self.is_parish else "County" + return f"{self.name} {loc_id}, {self.state}" + + def __repr__(self): + return f"County({self.name}, {self.state}, is_parish={self.is_parish})" + + def __str__(self): + return self.full_name + + def __eq__(self, other): + if isinstance(other, self.__class__): + return ( + self.name.casefold() == other.name.casefold() + and self.state.casefold() == other.state.casefold() + and self.is_parish == other.is_parish + ) + if isinstance(other, str): + return ( + self.full_name.casefold() == other.casefold() + or f"{self.name}, {self.state}".casefold() == other.casefold() + ) + return False
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/utilities/parsing.html b/_modules/elm/ords/utilities/parsing.html new file mode 100644 index 00000000..d62bc6c8 --- /dev/null +++ b/_modules/elm/ords/utilities/parsing.html @@ -0,0 +1,390 @@ + + + + + + elm.ords.utilities.parsing — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.utilities.parsing

+# -*- coding: utf-8 -*-
+"""ELM Ordinances parsing utilities."""
+import json
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +def llm_response_as_json(content): + """LLM response to JSON. + + Parameters + ---------- + content : str + LLM response that contains a string representation of + a JSON file. + + Returns + ------- + dict + Response parsed into dictionary. This dictionary will be empty + if the response cannot be parsed by JSON. + """ + content = content.lstrip().rstrip() + content = content.lstrip("```").lstrip("json").lstrip("\n") + content = content.rstrip("```") + content = content.replace("True", "true").replace("False", "false") + try: + content = json.loads(content) + except json.decoder.JSONDecodeError: + logger.error( + "LLM returned improperly formatted JSON. " + "This is likely due to the completion running out of tokens. " + "Setting a higher token limit may fix this error. " + "Also ensure you are requesting JSON output in your prompt. " + "JSON returned:\n%s", + content, + ) + content = {} + return content
+ + + +# fmt: off +
+[docs] +def merge_overlapping_texts(text_chunks, n=300): + """Merge chunks fo text by removing any overlap. + + Parameters + ---------- + text_chunks : iterable of str + Iterable containing text chunks which may or may not contain + consecutive overlapping portions. + n : int, optional + Number of characters to check at the beginning of each message + for overlap with the previous message. By default, ``100``. + + Returns + ------- + str + Merged text. + """ + if not text_chunks: + return "" + + out_text = text_chunks[0] + for next_text in text_chunks[1:]: + start_ind = out_text[-2 * n:].find(next_text[:n]) + if start_ind == -1: + out_text = "\n".join([out_text, next_text]) + continue + start_ind = 2 * n - start_ind + out_text = "".join([out_text, next_text[start_ind:]]) + return out_text
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/utilities/queued_logging.html b/_modules/elm/ords/utilities/queued_logging.html new file mode 100644 index 00000000..ab5c7354 --- /dev/null +++ b/_modules/elm/ords/utilities/queued_logging.html @@ -0,0 +1,608 @@ + + + + + + elm.ords.utilities.queued_logging — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + + +
  • +
  • +
+
+
+
+
+ +

Source code for elm.ords.utilities.queued_logging

+# -*- coding: utf-8 -*-
+"""ELM Ordinance queued logging.
+
+This module implements queued logging, mostly following this blog:"
+https://www.zopatista.com/python/2019/05/11/asyncio-logging/
+"""
+import asyncio
+import logging
+from pathlib import Path
+from queue import SimpleQueue
+from logging.handlers import QueueHandler, QueueListener
+
+
+LOGGING_QUEUE = SimpleQueue()
+
+
+
+[docs] +class NoLocationFilter(logging.Filter): + """Filter that catches all records without a location attribute.""" + +
+[docs] + def filter(self, record): + """Filter logging record. + + Parameters + ---------- + record : logging.LogRecord + Log record containing the log message + default attributes. + If the ``location`` attribute is missing or is a string in + the form "Task-XX", the filter returns ``True`` (i.e. record + is emitted). + + Returns + ------- + bool + If the record's ``location`` attribute is "missing". + """ + record_location = getattr(record, "location", None) + return record_location is None or "Task-" in record_location
+
+ + + +
+[docs] +class LocationFilter(logging.Filter): + """Filter down to logs from a coroutine processing a specific location.""" + + def __init__(self, location): + """ + + Parameters + ---------- + location : str + Location identifier. For example, ``"El Paso Colorado"``. + """ + self.location = location + +
+[docs] + def filter(self, record): + """Filter logging record. + + Parameters + ---------- + record : logging.LogRecord + Log record containing the log message + default attributes. + Must have a ``location`` attribute that is a string + identifier, or this function will return ``False`` every + time. The ``location`` identifier will be checked against + the filter's location attribute to determine the output + result. + + Returns + ------- + bool + If the record's ``location`` attribute matches the filter's + ``location`` attribute. + """ + record_location = getattr(record, "location", None) + return record_location is not None and record_location == self.location
+
+ + + +
+[docs] +class LocalProcessQueueHandler(QueueHandler): + """QueueHandler that works within a single process (locally).""" + +
+[docs] + def emit(self, record): + """Emit record with a location attribute equal to current asyncio task. + + Parameters + ---------- + record : logging.LogRecord + Log record containing the log message + default attributes. + This record will get a ``location`` attribute dynamically + added, with a value equal to the name of the current asyncio + task (i.e. ``asyncio.current_task().get_name()``). + """ + record.location = asyncio.current_task().get_name() + try: + self.enqueue(record) + except asyncio.CancelledError: + raise + except Exception: + self.handleError(record)
+
+ + + +
+[docs] +class LogListener: + """Class to listen to logging queue from coroutines and write to files.""" + + def __init__(self, logger_names, level="INFO"): + """ + + Parameters + ---------- + logger_names : iterable + An iterable of string, where each string is a logger name. + The logger corresponding to each of the names will be + equipped with a logging queue handler. + level : str, optional + Log level to set for each logger. By default, ``"INFO"``. + """ + self.logger_names = logger_names + self.level = level + self._listener = None + self._queue_handler = LocalProcessQueueHandler(LOGGING_QUEUE) + + def _setup_listener(self): + """Set up the queue listener""" + if self._listener is not None: + return + self._listener = QueueListener( + LOGGING_QUEUE, logging.NullHandler(), respect_handler_level=True + ) + self._listener.handlers = list(self._listener.handlers) + + def _add_queue_handler_to_loggers(self): + """Add a queue handler to each logger requested by user""" + for logger_name in self.logger_names: + logger = logging.getLogger(logger_name) + logger.addHandler(self._queue_handler) + logger.setLevel(self.level) + + def _remove_queue_handler_from_loggers(self): + """Remove the queue handler from each logger requested by user""" + for logger_name in self.logger_names: + logging.getLogger(logger_name).removeHandler(self._queue_handler) + + def _remove_all_handlers_from_listener(self): + """Remove all handlers still attached to listener.""" + if self._listener is None: + return + for handler in self._listener.handlers: + handler.close() + self._listener.handlers.remove(handler) + + def __enter__(self): + self._setup_listener() + self._add_queue_handler_to_loggers() + self._listener.start() + return self + + def __exit__(self, exc_type, exc, tb): + self._listener.stop() + self._remove_queue_handler_from_loggers() + self._remove_all_handlers_from_listener() + + async def __aenter__(self): + return self.__enter__() + + async def __aexit__(self, exc_type, exc, 
tb): + self.__exit__(exc_type, exc, tb) + +
+[docs] + def addHandler(self, handler): + """Add a handler to the queue listener. + + Logs that are sent to the queue will be emitted to the handler. + + Parameters + ---------- + handler : logging.Handler + Log handler to parse log records. + """ + if handler not in self._listener.handlers: + self._listener.handlers.append(handler)
+ + +
+[docs] + def removeHandler(self, handler): + """Remove a handler from the queue listener. + + Logs that are sent to the queue will no longer be emitted to the + handler. + + Parameters + ---------- + handler : logging.Handler + Log handler to remove from queue listener. + """ + if handler in self._listener.handlers: + handler.close() + self._listener.handlers.remove(handler)
+
+ + + +
+[docs] +class LocationFileLog: + """Context manager to write logs for a location to a unique file.""" + + def __init__(self, listener, log_dir, location, level="INFO"): + """ + + Parameters + ---------- + listener : :class:`~elm.ords.utilities.queued_logging.LoggingListener` + A listener instance. The file handler will be added to this + listener. + log_dir : path-like + Path to output directory to contain log file. + location : str + Location identifier. For example, ``"El Paso Colorado"``. + This string will become part of the file name, so it must + contain only characters valid in a file name. + level : str, optional + Log level. By default, ``"INFO"``. + """ + self.log_dir = Path(log_dir) + self.location = location + self.level = level + self._handler = None + self._listener = listener + + def _create_log_dir(self): + """Create log output directory if it doesn't exist.""" + self.log_dir.mkdir(exist_ok=True, parents=True) + + def _setup_handler(self): + """Setup the file handler for this location.""" + self._handler = logging.FileHandler( + self.log_dir / f"{self.location}.log", encoding="utf-8" + ) + self._handler.setLevel(self.level) + self._handler.addFilter(LocationFilter(self.location)) + + def _break_down_handler(self): + """Tear down the file handler for this location.""" + if self._handler is None: + return + + self._handler.close() + self._handler = None + + def _add_handler_to_listener(self): + """Add the file handler for this location to the queue listener.""" + if self._handler is None: + raise ValueError("Must set up handler before listener!") + + self._listener.addHandler(self._handler) + + def _remove_handler_from_listener(self): + """Remove the file handler for this location from the listener.""" + if self._handler is None: + return + + self._listener.removeHandler(self._handler) + + def __enter__(self): + self._create_log_dir() + self._setup_handler() + self._add_handler_to_listener() + + def __exit__(self, exc_type, exc, tb): + 
self._remove_handler_from_listener() + self._break_down_handler() + + async def __aenter__(self): + self.__enter__() + + async def __aexit__(self, exc_type, exc, tb): + self.__exit__(exc_type, exc, tb)
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/validation/content.html b/_modules/elm/ords/validation/content.html new file mode 100644 index 00000000..3e9302e3 --- /dev/null +++ b/_modules/elm/ords/validation/content.html @@ -0,0 +1,525 @@ + + + + + + elm.ords.validation.content — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.validation.content

+# -*- coding: utf-8 -*-
+"""ELM Ordinance document content Validation logic
+
+These are primarily used to validate that a legal document applies to a
+particular technology (e.g. Large Wind Energy Conversion Systems).
+"""
+import logging
+
+
+logger = logging.getLogger(__name__)
+NOT_WIND_WORDS = [
+    "windy",
+    "winds",
+    "window",
+    "windiest",
+    "windbreak",
+    "windshield",
+    "wind blow",
+    "wind erosion",
+    "rewind",
+    "mini wecs",
+    "swecs",
+    "private wecs",
+    "pwecs",
+    "wind direction",
+    "wind movement",
+    "wind attribute",
+    "wind runway",
+    "wind load",
+    "wind orient",
+    "wind damage",
+]
+GOOD_WIND_KEYWORDS = ["wind", "setback"]
+GOOD_WIND_ACRONYMS = ["wecs", "wes", "lwet", "uwet", "wef"]
+_GOOD_ACRONYM_CONTEXTS = [
+    " {acronym} ",
+    " {acronym}\n",
+    " {acronym}.",
+    "\n{acronym} ",
+    "\n{acronym}.",
+    "\n{acronym}\n",
+    "({acronym} ",
+    " {acronym})",
+]
+GOOD_WIND_PHRASES = ["wind energy conversion", "wind turbine", "wind tower"]
+
+
+
+[docs] +class ValidationWithMemory: + """Validate a set of text chunks by sometimes looking at previous chunks""" + + def __init__(self, structured_llm_caller, text_chunks, num_to_recall=2): + """ + + Parameters + ---------- + structured_llm_caller : elm.ords.llm.StructuredLLMCaller + StructuredLLMCaller instance. Used for structured validation + queries. + text_chunks : list of str + List of strings, each of which represent a chunk of text. + The order of the strings should be the order of the text + chunks. This validator may refer to previous text chunks to + answer validation questions. + num_to_recall : int, optional + Number of chunks to check for each validation call. This + includes the original chunk! For example, if + `num_to_recall=2`, the validator will first check the chunk + at the requested index, and then the previous chunk as well. + By default, ``2``. + """ + self.slc = structured_llm_caller + self.text_chunks = text_chunks + self.num_to_recall = num_to_recall + self.memory = [{} for _ in text_chunks] + + # fmt: off + def _inverted_mem(self, starting_ind): + """Inverted memory.""" + inverted_mem = self.memory[:starting_ind + 1:][::-1] + yield from inverted_mem[:self.num_to_recall] + + # fmt: off + def _inverted_text(self, starting_ind): + """Inverted text chunks""" + inverted_text = self.text_chunks[:starting_ind + 1:][::-1] + yield from inverted_text[:self.num_to_recall] + +
+[docs] + async def parse_from_ind(self, ind, prompt, key): + """Validate a chunk of text. + + Validation occurs by querying the LLM using the input prompt and + parsing the `key` from the response JSON. The prompt should + request that the key be a boolean output. If the key retrieved + from the LLM response is False, a number of previous text chunks + are checked as well, using the same prompt. This can be helpful + in cases where the answer to the validation prompt (e.g. does + this text pertain to a large WECS?) is only found in a previous + text chunk. + + Parameters + ---------- + ind : int + Positive integer corresponding to the chunk index. + Must be less than `len(text_chunks)`. + prompt : str + Input LLM system prompt that describes the validation + question. This should request a JSON output from the LLM. + It should also take `key` as a formatting input. + key : str + A key expected in the JSON output of the LLM containing the + response for the validation question. This string will also + be used to format the system prompt before it is passed to + the LLM. + + Returns + ------- + bool + ``True`` if the LLM returned ``True`` for this text chunk or + `num_to_recall-1` text chunks before it. + ``False`` otherwise. + """ + logger.debug("Checking %r for ind %d", key, ind) + mem_text = zip(self._inverted_mem(ind), self._inverted_text(ind)) + for step, (mem, text) in enumerate(mem_text): + logger.debug("Mem at ind %d is %s", step, mem) + check = mem.get(key) + if check is None: + # logger.debug("text=%s", text) + content = await self.slc.call( + sys_msg=prompt.format(key=key), + content=text, + usage_sub_label="document_content_validation", + ) + check = mem[key] = content.get(key, False) + if check: + return check + return False
+
+ + + +
+[docs] +def possibly_mentions_wind(text, match_count_threshold=1): + """Perform a heuristic check for mention of wind energy in text. + + This check first strips the text of any wind "look-alike" words + (e.g. "window", "windshield", etc). Then, it checks for particular + keywords, acronyms, and phrases that pertain to wind in the text. + If enough keywords are mentions (as dictated by + `match_count_threshold`), this check returns ``True``. + + Parameters + ---------- + text : str + Input text that may or may not mention win in relation to wind + energy. + match_count_threshold : int, optional + Number of keywords that must match for the text to pass this + heuristic check. Count must be strictly greater than this value. + By default, ``1``. + + Returns + ------- + bool + ``True`` if the number of keywords/acronyms/phrases detected + exceeds the `match_count_threshold`. + """ + heuristics_text = _convert_to_heuristics_text(text) + total_keyword_matches = _count_single_keyword_matches(heuristics_text) + total_keyword_matches += _count_acronym_matches(heuristics_text) + total_keyword_matches += _count_phrase_matches(heuristics_text) + return total_keyword_matches > match_count_threshold
+ + + +def _convert_to_heuristics_text(text): + """Convert text for heuristic wind content parsing""" + heuristics_text = text.casefold() + for word in NOT_WIND_WORDS: + heuristics_text = heuristics_text.replace(word, "") + return heuristics_text + + +def _count_single_keyword_matches(heuristics_text): + """Count number of good wind energy keywords that appear in text.""" + return sum(keyword in heuristics_text for keyword in GOOD_WIND_KEYWORDS) + + +def _count_acronym_matches(heuristics_text): + """Count number of good wind energy acronyms that appear in text.""" + acronym_matches = 0 + for context in _GOOD_ACRONYM_CONTEXTS: + acronym_keywords = { + context.format(acronym=acronym) for acronym in GOOD_WIND_ACRONYMS + } + acronym_matches = sum( + keyword in heuristics_text for keyword in acronym_keywords + ) + if acronym_matches > 0: + break + return acronym_matches + + +def _count_phrase_matches(heuristics_text): + """Count number of good wind energy phrases that appear in text.""" + return sum( + all(keyword in heuristics_text for keyword in phrase.split(" ")) + for phrase in GOOD_WIND_PHRASES + ) +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/ords/validation/location.html b/_modules/elm/ords/validation/location.html new file mode 100644 index 00000000..43e60b83 --- /dev/null +++ b/_modules/elm/ords/validation/location.html @@ -0,0 +1,613 @@ + + + + + + elm.ords.validation.location — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.ords.validation.location

+# -*- coding: utf-8 -*-
+"""ELM Ordinance Location Validation logic
+
+These are primarily used to validate that a legal document applies to a
+particular location.
+"""
+import asyncio
+import logging
+from abc import ABC, abstractmethod
+
+from elm.ords.extraction.ngrams import convert_text_to_sentence_ngrams
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class FixedMessageValidator(ABC): + """Validation base class using a static system prompt.""" + + SYSTEM_MESSAGE = None + """LLM system message describing validation task. """ + + def __init__(self, structured_llm_caller): + """ + + Parameters + ---------- + structured_llm_caller : :class:`elm.ords.llm.StructuredLLMCaller` + StructuredLLMCaller instance. Used for structured validation + queries. + """ + self.slc = structured_llm_caller + +
+[docs] + async def check(self, content, **fmt_kwargs): + """Check if the content passes the validation. + + The exact validation is outlined in the class `SYSTEM_MESSAGE`. + + Parameters + ---------- + content : str + Document content to validate. + **fmt_kwargs + Keyword arguments to be passed to `SYSTEM_MESSAGE.format()`. + + Returns + ------- + bool + ``True`` if the content passes the validation check, + ``False`` otherwise. + """ + if not content: + return False + sys_msg = self.SYSTEM_MESSAGE.format(**fmt_kwargs) + out = await self.slc.call( + sys_msg, content, usage_sub_label="document_location_validation" + ) + return self._parse_output(out)
+ + + @abstractmethod + def _parse_output(self, props): + """Parse LLM response and return `True` if the document passes.""" + raise NotImplementedError
+ + + +
+[docs] +class URLValidator(FixedMessageValidator): + """Validator that checks wether a URL matches a county.""" + + SYSTEM_MESSAGE = ( + "You extract structured data from a URL. Return your " + "answer in JSON format. Your JSON file must include exactly two keys. " + "The first key is 'correct_county', which is a boolean that is set to " + "`True` if the URL mentions {county} County in some way. DO NOT infer " + "based on information in the URL about any US state, city, township, " + "or otherwise. `False` if not sure. The second key is " + "'correct_state', which is a boolean that is set to `True` if the URL " + "mentions {state} State in some way. DO NOT infer based on " + "information in the URL about any US county, city, township, or " + "otherwise. `False` if not sure." + ) + + def _parse_output(self, props): + """Parse LLM response and return `True` if the document passes.""" + logger.debug("Parsing URL validation output:\n\t%s", props) + check_vars = ("correct_county", "correct_state") + return all(props.get(var) for var in check_vars)
+ + + +
+[docs] +class CountyJurisdictionValidator(FixedMessageValidator): + """Validator that checks wether text applies at the county level.""" + + SYSTEM_MESSAGE = ( + "You extract structured data from legal text. Return " + "your answer in JSON format. Your JSON file must include exactly " + "three keys. The first key is 'x', which is a boolean that is set to " + "`True` if the text excerpt explicitly mentions that the regulations " + "within apply to a jurisdiction scope other than {county} County " + "(i.e. they apply to a subdivision like a township or a city, or " + "they apply more broadly, like to a state or the full country). " + "`False` if the regulations in the text apply at the {county} County " + "level, if the regulations in the text apply to all unincorporated " + "areas of {county} County, or if there is not enough information to " + "determine the answer. The second key is 'y', which is a boolean " + "that is set to `True` if the text excerpt explicitly mentions that " + "the regulations within apply to more than one county. `False` if " + "the regulations in the text excerpt apply to a single county only " + "or if there is not enough information to determine the answer. The " + "third key is 'explanation', which is a string that contains a short " + "explanation if you chose `True` for any answers above." + ) + + def _parse_output(self, props): + """Parse LLM response and return `True` if the document passes.""" + logger.debug( + "Parsing county jurisdiction validation output:\n\t%s", props + ) + check_vars = ("x", "y") + return not any(props.get(var) for var in check_vars)
+ + + +
+[docs] +class CountyNameValidator(FixedMessageValidator): + """Validator that checks wether text applies to a particular county.""" + + SYSTEM_MESSAGE = ( + "You extract structured data from legal text. Return " + "your answer in JSON format. Your JSON file must include exactly " + "three keys. The first key is 'wrong_county', which is a boolean that " + "is set to `True` if the legal text is not for {county} County. Do " + "not infer based on any information about any US state, city, " + "township, or otherwise. `False` if the text applies to {county} " + "County or if there is not enough information to determine the " + "answer. The second key is 'wrong_state', which is a boolean that is " + "set to `True` if the legal text is not for a county in {state} " + "State. Do not infer based on any information about any US county, " + "city, township, or otherwise. `False` if the text applies to " + "a county in {state} State or if there is not enough information to " + "determine the answer. The third key is 'explanation', which is a " + "string that contains a short explanation if you chose `True` for " + "any answers above." + ) + + def _parse_output(self, props): + """Parse LLM response and return `True` if the document passes.""" + logger.debug("Parsing county validation output:\n\t%s", props) + check_vars = ("wrong_county", "wrong_state") + return not any(props.get(var) for var in check_vars)
+ + + +
+[docs] +class CountyValidator: + """ELM Ords County validator. + + Combines the logic of several validators into a single class. + """ + + def __init__(self, structured_llm_caller, score_thresh=0.8): + """ + + Parameters + ---------- + structured_llm_caller : :class:`elm.ords.llm.StructuredLLMCaller` + StructuredLLMCaller instance. Used for structured validation + queries. + score_thresh : float, optional + Score threshold to exceed when voting on content from raw + pages. By default, ``0.8``. + """ + self.score_thresh = score_thresh + self.cn_validator = CountyNameValidator(structured_llm_caller) + self.cj_validator = CountyJurisdictionValidator(structured_llm_caller) + self.url_validator = URLValidator(structured_llm_caller) + +
+[docs] + async def check(self, doc, county, state): + """Check if the document belongs to the county. + + Parameters + ---------- + doc : :class:`elm.web.document.BaseDocument` + Document instance. Should contain a "source" key in the + metadata that contains a URL (used for the URL validation + check). Raw content will be parsed for county name and + correct jurisdiction. + county : str + County that document should belong to. + state : str + State corresponding to `county` input. + + Returns + ------- + bool + `True` if the doc contents pertain to the input county. + `False` otherwise. + """ + source = doc.metadata.get("source") + logger.debug( + "Validating document from source: %s", source or "Unknown" + ) + logger.debug("Checking for correct for jurisdiction...") + jurisdiction_is_county = await _validator_check_for_doc( + validator=self.cj_validator, + doc=doc, + score_thresh=self.score_thresh, + county=county, + ) + if not jurisdiction_is_county: + return False + + logger.debug( + "Checking URL (%s) for county name...", source or "Unknown" + ) + url_is_county = await self.url_validator.check( + source, county=county, state=state + ) + if url_is_county: + return True + + logger.debug( + "Checking text for county name (heuristic; URL: %s)...", + source or "Unknown", + ) + correct_county_heuristic = _heuristic_check_for_county_and_state( + doc, county, state + ) + logger.debug( + "Found county name in text (heuristic): %s", + correct_county_heuristic, + ) + if correct_county_heuristic: + return True + + logger.debug( + "Checking text for county name (LLM; URL: %s)...", + source or "Unknown", + ) + return await _validator_check_for_doc( + validator=self.cn_validator, + doc=doc, + score_thresh=self.score_thresh, + county=county, + state=state, + )
+
+ + + +def _heuristic_check_for_county_and_state(doc, county, state): + """Check if county and state names are in doc""" + return any( + any( + (county.lower() in fg and state.lower() in fg) + for fg in convert_text_to_sentence_ngrams(t.lower(), 5) + ) + for t in doc.pages + ) + + +async def _validator_check_for_doc(validator, doc, score_thresh=0.8, **kwargs): + """Apply a validator check to a doc's raw pages.""" + outer_task_name = asyncio.current_task().get_name() + validation_checks = [ + asyncio.create_task( + validator.check(text, **kwargs), name=outer_task_name + ) + for text in doc.raw_pages + ] + out = await asyncio.gather(*validation_checks) + score = _weighted_vote(out, doc) + logger.debug( + "%s score is %.2f for doc from source %s (Pass: %s)", + validator.__class__.__name__, + score, + doc.metadata.get("source", "Unknown"), + str(score > score_thresh), + ) + return score > score_thresh + + +def _weighted_vote(out, doc): + """Compute weighted average of responses based on text length.""" + if not doc.raw_pages: + return 0 + weights = [len(text) for text in doc.raw_pages] + total = sum(verdict * weight for verdict, weight in zip(out, weights)) + return total / sum(weights) +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/osti.html b/_modules/elm/osti.html new file mode 100644 index 00000000..afea91d5 --- /dev/null +++ b/_modules/elm/osti.html @@ -0,0 +1,634 @@ + + + + + + elm.osti — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.osti

+# -*- coding: utf-8 -*-
+"""
+Utilities for retrieving data from OSTI.
+"""
+import copy
+import requests
+import json
+import os
+import pandas as pd
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class OstiRecord(dict): + """Class to handle a single OSTI record as dictionary data""" + + def __init__(self, record): + """ + Parameters + ---------- + record : dict + OSTI record in dict form, typically a response from OSTI API. + """ + assert isinstance(record, dict) + super().__init__(**record) + +
+[docs] + @staticmethod + def strip_nested_brackets(text): + """Remove text between brackets/parentheses for cleaning OSTI text""" + ret = '' + skip1c = 0 + skip2c = 0 + for i in text: + if i == '[': + skip1c += 1 + elif i == '(': + skip2c += 1 + elif i == ']' and skip1c > 0: + skip1c -= 1 + elif i == ')' and skip2c > 0: + skip2c -= 1 + elif skip1c == 0 and skip2c == 0: + ret += i + return ret
+ + + @property + def authors(self): + """Get the list of authors of this record. + + Returns + ------- + str + """ + au = copy.deepcopy(self.get('authors', None)) + if au is not None: + for i, name in enumerate(au): + name = self.strip_nested_brackets(name) + if name.count(',') == 1: + second, first = name.split(',') + name = f'{first.strip()} {second.strip()}' + au[i] = name + au = ', '.join(au) + return au + + @property + def title(self): + """Get the title of this record + + Returns + ------- + str | None + """ + return self.get('title', None) + + @property + def year(self): + """Get the year of publication of this record + + Returns + ------- + str | None + """ + year = self.get('publication_date', None) + if year is not None: + year = year.split('-')[0] + year = str(year) + return year + + @property + def date(self): + """Get the date of publication of this record + + Returns + ------- + str | None + """ + date = self.get('publication_date', None) + if date is not None: + date = date.split('T')[0] + date = str(date) + return date + + @property + def doi(self): + """Get the DOI of this record + + Returns + ------- + str | None + """ + return self.get('doi', None) + + @property + def osti_id(self): + """Get the OSTI ID of this record which is typically a 7 digit number + + Returns + ------- + str | None + """ + return self.get('osti_id', None) + + @property + def url(self): + """Get the download URL of this record + + Returns + ------- + str | None + """ + url = None + for link in self['links']: + if link.get('rel', None) == 'fulltext': + url = link.get('href', None) + break + return url + +
+[docs] + def download(self, fp): + """Download the PDF of this record + + Parameters + ---------- + fp : str + Filepath to download this record to, typically a .pdf + """ + # OSTI returns citation on first query and pdf on second (weird) + session = requests.Session() + response = session.get(self.url) + response = session.get(self.url) + with open(fp, 'wb') as f_pdf: + f_pdf.write(response.content)
+
+ + + +
+[docs] +class OstiList(list): + """Class to retrieve and handle multiple OSTI records from an API URL.""" + + BASE_URL = 'https://www.osti.gov/api/v1/records' + """Base OSTI API URL. This can be appended with search parameters""" + + def __init__(self, url, n_pages=1): + """ + Parameters + ---------- + url : str + OSTI API URL to request, see this for details: + https://www.osti.gov/api/v1/docs + n_pages : int + Number of pages to get from the API. Typical response has 20 + entries per page. Default of 1 ensures that this class doesnt hang + on a million responses. + """ + + self.url = url + self._session = requests.Session() + self._response = None + self._n_pages = 0 + self._iter = 0 + + records = self._get_first() + for page in self._get_pages(n_pages=n_pages): + records += page + records = [OstiRecord(single) for single in records] + super().__init__(records) + + def _get_first(self): + """Get the first page of OSTI records + + Returns + ------- + list + """ + self._response = self._session.get(self.url) + + if not self._response.ok: + msg = ('OSTI API Request got error {}: "{}"' + .format(self._response.status_code, + self._response.reason)) + raise RuntimeError(msg) + first_page = self._response.json() + + self._n_pages = 1 + if 'last' in self._response.links: + url = self._response.links['last']['url'] + self._n_pages = int(url.split('page=')[-1]) + + logger.debug('Found approximately {} records.' + .format(self._n_pages * len(first_page))) + + return first_page + + def _get_pages(self, n_pages): + """Get response pages up to n_pages from OSTI. 
+ + Parameters + ---------- + n_pages : int + Number of pages to retrieve + + Returns + ------- + next_pages : list + This function will return a generator of next pages, each of which + is a list of OSTI records + """ + if n_pages > 1: + for page in range(2, self._n_pages + 1): + if page <= n_pages: + next_page = self._session.get(self.url, + params={'page': page}) + next_page = next_page.json() + yield next_page + else: + break + +
+[docs] + def download(self, out_dir): + """Download all PDFs from the records in this OSTI object into a + directory. PDFs will be given file names based on their OSTI record ID + + Parameters + ---------- + out_dir : str + Directory to download PDFs to. This directory will be created if it + does not already exist. + """ + logger.info('Downloading {} records to: {}'.format(len(self), out_dir)) + os.makedirs(out_dir, exist_ok=True) + for record in self: + fp_out = os.path.join(out_dir, record.osti_id + '.pdf') + if not os.path.exists(fp_out): + try: + record.download(fp_out) + except Exception as e: + logger.exception('Could not download OSTI ID {} "{}": {}' + .format(record.osti_id, record.title, e)) + logger.info('Finished download!')
+ + + @property + def meta(self): + """Get a meta dataframe with details on all of the OSTI records. + + Returns + ------- + pd.DataFrame + """ + i = 0 + attrs = ('authors', 'title', 'year', 'date', 'doi', 'osti_id', 'url') + df = pd.DataFrame(columns=attrs) + for record in self: + for attr in attrs: + out = getattr(record, attr) + if not isinstance(out, str): + out = json.dumps(out) + df.at[i, attr] = out + df.at[i, 'fn'] = f'{record.osti_id}.pdf' + i += 1 + return df + +
+[docs] + @classmethod + def from_osti_ids(cls, oids): + """Initialize OSTI records from one or more numerical IDS + + Parameters + ---------- + oids : list + List of string or integer OSTI IDs which are typically 7 digit + numbers + + Returns + ------- + out : OstiList + OstiList object with entries for each oid input. + """ + if not isinstance(oids, (list, tuple)): + oids = [oids] + oids = [str(oid) for oid in oids] + out = None + for oid in oids: + iout = cls(cls.BASE_URL + '/' + oid) + if out is None: + out = iout + else: + out += iout + return out
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/pdf.html b/_modules/elm/pdf.html new file mode 100644 index 00000000..f1812ee4 --- /dev/null +++ b/_modules/elm/pdf.html @@ -0,0 +1,690 @@ + + + + + + elm.pdf — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.pdf

+# -*- coding: utf-8 -*-
+# fmt: off
+"""
+ELM PDF to text parser
+"""
+import os
+import subprocess
+import requests
+import tempfile
+import copy
+from PyPDF2 import PdfReader
+import logging
+
+from elm.base import ApiBase
+from elm.utilities.parse import is_multi_col, combine_pages, clean_headers
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class PDFtoTXT(ApiBase): + """Class to parse text from a PDF document.""" + + MODEL_ROLE = ('You clean up poorly formatted text ' + 'extracted from PDF documents.') + """High level model role.""" + + MODEL_INSTRUCTION = ('Text extracted from a PDF: ' + '\n"""\n{}\n"""\n\n' + 'The text above was extracted from a PDF document. ' + 'Can you make it nicely formatted? ' + 'Please only return the formatted text ' + 'without comments or added information.') + """Instructions to the model with python format braces for pdf text""" + + def __init__(self, fp, page_range=None, model=None): + """ + Parameters + ---------- + fp : str + Filepath to .pdf file to extract. + page_range : None | list + Optional 2-entry list/tuple to set starting and ending pages + (python indexing) + model : None | str + Optional specification of OpenAI model to use. Default is + cls.DEFAULT_MODEL + """ + super().__init__(model) + self.fp = fp + self.raw_pages = self.load_pdf(page_range) + self.pages = self.raw_pages + self.full = combine_pages(self.raw_pages) + +
+[docs] + def load_pdf(self, page_range): + """Basic load of pdf to text strings + + Parameters + ---------- + page_range : None | list + Optional 2-entry list/tuple to set starting and ending pages + (python indexing) + + Returns + ------- + out : list + List of strings where each entry is a page. This is the raw PDF + text before GPT cleaning + """ + + logger.info('Loading PDF: {}'.format(self.fp)) + out = [] + pdf = PdfReader(self.fp) + + if page_range is not None: + assert len(page_range) == 2 + page_range = slice(*page_range) + else: + page_range = slice(0, None) + + for i, page in enumerate(pdf.pages[page_range]): + page_text = page.extract_text() + if len(page_text.strip()) == 0: + logger.debug('Skipping empty page {} out of {}' + .format(i + 1 + page_range.start, len(pdf.pages))) + else: + out.append(page_text) + + logger.info('Finished loading PDF.') + return out
+ + +
+[docs] + def make_gpt_messages(self, pdf_raw_text): + """Make the chat completion messages list for input to GPT + + Parameters + ---------- + pdf_raw_text : str + Raw PDF text to be cleaned + + Returns + ------- + messages : list + Messages for OpenAI chat completion model. Typically this looks + like this: + [{"role": "system", "content": "You do this..."}, + {"role": "user", "content": "Please do this: {}"}] + """ + + query = self.MODEL_INSTRUCTION.format(pdf_raw_text) + messages = [{"role": "system", "content": self.MODEL_ROLE}, + {"role": "user", "content": query}] + + return messages
+ + +
+[docs] + def clean_txt(self): + """Use GPT to clean raw pdf text in serial calls to the OpenAI API. + + Returns + ------- + clean_pages : list + List of clean text strings where each list entry is a page from the + PDF + """ + + logger.info('Cleaning PDF text...') + clean_pages = [] + + for i, raw_page in enumerate(self.raw_pages): + msg = self.make_gpt_messages(copy.deepcopy(raw_page)) + req = {"model": self.model, "messages": msg, "temperature": 0.0} + + kwargs = dict(url=self.URL, headers=self.HEADERS, json=req) + + try: + response = requests.post(**kwargs) + response = response.json() + except Exception as e: + msg = 'Error in OpenAI API call!' + logger.exception(msg) + response = {'error': str(e)} + + choice = response.get('choices', [{'message': {'content': ''}}])[0] + message = choice.get('message', {'content': ''}) + content = message.get('content', '') + clean_pages.append(content) + logger.debug('Cleaned page {} out of {}' + .format(i + 1, len(self.raw_pages))) + + logger.info('Finished cleaning PDF.') + + self.pages = clean_pages + self.full = combine_pages(self.pages) + self.validate_clean() + + return clean_pages
+ + +
+[docs] + async def clean_txt_async(self, ignore_error=None, rate_limit=40e3): + """Use GPT to clean raw pdf text in parallel calls to the OpenAI API. + + NOTE: you need to call this using the await command in ipython or + jupyter, e.g.: `out = await PDFtoTXT.clean_txt_async()` + + Parameters + ---------- + ignore_error : None | callable + Optional callable to parse API error string. If the callable + returns True, the error will be ignored, the API call will not be + tried again, and the output will be an empty string. + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + gpt-3.5-turbo limit is 90k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + + Returns + ------- + clean_pages : list + List of clean text strings where each list entry is a page from the + PDF + """ + + logger.info('Cleaning PDF text asyncronously...') + + all_request_jsons = [] + for page in self.raw_pages: + msg = self.make_gpt_messages(page) + req = {"model": self.model, "messages": msg, "temperature": 0.0} + all_request_jsons.append(req) + + clean_pages = await self.call_api_async(self.URL, self.HEADERS, + all_request_jsons, + ignore_error=ignore_error, + rate_limit=rate_limit) + + for i, page in enumerate(clean_pages): + choice = page.get('choices', [{'message': {'content': ''}}])[0] + message = choice.get('message', {'content': ''}) + content = message.get('content', '') + clean_pages[i] = content + + logger.info('Finished cleaning PDF.') + + self.pages = clean_pages + self.full = combine_pages(self.pages) + self.validate_clean() + + return clean_pages
+ + +
+[docs] + def is_double_col(self, separator=' '): + """Does the text look like it has multiple vertical text columns? + + Parameters + ---------- + separator : str + Heuristic split string to look for spaces between columns + + Returns + ------- + out : bool + True if more than one vertical text column + """ + return is_multi_col(self.full, separator=separator)
+ + +
+[docs] + def clean_poppler(self, layout=True): + """Clean the pdf using the poppler pdftotxt utility + + Requires the `pdftotext` command line utility from this software: + https://poppler.freedesktop.org/ + + Parameters + ---------- + layout : bool + Layout flag for poppler pdftotxt utility: "maintain original + physical layout". Layout=True works well for single column text, + layout=False collapses the double columns into single columns which + works better for downstream chunking and LLM work. + + Returns + ------- + out : str + Joined cleaned pages + """ + + with tempfile.TemporaryDirectory() as td: + fp_out = os.path.join(td, 'poppler_out.txt') + args = ['pdftotext', f"{self.fp}", f"{fp_out}"] + if layout: + args.insert(1, '-layout') + + if not os.path.exists(os.path.dirname(fp_out)): + os.makedirs(os.path.dirname(fp_out), exist_ok=True) + + try: + stdout = subprocess.run(args, check=True, + stdout=subprocess.PIPE) + except Exception as e: + msg = ('PDF cleaning with poppler failed! This usually ' + 'because you have not installed the poppler utility ' + '(see https://poppler.freedesktop.org/). ' + f'Full error: {e}') + logger.exception(msg) + raise RuntimeError(msg) from e + else: + if stdout.returncode != 0: + msg = ('Poppler raised return code {}: {}' + .format(stdout.returncode, stdout)) + logger.exception(msg) + raise RuntimeError(msg) + + with open(fp_out, 'r') as f: + clean_txt = f.read() + + # break on poppler page break + self.pages = clean_txt.split('\x0c') + remove = [] + for i, page in enumerate(self.pages): + if not any(page.strip()): + remove.append(i) + for i in remove[::-1]: + _ = self.pages.pop(i) + + self.full = combine_pages(self.pages) + + return self.full
+ + +
+[docs] + def validate_clean(self): + """Run some basic checks on the GPT cleaned text vs. the raw text""" + repl = ('\n', '.', ',', '-', '/', ':') + + if not any(self.full.replace('\n', '').strip()): + msg = 'Didnt get ANY clean output text!' + logger.error(msg) + raise RuntimeError(msg) + + def replace_chars_for_clean(text): + for char in repl: + text = text.replace(char, ' ') + return text + + for i, (raw, clean) in enumerate(zip(self.raw_pages, self.pages)): + raw_words = replace_chars_for_clean(raw).split(' ') + clean_words = replace_chars_for_clean(clean).split(' ') + + raw_words = {x for x in raw_words if len(x) > 2} + clean_words = {x for x in clean_words if len(x) > 2} + + isin = sum(x in clean_words for x in raw_words) + + perc = 100 + if isin > 0 and len(raw_words) > 0: + perc = 100 * isin / len(raw_words) + + if perc < 70: + logger.warning('Page {} of {} has a {:.2f}% match with {} ' + 'unique words in the raw text.' + .format(i + 1, len(self.raw_pages), perc, + len(raw_words))) + else: + logger.info('Page {} of {} has a {:.2f}% match with {} ' + 'unique words in the raw text.' + .format(i + 1, len(self.raw_pages), perc, + len(raw_words)))
+ + +
+[docs] + def clean_headers(self, char_thresh=0.6, page_thresh=0.8, split_on='\n', + iheaders=(0, 1, -2, -1)): + """Clean headers/footers that are duplicated across pages + + Parameters + ---------- + char_thresh : float + Fraction of characters in a given header that are similar between + pages to be considered for removal + page_thresh : float + Fraction of pages that share the header to be considered for + removal + split_on : str + Chars to split lines of a page on + iheaders : list | tuple + Integer indices to look for headers after splitting a page into + lines based on split_on. This needs to go from the start of the + page to the end. + + Returns + ------- + out : str + Clean text with all pages joined + """ + self.pages = clean_headers(self.pages, char_thresh=char_thresh, + page_thresh=page_thresh, split_on=split_on, + iheaders=iheaders) + self.full = combine_pages(self.pages) + return self.full
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/summary.html b/_modules/elm/summary.html new file mode 100644 index 00000000..d45130c1 --- /dev/null +++ b/_modules/elm/summary.html @@ -0,0 +1,506 @@ + + + + + + elm.summary — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.summary

+# -*- coding: utf-8 -*-
+"""
+Research Summarization and Distillation with LLMs
+"""
+import logging
+import os
+
+from elm.base import ApiBase
+from elm.chunk import Chunker
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class Summary(ApiBase): + """Interface to perform Recursive Summarization and Distillation of + research text""" + + MODEL_ROLE = "You are an energy scientist summarizing prior research" + """High level model role, somewhat redundant to MODEL_INSTRUCTION""" + + MODEL_INSTRUCTION = ('Can you please summarize the text quoted above ' + 'in {n_words} words?\n\n"""\n{text_chunk}\n"""') + """Prefix to the engineered prompt. The format args `text_chunk` and + `n_words` will be formatted by the Summary class at runtime. `text_chunk` + will be provided by the Summary text chunks, `n_words` is an initialization + argument for the Summary class.""" + + def __init__(self, text, model=None, n_words=500, **chunk_kwargs): + """ + Parameters + ---------- + text : str | list + Single body of text to chunk up using elm.Chunker or a pre-chunked + list of strings. Works well if this is a single document with empty + lines between paragraphs. + model : str + GPT model name, default is the DEFAULT_MODEL global var + n_words : int + Desired length of the output text. Note that this is never perfect + but helps guide the LLM to an approximate desired output length. + 400-600 words seems to work quite well with GPT-4. This gets + formatted into the MODEL_INSTRUCTION attribute. + chunk_kwargs : dict | None + kwargs for initialization of :class:`elm.chunk.Chunker` + """ + + super().__init__(model) + + self.text = text + self.n_words = n_words + + assert isinstance(self.text, (str, list, tuple)) + + if isinstance(self.text, str): + if os.path.isfile(text): + logger.info('Loading text file: {}'.format(text)) + with open(text, 'r') as f: + self.text = f.read() + self.text_chunks = Chunker(self.text, **chunk_kwargs) + else: + self.text_chunks = self.text + + self.summary_chunks = [] + +
+[docs] + def combine(self, text_summary): + """Combine separate chunk summaries into one more comprehensive + narrative + + Parameters + ---------- + summary : str + Summary of text. May be several disjointed paragraphs + + Returns + ------- + summary : str + Summary of text. Paragraphs will be more cohesive. + """ + role = 'You provide editorial services for technical writing.' + query = ('Can you combine the following paragraphs and ' + 'ease the transitions between them? ' + f'\n\n"""{text_summary}"""') + text_summary = self.generic_query(query, model_role=role) + return text_summary
+ + +
+[docs] + def run(self, temperature=0, fancy_combine=True): + """Use GPT to do a summary of input text. + + Parameters + ---------- + temperature : float + GPT model temperature, a measure of response entropy from 0 to 1. 0 + is more reliable and nearly deterministic; 1 will give the model + more creative freedom and may not return as factual of results. + fancy_combine : bool + Flag to use the GPT model to combine the separate outputs into a + cohesive summary. + + Returns + ------- + summary : str + Summary of text. + """ + + logger.info('Summarizing {} text chunks in serial...' + .format(len(self.text_chunks))) + summary = '' + + for i, chunk in enumerate(self.text_chunks): + logger.debug('Summarizing text chunk {} out of {}' + .format(i + 1, len(self.text_chunks))) + + msg = self.MODEL_INSTRUCTION.format(text_chunk=chunk, + n_words=self.n_words) + response = self.generic_query(msg, model_role=self.MODEL_ROLE, + temperature=temperature) + self.summary_chunks.append(response) + summary += f'\n\n{response}' + + if fancy_combine: + summary = self.combine(summary) + + logger.info('Finished all summaries.') + + return summary
+ + +
+[docs] + async def run_async(self, temperature=0, ignore_error=None, + rate_limit=40e3, fancy_combine=True): + """Run text summary asynchronously for all text chunks + + NOTE: you need to call this using the await command in ipython or + jupyter, e.g.: `out = await Summary.run_async()` + + Parameters + ---------- + temperature : float + GPT model temperature, a measure of response entropy from 0 to 1. 0 + is more reliable and nearly deterministic; 1 will give the model + more creative freedom and may not return as factual of results. + ignore_error : None | callable + Optional callable to parse API error string. If the callable + returns True, the error will be ignored, the API call will not be + tried again, and the output will be an empty string. + rate_limit : float + OpenAI API rate limit (tokens / minute). Note that the + gpt-3.5-turbo limit is 90k as of 4/2023, but we're using a large + factor of safety (~1/2) because we can only count the tokens on the + input side and assume the output is about the same count. + fancy_combine : bool + Flag to use the GPT model to combine the separate outputs into a + cohesive summary. + + Returns + ------- + summary : str + Summary of text. + """ + + logger.info('Summarizing {} text chunks asynchronously...' + .format(len(self.text_chunks))) + + queries = [] + for chunk in self.text_chunks: + msg = self.MODEL_INSTRUCTION.format(text_chunk=chunk, + n_words=self.n_words) + queries.append(msg) + + summaries = await self.generic_async_query(queries, + model_role=self.MODEL_ROLE, + temperature=temperature, + ignore_error=ignore_error, + rate_limit=rate_limit) + + self.summary_chunks = summaries + summary = '\n\n'.join(summaries) + + if fancy_combine: + summary = self.combine(summary) + + logger.info('Finished all summaries.') + + return summary
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/tree.html b/_modules/elm/tree.html new file mode 100644 index 00000000..39e82058 --- /dev/null +++ b/_modules/elm/tree.html @@ -0,0 +1,534 @@ + + + + + + elm.tree — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.tree

+# -*- coding: utf-8 -*-
+"""
+ELM decision trees.
+"""
+import networkx as nx
+import logging
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +class DecisionTree: + """Class to traverse a directed graph of LLM prompts. Nodes are + prompts and edges are transitions between prompts based on conditions + being met in the LLM response.""" + + def __init__(self, graph): + """Class to traverse a directed graph of LLM prompts. Nodes are + prompts and edges are transitions between prompts based on conditions + being met in the LLM response. + + Examples + -------- + Here's a simple example to setup a decision tree graph and run with the + DecisionTree class: + + >>> import logging + >>> import networkx as nx + >>> from rex import init_logger + >>> from elm.base import ApiBase + >>> from elm.tree import DecisionTree + >>> + >>> init_logger('elm.tree') + >>> + >>> G = nx.DiGraph(text='hello', name='Grant', + api=ApiBase(model='gpt-35-turbo')) + >>> + >>> G.add_node('init', prompt='Say {text} to {name}') + >>> G.add_edge('init', 'next', condition=lambda x: 'Grant' in x) + >>> G.add_node('next', prompt='How are you?') + >>> + >>> tree = DecisionTree(G) + >>> out = tree.run() + >>> + >>> print(tree.all_messages_txt) + + Parameters + ---------- + graph : nx.DiGraph + Directed acyclic graph where nodes are LLM prompts and edges are + logical transitions based on the response. Must have high-level + graph attribute "api" which is an ApiBase instance. Nodes should + have attribute "prompt" which can have {format} named arguments + that will be filled from the high-level graph attributes. Edges can + have attribute "condition" that is a callable to be executed on the + LLM response text. An edge from a node without a condition acts as + an "else" statement if no other edge conditions are satisfied. A + single edge from node to node does not need a condition. + """ + self._g = graph + self._history = [] + assert isinstance(self.graph, nx.DiGraph) + assert 'api' in self.graph.graph + + @property + def api(self): + """Get the ApiBase object. 
+ + Returns + ------- + ApiBase + """ + return self.graph.graph['api'] + + @property + def messages(self): + """Get a list of the conversation messages with the LLM. + + Returns + ------- + list + """ + return self.api.messages + + @property + def all_messages_txt(self): + """Get a printout of the full conversation with the LLM + + Returns + ------- + str + """ + return self.api.all_messages_txt + + @property + def history(self): + """Get a record of the nodes traversed in the tree + + Returns + ------- + list + """ + return self._history + + @property + def graph(self): + """Get the networkx graph object + + Returns + ------- + nx.DiGraph + """ + return self._g + +
+[docs] + def call_node(self, node0): + """Call the LLM with the prompt from the input node and search the + successor edges for a valid transition condition + + Parameters + ---------- + node0 : str + Name of node being executed. + + Returns + ------- + out : str + Next node or LLM response if at a leaf node. + """ + prompt = self._prepare_graph_call(node0) + out = self.api.chat(prompt) + return self._parse_graph_output(node0, out)
+ + + def _prepare_graph_call(self, node0): + """Prepare a graph call for given node.""" + prompt = self.graph.nodes[node0]['prompt'] + txt_fmt = {k: v for k, v in self.graph.graph.items() if k != 'api'} + prompt = prompt.format(**txt_fmt) + self._history.append(node0) + return prompt + + def _parse_graph_output(self, node0, out): + """Parse graph output for given node and LLM call output. """ + successors = list(self.graph.successors(node0)) + edges = [self.graph.edges[(node0, node1)] for node1 in successors] + conditions = [edge.get('condition', None) for edge in edges] + + if len(successors) == 0: + logger.info(f'Reached leaf node "{node0}".') + return out + + if len(successors) > 1 and all(c is None for c in conditions): + msg = (f'At least one of the edges from "{node0}" should have ' + f'a "condition": {edges}') + logger.error(msg) + raise AttributeError(msg) + + # prioritize callable conditions + for i, condition in enumerate(conditions): + if callable(condition) and condition(out): + logger.info(f'Node transition: "{node0}" -> "{successors[i]}" ' + '(satisfied by callable condition)') + return successors[i] + + # None condition is basically "else" statement + for i, condition in enumerate(conditions): + if condition is None: + logger.info(f'Node transition: "{node0}" -> "{successors[i]}" ' + '(satisfied by None condition)') + return successors[i] + + msg = (f'None of the edge conditions from "{node0}" ' + f'were satisfied: {edges}') + logger.error(msg) + raise AttributeError(msg) + +
+[docs] + def run(self, node0='init'): + """Traverse the decision tree starting at the input node. + + Parameters + ---------- + node0 : str + Name of starting node in the graph. This is typically called "init" + + Returns + ------- + out : str + Final response from LLM at the leaf node. + """ + + self._history = [] + + while True: + try: + out = self.call_node(node0) + except Exception as e: + last_message = self.messages[-1]['content'] + msg = ('Ran into an exception when traversing tree. ' + 'Last message from LLM is printed below. ' + 'See debug logs for more detail. ' + '\nLast message: \n' + f'"""\n{last_message}\n"""') + logger.debug('Error traversing trees, heres the full ' + 'conversation printout:' + f'\n{self.all_messages_txt}') + logger.error(msg) + raise RuntimeError(msg) from e + if out in self.graph: + node0 = out + else: + break + + logger.info(f'Output: {out}') + + return out
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/utilities/parse.html b/_modules/elm/utilities/parse.html new file mode 100644 index 00000000..e436ed90 --- /dev/null +++ b/_modules/elm/utilities/parse.html @@ -0,0 +1,809 @@ + + + + + + elm.utilities.parse — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.utilities.parse

+# -*- coding: utf-8 -*-
+"""ELM parsing utilities."""
+import io
+import re
+import logging
+from warnings import warn
+
+import html2text
+import numpy as np
+import pandas as pd
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +def is_multi_col(text, separator=" "): + """Does the text look like it has multiple vertical text columns? + + Parameters + ---------- + text : str + Input text, which may or may not contain multiple vertical + columns. + separator : str + Heuristic split string to look for spaces between columns + + Returns + ------- + out : bool + True if more than one vertical text column + """ + n_cols = [len(line.strip().split(separator)) for line in text.split("\n")] + return np.median(n_cols) >= 2
+ + + +
+[docs] +def remove_blank_pages(pages): + """Remove any blank pages from the iterable. + + Parameters + ---------- + pages : iterable + Iterable of string objects. Objects in this iterable that do not + contain any text will be removed. + + Returns + ------- + list + List of strings with content, or empty list. + """ + return [page for page in pages if any(page.strip())]
+ + + +
+[docs] +def html_to_text(html, ignore_links=True): + """Call to `HTML2Text` class with basic args. + + Parameters + ---------- + html : str + HTML text extracted from the web. + ignore_links : bool, optional + Option to ignore links in HTML when parsing. + By default, ``True``. + + Returns + ------- + str + Text extracted from the input HTML. + """ + h = html2text.HTML2Text() + h.ignore_links = ignore_links + h.ignore_images = True + h.bypass_tables = True + return h.handle(html)
+ + + +
+[docs] +def format_html_tables(text, **kwargs): + """Format tables within HTML text into pretty markdown. + + Note that if pandas does not detect enough tables in the text to + match the "<table>" tags, no replacement is performed at all. + + Parameters + ---------- + text : str + HTML text, possible containing tables enclosed by the + "<table>" tag. + **kwargs + Keyword-arguments to pass to ``pandas.DataFrame.to_markdown`` + function. Must not contain the `"headers"` keyword (this is + supplied internally). + + Returns + ------- + str + Text with HTML tables (if any) converted to markdown. + """ + matches = _find_html_table_matches(text) + if not matches: + return text + + dfs = _find_dfs(text) + if len(matches) != len(dfs): + logger.error( + "Found incompatible number of HTML (%d) and parsed (%d) tables! " + "No replacement performed.", + len(matches), + len(dfs), + ) + return text + + return _replace_tables_in_text(text, matches, dfs, **kwargs)
+ + + +def _find_html_table_matches(text): + """Find HTML table matches in the text""" + return re.findall(r"<table>[\s\S]*?</table>", text) + + +def _find_dfs(text): + """Load HTML tables from text into DataFrames""" + return pd.read_html(io.StringIO(text)) + + +def _replace_tables_in_text(text, matches, dfs, **kwargs): + """Replace all items in the 'matches' input with MD tables""" + for table_str, df in zip(matches, dfs): + new_table_str = df.to_markdown(headers=df.columns, **kwargs) + text = text.replace(table_str, new_table_str) + return text + + +
+[docs] +def clean_headers( + pages, + char_thresh=0.6, + page_thresh=0.8, + split_on="\n", + iheaders=(0, 1, -2, -1), +): + """Clean headers/footers that are duplicated across pages of a document. + + Note that this function will update the items within the `pages` + input. + + Parameters + ---------- + pages : list + List of pages (as str) from document. + char_thresh : float + Fraction of characters in a given header that are similar + between pages to be considered for removal + page_thresh : float + Fraction of pages that share the header to be considered for + removal + split_on : str + Chars to split lines of a page on + iheaders : list | tuple + Integer indices to look for headers after splitting a page into + lines based on split_on. This needs to go from the start of the + page to the end. + + Returns + ------- + out : str + Clean text with all pages joined + """ + logger.info("Cleaning headers") + headers = _get_nominal_headers(pages, split_on, iheaders) + tests = np.zeros((len(pages), len(headers))) + + for ip, page in enumerate(pages): + for ih, header in zip(iheaders, headers): + pheader = "" + try: + pheader = page.split(split_on)[ih] + except IndexError: + pass + + harr = header.replace(" ", "") + parr = pheader.replace(" ", "") + + harr = harr.ljust(len(parr)) + parr = parr.ljust(len(harr)) + + harr = np.array([*harr]) + parr = np.array([*parr]) + assert len(harr) == len(parr) + + test = harr == parr + if len(test) == 0: + test = 1.0 + else: + test = test.sum() / len(test) + + tests[ip, ih] = test + + logger.debug("Header tests (page, iheader): \n{}".format(tests)) + tests = (tests > char_thresh).sum(axis=0) / len(pages) + tests = tests > page_thresh + logger.debug("Header tests (iheader,): \n{}".format(tests)) + + header_inds_to_remove = { + ind for is_header, ind in zip(tests, iheaders) if is_header + } + if not header_inds_to_remove: + return pages + + for ip, page in enumerate(pages): + page = page.split(split_on) + if len(iheaders) >= 
len(page): + continue + pages[ip] = split_on.join( + [ + line + for line_ind, line in enumerate(page) + if line_ind not in header_inds_to_remove + and line_ind - len(page) not in header_inds_to_remove + ] + ) + + return pages
+ + + +def _get_nominal_headers(pages, split_on, iheaders): + """Get nominal headers from a standard page. + + This function aims for a "typical" page that is likely to have a + normal header, not the first or last. + + Parameters + ---------- + pages : list + List of pages (as str) from document. + split_on : str + Chars to split lines of a page on + iheaders : list | tuple + Integer indices to look for headers after splitting a page into + lines based on split_on. This needs to go from the start of the + page to the end. + + Returns + ------- + headers : list + List of headers where each entry is a string header + """ + + headers = [None] * len(iheaders) + page_lens = np.array([len(p) for p in pages]) + median_len = np.median(page_lens) + ipage = np.argmin(np.abs(page_lens - median_len)) + page = pages[ipage] + for i, ih in enumerate(iheaders): + try: + header = page.split(split_on)[ih] + except IndexError: + header = "" + headers[i] = header + + return headers + + +
+[docs] +def combine_pages(pages): + """Combine pages of GPT cleaned text into a single string. + + Parameters + ---------- + pages : list + List of pages (as str) from document. + + Returns + ------- + full : str + Single multi-page string + """ + return "\n".join(pages).replace("\n•", "-").replace("•", "-")
+ + + +
+[docs] +def replace_common_pdf_conversion_chars(text): + """Re-format text to remove common pdf-converter chars. + + Chars affected include ``\\r\\n``, ``\\r`` and ``\\x0c``. + + Parameters + ---------- + text : str + Input text (presumably from pdf parser). + + Returns + ------- + str + Cleaned text. + """ + return text.replace("\r\n", "\n").replace("\x0c", "").replace("\r", "\n")
+ + + +
+[docs] +def replace_multi_dot_lines(text): + """Replace instances of three or more dots (.....) with just "..." + + Parameters + ---------- + text : str + Text possibly containing many repeated dots. + + Returns + ------- + str + Cleaned text with only three dots max in a row. + """ + return re.sub(r"[.]{3,}", "...", text)
+ + + +
+[docs] +def replace_excessive_newlines(text): + """Replace instances of three or more newlines with ``\\n\\n`` + + Parameters + ---------- + text : str + Text possibly containing many repeated newline characters. + + Returns + ------- + str + Cleaned text with only a maximum of two newlines in a row. + """ + return re.sub(r"[\n]{3,}", "\n\n", text)
+ + + +
+[docs] +def remove_empty_lines_or_page_footers(text): + """Replace empty lines (potentially with page numbers only) as newlines + + Parameters + ---------- + text : str + Text possibly containing empty lines and/or lines with only page + numbers. + + Returns + ------- + str + Cleaned text with no empty lines. + """ + return re.sub(r"[\n\r]+(?:\s*?\d*?\s*)[\n\r]+", "\n", text)
+ + + +
+[docs] +def read_pdf(pdf_bytes, verbose=True): + """Read PDF contents from bytes. + + This method will automatically try to detect multi-column format + and load the text without a physical layout in that case. + + Parameters + ---------- + pdf_bytes : bytes + Bytes corresponding to a PDF file. + verbose : bool, optional + Option to log errors during parsing. By default, ``True``. + + Returns + ------- + iterable + Iterable containing pages of the PDF document. This iterable + may be empty if there was an error reading the PDF file. + """ + import pdftotext + + try: + pages = _load_pdf_possibly_multi_col(pdf_bytes) + except pdftotext.Error as e: + if verbose: + logger.error("Failed to decode PDF content!") + logger.exception(e) + pages = [] + + return pages
+ + + +def _load_pdf_possibly_multi_col(pdf_bytes): + """Load PDF, which may be multi-column""" + import pdftotext + + pdf_bytes = io.BytesIO(pdf_bytes) + pages = pdftotext.PDF(pdf_bytes, physical=True) + if is_multi_col(combine_pages(pages)): + pages = pdftotext.PDF(pdf_bytes, physical=False) + return pages + + +
+[docs] +def read_pdf_ocr(pdf_bytes, verbose=True): # pragma: no cover + """Read PDF contents from bytes using Optical Character recognition (OCR). + + This method attempt to read the PDF document using OCR. This is one + of the only ways to parse a scanned PDF document. To use this + function, you will need to install the `pytesseract` and `pdf2image` + Modules. Installation guides here: + + - `pytesseract`: + https://github.com/madmaze/pytesseract?tab=readme-ov-file#installation + - `pdf2image`: + https://github.com/Belval/pdf2image?tab=readme-ov-file#how-to-install + + Windows users may also need to apply the fix described in this + answer before they can use pytesseract: http://tinyurl.com/v9xr4vrj + + Parameters + ---------- + pdf_bytes : bytes + Bytes corresponding to a PDF file. + verbose : bool, optional + Option to log errors during parsing. By default, ``True``. + + Returns + ------- + iterable + Iterable containing pages of the PDF document. This iterable + may be empty if there was an error reading the PDF file. + """ + try: + pages = _load_pdf_with_pytesseract(pdf_bytes) + except Exception as e: + if verbose: + logger.error("Failed to decode PDF content!") + logger.exception(e) + pages = [] + + return pages
+ + + +def _load_pdf_with_pytesseract(pdf_bytes): # pragma: no cover + """Load PDF bytes using Optical Character recognition (OCR)""" + + try: + import pytesseract + except ImportError: + msg = ( + "Module `pytesseract` not found. Please follow these instructions " + "to install: https://github.com/madmaze/pytesseract?" + "tab=readme-ov-file#installation" + ) + logger.warning(msg) + warn(msg) + return [] + + try: + from pdf2image import convert_from_bytes + except ImportError: + msg = ( + "Module `pdf2image` not found. Please follow these instructions " + "to install: https://github.com/Belval/pdf2image?" + "tab=readme-ov-file#how-to-install" + ) + logger.warning(msg) + warn(msg) + return [] + + logger.debug( + "Loading PDF with `tesseract_cmd` as %s", + pytesseract.pytesseract.tesseract_cmd, + ) + + return [ + str(pytesseract.image_to_string(page_data).encode("utf-8")) + for page_data in convert_from_bytes(bytes(pdf_bytes)) + ] +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/utilities/retry.html b/_modules/elm/utilities/retry.html new file mode 100644 index 00000000..b01541b9 --- /dev/null +++ b/_modules/elm/utilities/retry.html @@ -0,0 +1,503 @@ + + + + + + elm.utilities.retry — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.utilities.retry

+# -*- coding: utf-8 -*-
+"""ELM retry utilities."""
+import time
+import random
+import asyncio
+import logging
+from functools import wraps
+
+import openai
+
+from elm.exceptions import ELMRuntimeError
+
+
+logger = logging.getLogger(__name__)
+
+
+
+[docs] +def retry_with_exponential_backoff( + base_delay=1, + exponential_base=4, + jitter=True, + max_retries=3, + errors=(openai.RateLimitError, openai.APITimeoutError), +): + """Retry a synchronous function with exponential backoff. + + This decorator works out-of-the-box for OpenAI chat completions + calls. To configure it for other functions, set the `errors` input + accordingly. + + Parameters + ---------- + base_delay : int, optional + The base delay time, in seconds. This time will be multiplied by + the exponential_base (plus any jitter) during each retry + iteration. The multiplication applies *at the first retry*. + Therefore, if your base delay is ``1`` and your + `exponential_base` is ``4`` (with no jitter), the delay before + the first retry will be ``1 * 4 = 4`` seconds. The subsequent + delay will be ``4 * 4 = 16`` seconds, and so on. + By default, ``1``. + exponential_base : int, optional + The multiplication factor applied to the base `delay` input. + See description of `delay` for an example. By default, ``4``. + jitter : bool, optional + Option to include a random fractional adder (0 - 1) to the + `exponential_base` before multiplying by the `delay`. This can + help ensure each function call is submitted slightly offset from + other calls in a batch and therefore help avoid repeated rate + limit failures by a batch of submissions arriving simultaneously + to a service. By default, ``True``. + max_retries : int, optional + Max number of retries before raising an `ELMRuntimeError`. + By default, ``3``. + errors : tuple, optional + The error class(es) to signal a retry. Other errors will be + propagated without retrying. + By default, ``(openai.RateLimitError, openai.APITimeoutError)``. 
+ + References + ---------- + https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb + https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ + """ + + def decorator(func): + @wraps(func) + def wrapper(*args, **kwargs): + num_retries = 0 + delay = base_delay + + while True: + try: + return func(*args, **kwargs) + except errors as e: + num_retries = _handle_retries(num_retries, max_retries, e) + delay = _compute_delay(delay, exponential_base, jitter) + logger.info( + "Error: %s. Retrying in %.2f seconds.", str(e), delay + ) + kwargs = _double_timeout(**kwargs) + time.sleep(delay) + + return wrapper + + return decorator
+ + + +
+[docs] +def async_retry_with_exponential_backoff( + base_delay=1, + exponential_base=4, + jitter=True, + max_retries=3, + errors=(openai.RateLimitError, openai.APITimeoutError), +): + """Retry an asynchronous function with exponential backoff. + + This decorator works out-of-the-box for OpenAI chat completions + calls. To configure it for other functions, set the `errors` input + accordingly. + + Parameters + ---------- + base_delay : int, optional + The base delay time, in seconds. This time will be multiplied by + the exponential_base (plus any jitter) during each retry + iteration. The multiplication applies *at the first retry*. + Therefore, if your base delay is ``1`` and your + `exponential_base` is ``4`` (with no jitter), the delay before + the first retry will be ``1 * 4 = 4`` seconds. The subsequent + delay will be ``4 * 4 = 16`` seconds, and so on. + By default, ``1``. + exponential_base : int, optional + The multiplication factor applied to the base `delay` input. + See description of `delay` for an example. By default, ``4``. + jitter : bool, optional + Option to include a random fractional adder (0 - 1) to the + `exponential_base` before multiplying by the `delay`. This can + help ensure each function call is submitted slightly offset from + other calls in a batch and therefore help avoid repeated rate + limit failures by a batch of submissions arriving simultaneously + to a service. By default, ``True``. + max_retries : int, optional + Max number of retries before raising an `ELMRuntimeError`. + By default, ``3``. + errors : tuple, optional + The error class(es) to signal a retry. Other errors will be + propagated without retrying. + By default, ``(openai.RateLimitError, openai.APITimeoutError)``. 
+ + References + ---------- + https://github.com/openai/openai-cookbook/blob/main/examples/How_to_handle_rate_limits.ipynb + https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/ + """ + + def decorator(func): + @wraps(func) + async def wrapper(*args, **kwargs): + num_retries = 0 + delay = base_delay + + while True: + try: + return await func(*args, **kwargs) + except errors as e: + num_retries = _handle_retries(num_retries, max_retries, e) + delay = _compute_delay(delay, exponential_base, jitter) + logger.info( + "Error: %s. Retrying in %.2f seconds.", str(e), delay + ) + kwargs = _double_timeout(**kwargs) + await asyncio.sleep(delay) + + return wrapper + + return decorator
+ + + +def _handle_retries(num_retries, max_retries, error): + """Raise error if retry attempts exceed max limit""" + num_retries += 1 + if num_retries > max_retries: + msg = f"Maximum number of retries ({max_retries}) exceeded" + raise ELMRuntimeError(msg) from error + return num_retries + + +def _compute_delay(delay, exponential_base, jitter): + """Compute the next delay time""" + return delay * exponential_base * (1 + jitter * random.random()) + + +def _double_timeout(**kwargs): + """Double timeout parameter if it exists in kwargs.""" + if "timeout" not in kwargs: + return kwargs + + prev_timeout = kwargs["timeout"] + logger.info( + "Detected 'timeout' key in kwargs. Doubling this input from " + "%.2f to %.2f for next iteration.", + prev_timeout, + prev_timeout * 2, + ) + kwargs["timeout"] = prev_timeout * 2 + return kwargs +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/web/document.html b/_modules/elm/web/document.html new file mode 100644 index 00000000..ad2e3188 --- /dev/null +++ b/_modules/elm/web/document.html @@ -0,0 +1,564 @@ + + + + + + elm.web.document — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.web.document

+# -*- coding: utf-8 -*-
+"""ELM Web Document class definitions"""
+from abc import ABC, abstractmethod
+from copy import deepcopy
+from functools import cached_property
+
+from elm.utilities.parse import (
+    combine_pages,
+    clean_headers,
+    html_to_text,
+    remove_blank_pages,
+    format_html_tables,
+    replace_common_pdf_conversion_chars,
+    replace_multi_dot_lines,
+    remove_empty_lines_or_page_footers,
+)
+
+
+
+[docs] +class BaseDocument(ABC): + """Base ELM web document representation.""" + + def __init__(self, pages, metadata=None): + """ + + Parameters + ---------- + pages : iterable + Iterable of strings, where each string is a page of a + document. + metadata : dict, optional + Optional dict containing metadata for the document. + By default, ``None``. + """ + self.pages = remove_blank_pages(pages) + self.metadata = metadata or {} + + @property + def empty(self): + """bool: ``True`` if the document contains no pages.""" + return not self.pages + + @cached_property + def raw_pages(self): + """list: List of (a limited count of) raw pages""" + if not self.pages: + return [] + + return self._raw_pages() + + @cached_property + def text(self): + """str: Cleaned text from document""" + if not self.pages: + return "" + + return self._cleaned_text() + + @abstractmethod + def _raw_pages(self): + """Get raw pages from document""" + raise NotImplementedError( + "This document does not implement a raw pages extraction function" + ) + + @abstractmethod + def _cleaned_text(self): + """Compute cleaned text from document""" + raise NotImplementedError( + "This document does not implement a pages cleaning function" + ) + + @property + @abstractmethod + def WRITE_KWARGS(self): + """dict: Dict of kwargs to pass to `open` when writing this doc.""" + raise NotImplementedError + + @property + @abstractmethod + def FILE_EXTENSION(self): + """str: Cleaned document file extension.""" + raise NotImplementedError
+ + + +
+[docs] +class PDFDocument(BaseDocument): + """ELM web PDF document""" + + CLEAN_HEADER_KWARGS = { + "char_thresh": 0.6, + "page_thresh": 0.8, + "split_on": "\n", + "iheaders": [0, 1, 3, -3, -2, -1], + } + """Default :func:`~elm.utilities.parse.clean_headers` arguments""" + WRITE_KWARGS = {"mode": "wb"} + FILE_EXTENSION = "pdf" + + def __init__( + self, + pages, + metadata=None, + percent_raw_pages_to_keep=25, + max_raw_pages=18, + num_end_pages_to_keep=2, + clean_header_kwargs=None, + ): + """ + + Parameters + ---------- + pages : iterable + Iterable of strings, where each string is a page of a + document. + metadata : str, optional + metadata : dict, optional + Optional dict containing metadata for the document. + By default, ``None``. + percent_raw_pages_to_keep : int, optional + Percent of "raw" pages to keep. Useful for extracting info + from headers/footers of a doc, which are normally stripped + to form the "clean" text. By default, ``25``. + max_raw_pages : int, optional + The max number of raw pages to keep. The number of raw pages + will never exceed the total of this value + + `num_end_pages_to_keep`. By default, ``18``. + num_end_pages_to_keep : int, optional + Number of additional pages to keep from the end of the + document. This can be useful to extract more meta info. + The number of raw pages will never exceed the total of this + value + `max_raw_pages`. By default, ``2``. + clean_header_kwargs : dict, optional + Optional dictionary of keyword-value pair arguments to pass + to the :func:`~elm.utilities.parse.clean_headers` + function. By default, ``None``. 
+ """ + super().__init__(pages, metadata=metadata) + self.percent_raw_pages_to_keep = percent_raw_pages_to_keep + self.max_raw_pages = min(len(self.pages), max_raw_pages) + self.num_end_pages_to_keep = num_end_pages_to_keep + self.clean_header_kwargs = deepcopy(self.CLEAN_HEADER_KWARGS) + self.clean_header_kwargs.update(clean_header_kwargs or {}) + + @cached_property + def num_raw_pages_to_keep(self): + """int: Number of raw pages to keep from PDF document""" + num_to_keep = self.percent_raw_pages_to_keep / 100 * len(self.pages) + return min(self.max_raw_pages, max(1, int(num_to_keep))) + + @cached_property + def _last_page_index(self): + """int: last page index (determines how many end pages to include)""" + neg_num_extra_pages = self.num_raw_pages_to_keep - len(self.pages) + neg_num_last_pages = max( + -self.num_end_pages_to_keep, neg_num_extra_pages + ) + return min(0, neg_num_last_pages) + + def _cleaned_text(self): + """Compute cleaned text from document""" + pages = clean_headers(deepcopy(self.pages), **self.clean_header_kwargs) + text = combine_pages(pages) + text = replace_common_pdf_conversion_chars(text) + text = replace_multi_dot_lines(text) + text = remove_empty_lines_or_page_footers(text) + return text + + # pylint: disable=unnecessary-comprehension + # fmt: off + def _raw_pages(self): + """Get raw pages from document""" + raw_pages = [page for page in self.pages[:self.num_raw_pages_to_keep]] + if self._last_page_index: + raw_pages += [page for page in self.pages[self._last_page_index:]] + return raw_pages
+ + + +
+[docs] +class HTMLDocument(BaseDocument): + """ELM web HTML document""" + + HTML_TABLE_TO_MARKDOWN_KWARGS = { + "floatfmt": ".5f", + "index": True, + "tablefmt": "psql", + } + """Default :func:`~elm.utilities.parse.format_html_tables` arguments""" + WRITE_KWARGS = {"mode": "w", "encoding": "utf-8"} + FILE_EXTENSION = "txt" + + def __init__( + self, + pages, + metadata=None, + html_table_to_markdown_kwargs=None, + ignore_html_links=True, + text_splitter=None, + ): + """ + + Parameters + ---------- + pages : iterable + Iterable of strings, where each string is a page of a + document. + metadata : dict, optional + Optional dict containing metadata for the document. + By default, ``None``. + html_table_to_markdown_kwargs : dict, optional + Optional dictionary of keyword-value pair arguments to pass + to the :func:`~elm.utilities.parse.format_html_tables` + function. By default, ``None``. + ignore_html_links : bool, optional + Option to ignore link in HTML text during parsing. + By default, ``True``. + text_splitter : obj, optional + Instance of an object that implements a `split_text` method. + The method should take text as input (str) and return a list + of text chunks. The raw pages will be passed through this + splitter to create raw pages for this document. Langchain's + text splitters should work for this input. + By default, ``None``, which means the original pages input + becomes the raw pages attribute. 
+ """ + super().__init__(pages, metadata=metadata) + self.html_table_to_markdown_kwargs = deepcopy( + self.HTML_TABLE_TO_MARKDOWN_KWARGS + ) + self.html_table_to_markdown_kwargs.update( + html_table_to_markdown_kwargs or {} + ) + self.ignore_html_links = ignore_html_links + self.text_splitter = text_splitter + + def _cleaned_text(self): + """Compute cleaned text from document""" + text = combine_pages(self.pages) + text = html_to_text(text, self.ignore_html_links) + text = format_html_tables(text, **self.html_table_to_markdown_kwargs) + return text + + def _raw_pages(self): + """Get raw pages from document""" + if self.text_splitter is None: + return self.pages + return self.text_splitter.split_text("\n\n".join(self.pages))
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/web/file_loader.html b/_modules/elm/web/file_loader.html new file mode 100644 index 00000000..b1f15a1e --- /dev/null +++ b/_modules/elm/web/file_loader.html @@ -0,0 +1,554 @@ + + + + + + elm.web.file_loader — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.web.file_loader

+# -*- coding: utf-8 -*-
+"""ELM Web file loader class."""
+import asyncio
+import logging
+
+import aiohttp
+from fake_useragent import UserAgent
+
+from elm.utilities.parse import read_pdf
+from elm.web.document import PDFDocument, HTMLDocument
+from elm.web.html_pw import load_html_with_pw
+from elm.utilities.retry import async_retry_with_exponential_backoff
+from elm.exceptions import ELMRuntimeError
+
+
+logger = logging.getLogger(__name__)
+
+
+async def _read_pdf_doc(pdf_bytes, **kwargs):
+    """Default read PDF function (runs in main thread)"""
+    pages = read_pdf(pdf_bytes)
+    return PDFDocument(pages, **kwargs)
+
+
+async def _read_html_doc(text, **kwargs):
+    """Default read HTML function (runs in main thread)"""
+    return HTMLDocument([text], **kwargs)
+
+
+
+[docs] +class AsyncFileLoader: + """Async web file (PDF or HTML) loader""" + + DEFAULT_HEADER_TEMPLATE = { + "User-Agent": "", + "Accept": ( + "text/html,application/xhtml+xml,application/xml;" + "q=0.9,image/webp,*/*;q=0.8" + ), + "Accept-Language": "en-US,en;q=0.5", + "Referer": "https://www.google.com/", + "DNT": "1", + "Connection": "keep-alive", + "Upgrade-Insecure-Requests": "1", + } + """Default header""" + + def __init__( + self, + header_template=None, + verify_ssl=True, + aget_kwargs=None, + pw_launch_kwargs=None, + pdf_read_kwargs=None, + html_read_kwargs=None, + pdf_read_coroutine=None, + html_read_coroutine=None, + pdf_ocr_read_coroutine=None, + file_cache_coroutine=None, + browser_semaphore=None, + ): + """ + + Parameters + ---------- + header_template : dict, optional + Optional GET header template. If not specified, uses the + `DEFAULT_HEADER_TEMPLATE` defined for this class. + By default, ``None``. + verify_ssl : bool, optional + Option to use aiohttp's default SSL check. If ``False``, + SSL certificate validation is skipped. By default, ``True``. + aget_kwargs : dict, optional + Other kwargs to pass to :meth:`aiohttp.ClientSession.get`. + By default, ``None``. + pw_launch_kwargs : dict, optional + Keyword-value argument pairs to pass to + :meth:`async_playwright.chromium.launch` (only used when + reading HTML). By default, ``None``. + pdf_read_kwargs : dict, optional + Keyword-value argument pairs to pass to the + `pdf_read_coroutine`. By default, ``None``. + html_read_kwargs : dict, optional + Keyword-value argument pairs to pass to the + `html_read_coroutine`. By default, ``None``.. By default, ``None``. + pdf_read_coroutine : callable, optional + PDF file read coroutine. Must by an async function. Should + accept PDF bytes as the first argument and kwargs as the + rest. Must return a :obj:`elm.web.document.PDFDocument`. + If ``None``, a default function that runs in the main thread + is used. By default, ``None``. 
+ html_read_coroutine : callable, optional + HTML file read coroutine. Must by an async function. Should + accept HTML text as the first argument and kwargs as the + rest. Must return a :obj:`elm.web.document.HTMLDocument`. + If ``None``, a default function that runs in the main thread + is used. By default, ``None``. + pdf_ocr_read_coroutine : callable, optional + PDF OCR file read coroutine. Must by an async function. + Should accept PDF bytes as the first argument and kwargs as + the rest. Must return a :obj:`elm.web.document.PDFDocument`. + If ``None``, PDF OCR parsing is not attempted, and any + scanned PDF URL's will return a blank document. + By default, ``None``. + file_cache_coroutine : callable, optional + File caching coroutine. Can be used to cache files + downloaded by this class. Must accept an + :obj:`~elm.web.document.Document` instance as the first + argument and the file content to be written as the second + argument. If this method is not provided, no document + caching is performed. By default, ``None``. + browser_semaphore : asyncio.Semaphore, optional + Semaphore instance that can be used to limit the number of + playwright browsers open concurrently. If ``None``, no + limits are applied. By default, ``None``. 
+ """ + self.pw_launch_kwargs = pw_launch_kwargs or {} + self.pdf_read_kwargs = pdf_read_kwargs or {} + self.html_read_kwargs = html_read_kwargs or {} + self.get_kwargs = { + "headers": self._header_from_template(header_template), + "ssl": None if verify_ssl else False, + **(aget_kwargs or {}), + } + self.pdf_read_coroutine = pdf_read_coroutine or _read_pdf_doc + self.html_read_coroutine = html_read_coroutine or _read_html_doc + self.pdf_ocr_read_coroutine = pdf_ocr_read_coroutine + self.file_cache_coroutine = file_cache_coroutine + self.browser_semaphore = browser_semaphore + + def _header_from_template(self, header_template): + """Compile header from user or default template""" + headers = header_template or self.DEFAULT_HEADER_TEMPLATE + if not headers.get("User-Agent"): + headers["User-Agent"] = UserAgent().random + return dict(headers) + +
+[docs] + async def fetch_all(self, *urls): + """Fetch documents for all requested URL's. + + Parameters + ---------- + *urls + Iterable of URL's (as strings) to fetch. + + Returns + ------- + list + List of documents, one per requested URL. + """ + outer_task_name = asyncio.current_task().get_name() + fetches = [ + asyncio.create_task(self.fetch(url), name=outer_task_name) + for url in urls + ] + return await asyncio.gather(*fetches)
+ + +
+[docs] + async def fetch(self, url): + """Fetch a document for the given URL. + + Parameters + ---------- + url : str + URL for the document to pull down. + + Returns + ------- + :class:`elm.web.document.Document` + Document instance containing text, if the fetch was + successful. + """ + doc, raw_content = await self._fetch_doc_with_url_in_metadata(url) + doc = await self._cache_doc(doc, raw_content) + return doc
+ + + async def _fetch_doc_with_url_in_metadata(self, url): + """Fetch doc contents and add URL to metadata""" + doc, raw_content = await self._fetch_doc(url) + doc.metadata["source"] = url + return doc, raw_content + + async def _fetch_doc(self, url): + """Fetch a doc by trying pdf read, then HTML read, then PDF OCR""" + + async with aiohttp.ClientSession() as session: + try: + url_bytes = await self._fetch_content_with_retry(url, session) + except ELMRuntimeError: + return PDFDocument(pages=[]), None + + doc = await self.pdf_read_coroutine(url_bytes, **self.pdf_read_kwargs) + if doc.pages: + return doc, url_bytes + + text = await load_html_with_pw( + url, self.browser_semaphore, **self.pw_launch_kwargs + ) + doc = await self.html_read_coroutine(text, **self.html_read_kwargs) + if doc.pages: + return doc, doc.text + + if self.pdf_ocr_read_coroutine: + doc = await self.pdf_ocr_read_coroutine( + url_bytes, **self.pdf_read_kwargs + ) + + return doc, url_bytes + + @async_retry_with_exponential_backoff( + base_delay=2, + exponential_base=1.5, + jitter=False, + max_retries=3, + errors=( + aiohttp.ClientConnectionError, + aiohttp.client_exceptions.ClientConnectorCertificateError, + ), + ) + async def _fetch_content_with_retry(self, url, session): + """Fetch content from URL with several retry attempts""" + async with session.get(url, **self.get_kwargs) as response: + return await response.read() + + async def _cache_doc(self, doc, raw_content): + """Cache doc if user provided a coroutine""" + if doc.empty or not raw_content: + return doc + + if not self.file_cache_coroutine: + return doc + + cache_fn = await self.file_cache_coroutine(doc, raw_content) + if cache_fn is not None: + doc.metadata["cache_fn"] = cache_fn + return doc
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/web/google_search.html b/_modules/elm/web/google_search.html new file mode 100644 index 00000000..0efef376 --- /dev/null +++ b/_modules/elm/web/google_search.html @@ -0,0 +1,455 @@ + + + + + + elm.web.google_search — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.web.google_search

+# -*- coding: utf-8 -*-
+"""ELM Web Scraping - Google search."""
+import asyncio
+import logging
+
+from playwright.async_api import (
+    async_playwright,
+    TimeoutError as PlaywrightTimeoutError,
+)
+
+from elm.web.utilities import clean_search_query
+
+
+logger = logging.getLogger(__name__)
+_SEARCH_RESULT_TAG = '[jsname="UWckNb"]'
+
+
+
+[docs] +class PlaywrightGoogleLinkSearch: + """Search for top results on google and return their links""" + + EXPECTED_RESULTS_PER_PAGE = 10 + """Number of results displayed per Google page. """ + + def __init__(self, **launch_kwargs): + """ + + Parameters + ---------- + **launch_kwargs + Keyword arguments to be passed to + `playwright.chromium.launch`. For example, you can pass + ``headless=False, slow_mo=50`` for a visualization of the + search. + """ + self.launch_kwargs = launch_kwargs + self._browser = None + + async def _load_browser(self, pw_instance): + """Launch a chromium instance and load a page""" + self._browser = await pw_instance.chromium.launch(**self.launch_kwargs) + + async def _close_browser(self): + """Close browser instance and reset internal attributes""" + await self._browser.close() + self._browser = None + + async def _search(self, query, num_results=10): + """Search google for links related to a query.""" + logger.debug("Searching Google: %r", query) + num_results = min(num_results, self.EXPECTED_RESULTS_PER_PAGE) + + page = await self._browser.new_page() + await _navigate_to_google(page) + await _perform_google_search(page, query) + return await _extract_links(page, num_results) + + async def _skip_exc_search(self, query, num_results=10): + """Perform search while ignoring timeout errors""" + try: + return await self._search(query, num_results=num_results) + except PlaywrightTimeoutError as e: + logger.exception(e) + return [] + + async def _get_links(self, queries, num_results): + """Get links for multiple queries""" + outer_task_name = asyncio.current_task().get_name() + async with async_playwright() as pw_instance: + await self._load_browser(pw_instance) + searches = [ + asyncio.create_task( + self._skip_exc_search(query, num_results=num_results), + name=outer_task_name, + ) + for query in queries + ] + results = await asyncio.gather(*searches) + await self._close_browser() + return results + +
+[docs] + async def results(self, *queries, num_results=10): + """Retrieve links for the first `num_results` of each query. + + This function executes a google search for each input query and + returns a list of links corresponding to the top `num_results`. + + Parameters + ---------- + num_results : int, optional + Number of top results to retrieve for each query. Note that + this value can never exceed the number of results per page + (typically 10). If you pass in a larger value, it will be + reduced to the number of results per page. + By default, ``10``. + + Returns + ------- + list + List equal to the length of the input queries, where each + entry is another list containing the top `num_results` + links. + """ + queries = map(clean_search_query, queries) + return await self._get_links(queries, num_results)
+
+ + + +async def _navigate_to_google(page): + """Navigate to Google domain.""" + await page.goto("https://www.google.com") + await page.wait_for_load_state("networkidle") + + +async def _perform_google_search(page, search_query): + """Fill in search bar with user query and click search button""" + await page.get_by_label("Search", exact=True).fill(search_query) + await _close_autofill_suggestions(page) + await page.get_by_role("button", name="Google Search").click() + + +async def _close_autofill_suggestions(page): + """Google autofill suggestions often get in way of search button. + + We get around this by closing the suggestion dropdown before + looking for the search button. Looking for the "Google Search" + button doesn't work because it is sometimes obscured by the dropdown + menu. Clicking the "Google" logo can also fail when they add + seasonal links/images (e.g. holiday logos). Current solutions is to + look for a specific div at the top of the page. + """ + await page.locator("#gb").click() + + +async def _extract_links(page, num_results): + """Extract links for top `num_results` on page""" + links = await asyncio.to_thread(page.locator, _SEARCH_RESULT_TAG) + return [ + await links.nth(i).get_attribute("href") for i in range(num_results) + ] +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/web/html_pw.html b/_modules/elm/web/html_pw.html new file mode 100644 index 00000000..4c6c331c --- /dev/null +++ b/_modules/elm/web/html_pw.html @@ -0,0 +1,420 @@ + + + + + + elm.web.html_pw — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.web.html_pw

+# -*- coding: utf-8 -*-
+"""ELM Web HTML loading with Playwright
+
+We use Playwright so that javascript text is rendered before we scrape.
+"""
+import logging
+from contextlib import AsyncExitStack
+
+from playwright.async_api import async_playwright
+from playwright.async_api import Error as PlaywrightError
+from playwright.async_api import TimeoutError as PlaywrightTimeoutError
+
+logger = logging.getLogger(__name__)
+
+# block pages by resource type. e.g. image, stylesheet
+BLOCK_RESOURCE_TYPES = [
+    "beacon",
+    "csp_report",
+    "font",
+    "image",
+    "imageset",
+    "media",
+    "object",
+    "texttrack",
+    #  can block stylsheets and scripts, though it's not recommended:
+    # 'stylesheet',
+    # 'script',
+    # 'xhr',
+]
+
+
+# block popular 3rd party resources like tracking and advertisements.
+BLOCK_RESOURCE_NAMES = [
+    "adzerk",
+    "analytics",
+    "cdn.api.twitter",
+    "doubleclick",
+    "exelator",
+    "facebook",
+    "fontawesome",
+    "google",
+    "google-analytics",
+    "googletagmanager",
+    "lit.connatix",  # <- not sure about this one
+]
+
+
+async def _intercept_route(route):  # pragma: no cover
+    """intercept all requests and abort blocked ones
+
+    Source: https://scrapfly.io/blog/how-to-block-resources-in-playwright/
+    """
+    if route.request.resource_type in BLOCK_RESOURCE_TYPES:
+        return await route.abort()
+
+    if any(key in route.request.url for key in BLOCK_RESOURCE_NAMES):
+        return await route.abort()
+
+    return await route.continue_()
+
+
+
+[docs] +async def load_html_with_pw( # pragma: no cover + url, browser_semaphore=None, **pw_launch_kwargs +): + """Extract HTML from URL using Playwright. + + Parameters + ---------- + url : str + URL to pull HTML for. + browser_semaphore : asyncio.Semaphore, optional + Semaphore instance that can be used to limit the number of + playwright browsers open concurrently. If ``None``, no limits + are applied. By default, ``None``. + **pw_launch_kwargs + Keyword-value argument pairs to pass to + :meth:`async_playwright.chromium.launch`. + + Returns + ------- + str + HTML from page. + """ + try: + text = await _load_html(url, browser_semaphore, **pw_launch_kwargs) + except (PlaywrightError, PlaywrightTimeoutError): + text = "" + return text
+ + + +async def _load_html( # pragma: no cover + url, browser_sem=None, **pw_launch_kwargs +): + """Load html using playwright""" + if browser_sem is None: + browser_sem = AsyncExitStack() + + async with async_playwright() as p, browser_sem: + browser = await p.chromium.launch(**pw_launch_kwargs) + page = await browser.new_page() + await page.route("**/*", _intercept_route) + await page.goto(url) + await page.wait_for_load_state("networkidle", timeout=90_000) + text = await page.content() + + return text +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/web/utilities.html b/_modules/elm/web/utilities.html new file mode 100644 index 00000000..850b9398 --- /dev/null +++ b/_modules/elm/web/utilities.html @@ -0,0 +1,436 @@ + + + + + + elm.web.utilities — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.web.utilities

+# -*- coding: utf-8 -*-
+"""ELM Web Scraping utilities."""
+import uuid
+import hashlib
+from pathlib import Path
+
+from slugify import slugify
+
+
+
+[docs] +def clean_search_query(query): + """Check if the first character is a digit and remove it if so. + + Some search tools (e.g., Google) will fail to return results if the + query has a leading digit: 1. "LangCh..." + + This function will take all the text after the first double quote + (") if a digit is detected at the beginning of the string. + + Parameters + ---------- + query : str + Input query that may or may not contain a leading digit. + + Returns + ------- + str + Cleaned query. + """ + query = query.strip() + if len(query) < 1: + return query + + if not query[0].isdigit(): + return query.strip() + + if (first_quote_pos := query[:-1].find('"')) == -1: + return query.strip() + + last_ind = -1 if query.endswith('"') else None + + # fmt: off + return query[first_quote_pos + 1:last_ind].strip()
+ + + +
+[docs] +def compute_fn_from_url(url, make_unique=False): + """Compute a unique file name from URL string. + + File name will always be 128 characters or less, unless the + `make_unique` argument is set to true. In that case, the max + length is 164 (a UUID is tagged onto the filename). + + Parameters + ---------- + url : str + Input URL to convert into filename. + make_unique : bool, optional + Option to add a UUID at the end of the file name to make it + unique. By default, ``False``. + + Returns + ------- + str + Valid filename representation of the URL. + """ + url = url.replace("https", "").replace("http", "").replace("www", "") + url = slugify(url) + url = url.replace("-", "").replace("_", "") + + url = _shorten_using_sha(url) + + if make_unique: + url = f"{url}{uuid.uuid4()}".replace("-", "") + + return url
+ + + +def _shorten_using_sha(fn): + """Reduces FN to 128 characters""" + if len(fn) <= 128: + return fn + + out = hashlib.sha256(bytes(fn[64:], encoding="utf-8")).hexdigest() + return f"{fn[:64]}{out}" + + +
+[docs] +def write_url_doc_to_file(doc, file_content, out_dir, make_name_unique=False): + """Write a file pulled from URL to disk. + + Parameters + ---------- + doc : elm.web.document.Document + Document containing meta information about the file. Must have a + "source" key in the `metadata` dict containing the URL, which + will be converted to a file name using + :func:`compute_fn_from_url`. + file_content : str | bytes + File content, typically string text for HTML files and bytes + for PDF file. + out_dir : path-like + Path to directory where file should be stored. + make_name_unique : bool, optional + Option to make file name unique by adding a UUID at the end of + the file name. By default, ``False``. + + Returns + ------- + Path + Path to output file. + """ + out_fn = compute_fn_from_url( + url=doc.metadata["source"], make_unique=make_name_unique + ) + out_fp = Path(out_dir) / f"{out_fn}.{doc.FILE_EXTENSION}" + with open(out_fp, **doc.WRITE_KWARGS) as fh: + fh.write(file_content) + return out_fp
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/elm/wizard.html b/_modules/elm/wizard.html new file mode 100644 index 00000000..a1ded05c --- /dev/null +++ b/_modules/elm/wizard.html @@ -0,0 +1,645 @@ + + + + + + elm.wizard — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +

Source code for elm.wizard

+# -*- coding: utf-8 -*-
+"""
+ELM energy wizard
+"""
+import copy
+import numpy as np
+
+from elm.base import ApiBase
+
+
+
+[docs] +class EnergyWizard(ApiBase): + """Interface to ask OpenAI LLMs about energy research.""" + + MODEL_ROLE = "You parse through articles to answer questions." + """High level model role, somewhat redundant to MODEL_INSTRUCTION""" + + MODEL_INSTRUCTION = ('Use the information below to answer the subsequent ' + 'question. If the answer cannot be found in the ' + 'text, write "I could not find an answer."') + """Prefix to the engineered prompt""" + + def __init__(self, corpus, model=None, token_budget=3500, ref_col=None): + """ + Parameters + ---------- + corpus : pd.DataFrame + Corpus of text in dataframe format. Must have columns "text" and + "embedding". + model : str + GPT model name, default is the DEFAULT_MODEL global var + token_budget : int + Number of tokens that can be embedded in the prompt. Note that the + default budget for GPT-3.5-Turbo is 4096, but you want to subtract + some tokens to account for the response budget. + ref_col : None | str + Optional column label in the corpus that provides a reference text + string for each chunk of text. + """ + + super().__init__(model) + + self.corpus = self.preflight_corpus(corpus) + self.token_budget = token_budget + self.embedding_arr = np.vstack(self.corpus['embedding'].values) + self.text_arr = self.corpus['text'].values + self.ref_col = ref_col + +
+[docs] + @staticmethod + def preflight_corpus(corpus, required=('text', 'embedding')): + """Run preflight checks on the text corpus. + + Parameters + ---------- + corpus : pd.DataFrame + Corpus of text in dataframe format. Must have columns "text" and + "embedding". + required : list | tuple + Column names required to be in the corpus df + + Returns + ------- + corpus : pd.DataFrame + Corpus of text in dataframe format. Must have columns "text" and + "embedding". + """ + missing = [col for col in required if col not in corpus] + if any(missing): + msg = ('Text corpus must have {} columns but received ' + 'corpus with columns: {}' + .format(missing, list(corpus.columns))) + raise KeyError(msg) + + if not isinstance(corpus.index.values[0], int): + corpus['index'] = np.arange(len(corpus)) + corpus = corpus.set_index('index', drop=False) + + return corpus
+ + +
+[docs] + def cosine_dist(self, query_embedding): + """Compute the cosine distance of the query embedding array vs. all of + the embedding arrays of the full text corpus + + Parameters + ---------- + query_embedding : np.ndarray + 1D array of the numerical embedding of the request query. + + Returns + ------- + out : np.ndarray + 1D array with length equal to the number of entries in the text + corpus. Each value is a distance score where smaller is closer + """ + + dot = np.dot(self.embedding_arr, query_embedding) + norm1 = np.linalg.norm(query_embedding) + norm2 = np.linalg.norm(self.embedding_arr, axis=1) + + out = 1 - (dot / (norm1 * norm2)) + + return out
+ + +
+[docs] + def rank_strings(self, query, top_n=100): + """Returns a list of strings and relatednesses, sorted from most + related to least. + + Parameters + ---------- + query : str + Question being asked of GPT + top_n : int + Number of top results to return. + + Returns + ------- + strings : np.ndarray + 1D array of related strings + score : np.ndarray + 1D array of float scores of strings + idx : np.ndarray + 1D array of indices in the text corpus corresponding to the + ranked strings/scores outputs. + """ + + embedding = self.get_embedding(query) + scores = 1 - self.cosine_dist(embedding) + best = np.argsort(scores)[::-1][:top_n] + + strings = self.text_arr[best] + scores = scores[best] + + return strings, scores, best
+ + +
+[docs] + def engineer_query(self, query, token_budget=None, new_info_threshold=0.7, + convo=False): + """Engineer a query for GPT using the corpus of information + + Parameters + ---------- + query : str + Question being asked of GPT + token_budget : int + Option to override the class init token budget. + new_info_threshold : float + New text added to the engineered query must contain at least this + much new information. This helps prevent (for example) the table of + contents being added multiple times. + convo : bool + Flag to perform semantic search with full conversation history + (True) or just the single query (False). Call EnergyWizard.clear() + to reset the chat history. + Returns + ------- + message : str + Engineered question to GPT including information from corpus and + the original query + references : list + The list of references (strs) used in the engineered prompt is + returned here + """ + + self.messages.append({"role": "user", "content": query}) + + if convo: + # [1:] to not include the system role in the semantic search + query = [f"{msg['role'].upper()}: {msg['content']}" + for msg in self.messages[1:]] + query = '\n\n'.join(query) + + token_budget = token_budget or self.token_budget + + strings, _, idx = self.rank_strings(query) + + message = copy.deepcopy(self.MODEL_INSTRUCTION) + question = f"\n\nQuestion: {query}" + used_index = [] + + for string, i in zip(strings, idx): + next_str = (f'\n\n"""\n{string}\n"""') + token_usage = self.count_tokens(message + next_str + question, + self.model) + + new_words = set(next_str.split(' ')) + additional_info = new_words - set(message.split(' ')) + new_info_frac = len(additional_info) / len(new_words) + + if new_info_frac > new_info_threshold: + if token_usage > token_budget: + break + else: + message += next_str + used_index.append(i) + + message = message + question + used_index = np.array(used_index) + references = self.make_ref_list(used_index) + + return message, references
+ + +
+[docs] + def make_ref_list(self, idx): + """Make a reference list + + Parameters + ---------- + used_index : np.ndarray + Indices of the used text from the text corpus + + Returns + ------- + ref_list : list + A list of references (strs) used. + """ + ref_list = '' + if self.ref_col is not None and self.ref_col in self.corpus: + ref_list = list(self.corpus[self.ref_col].iloc[idx].unique()) + + return ref_list
+ + +
+[docs] + def chat(self, query, + debug=True, + stream=True, + temperature=0, + convo=False, + token_budget=None, + new_info_threshold=0.7, + print_references=False, + return_chat_obj=False): + """Answers a query by doing a semantic search of relevant text with + embeddings and then sending engineered query to the LLM. + + Parameters + ---------- + query : str + Question being asked of EnergyWizard + debug : bool + Flag to return extra diagnostics on the engineered question. + stream : bool + Flag to print subsequent chunks of the response in a streaming + fashion + temperature : float + GPT model temperature, a measure of response entropy from 0 to 1. 0 + is more reliable and nearly deterministic; 1 will give the model + more creative freedom and may not return as factual of results. + convo : bool + Flag to perform semantic search with full conversation history + (True) or just the single query (False). Call EnergyWizard.clear() + to reset the chat history. + token_budget : int + Option to override the class init token budget. + new_info_threshold : float + New text added to the engineered query must contain at least this + much new information. This helps prevent (for example) the table of + contents being added multiple times. + print_references : bool + Flag to print references if EnergyWizard is initialized with a + valid ref_col. + return_chat_obj : bool + Flag to only return the ChatCompletion from OpenAI API. + + Returns + ------- + response : str + GPT output / answer. 
+ query : str + If debug is True, the engineered query asked of GPT will also be + returned here + references : list + If debug is True, the list of references (strs) used in the + engineered prompt is returned here + """ + + out = self.engineer_query(query, token_budget=token_budget, + new_info_threshold=new_info_threshold, + convo=convo) + query, references = out + + messages = [{"role": "system", "content": self.MODEL_ROLE}, + {"role": "user", "content": query}] + response_message = '' + kwargs = dict(model=self.model, + messages=messages, + temperature=temperature, + stream=stream) + + response = self._client.chat.completions.create(**kwargs) + + if return_chat_obj: + return response, query, references + + if stream: + for chunk in response: + chunk_msg = chunk.choices[0].delta.content or "" + response_message += chunk_msg + print(chunk_msg, end='') + + else: + response_message = response.choices[0].message.content + + self.messages.append({'role': 'assistant', + 'content': response_message}) + + if any(references) and print_references: + ref_msg = ('\n\nThe model was provided with the ' + 'following documents to support its answer:') + ref_msg += '\n - ' + '\n - '.join(references) + response_message += ref_msg + if stream: + print(ref_msg) + + if debug: + return response_message, query, references + else: + return response_message
+
+ +
+ +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/_modules/index.html b/_modules/index.html new file mode 100644 index 00000000..fd8d59d0 --- /dev/null +++ b/_modules/index.html @@ -0,0 +1,351 @@ + + + + + + Overview: module code — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ + +
+
+ + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.base.ApiBase.rst.txt b/_sources/_autosummary/elm.base.ApiBase.rst.txt new file mode 100644 index 00000000..0451a30a --- /dev/null +++ b/_sources/_autosummary/elm.base.ApiBase.rst.txt @@ -0,0 +1,43 @@ +elm.base.ApiBase +================ + +.. currentmodule:: elm.base + +.. autoclass:: ApiBase + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ApiBase.call_api + ~ApiBase.call_api_async + ~ApiBase.chat + ~ApiBase.clear + ~ApiBase.count_tokens + ~ApiBase.generic_async_query + ~ApiBase.generic_query + ~ApiBase.get_embedding + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~ApiBase.DEFAULT_MODEL + ~ApiBase.EMBEDDING_MODEL + ~ApiBase.EMBEDDING_URL + ~ApiBase.HEADERS + ~ApiBase.MODEL_ROLE + ~ApiBase.URL + ~ApiBase.all_messages_txt + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.base.ApiQueue.rst.txt b/_sources/_autosummary/elm.base.ApiQueue.rst.txt new file mode 100644 index 00000000..b93b6594 --- /dev/null +++ b/_sources/_autosummary/elm.base.ApiQueue.rst.txt @@ -0,0 +1,32 @@ +elm.base.ApiQueue +================= + +.. currentmodule:: elm.base + +.. autoclass:: ApiQueue + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ApiQueue.collect_jobs + ~ApiQueue.run + ~ApiQueue.submit_jobs + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~ApiQueue.waiting_on + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.base.rst.txt b/_sources/_autosummary/elm.base.rst.txt new file mode 100644 index 00000000..87aaf975 --- /dev/null +++ b/_sources/_autosummary/elm.base.rst.txt @@ -0,0 +1,32 @@ +elm.base +======== + +.. automodule:: elm.base + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + ApiBase + ApiQueue + + + + + + + + + diff --git a/_sources/_autosummary/elm.chunk.Chunker.rst.txt b/_sources/_autosummary/elm.chunk.Chunker.rst.txt new file mode 100644 index 00000000..5e06ef21 --- /dev/null +++ b/_sources/_autosummary/elm.chunk.Chunker.rst.txt @@ -0,0 +1,52 @@ +elm.chunk.Chunker +================= + +.. currentmodule:: elm.chunk + +.. autoclass:: Chunker + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Chunker.add_overlap + ~Chunker.call_api + ~Chunker.call_api_async + ~Chunker.chat + ~Chunker.chunk_text + ~Chunker.clean_paragraphs + ~Chunker.clear + ~Chunker.count_tokens + ~Chunker.generic_async_query + ~Chunker.generic_query + ~Chunker.get_embedding + ~Chunker.is_good_paragraph + ~Chunker.merge_chunks + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~Chunker.DEFAULT_MODEL + ~Chunker.EMBEDDING_MODEL + ~Chunker.EMBEDDING_URL + ~Chunker.HEADERS + ~Chunker.MODEL_ROLE + ~Chunker.URL + ~Chunker.all_messages_txt + ~Chunker.chunk_tokens + ~Chunker.chunks + ~Chunker.paragraph_tokens + ~Chunker.paragraphs + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.chunk.rst.txt b/_sources/_autosummary/elm.chunk.rst.txt new file mode 100644 index 00000000..446f930b --- /dev/null +++ b/_sources/_autosummary/elm.chunk.rst.txt @@ -0,0 +1,31 @@ +elm.chunk +========= + +.. automodule:: elm.chunk + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + Chunker + + + + + + + + + diff --git a/_sources/_autosummary/elm.cli.rst.txt b/_sources/_autosummary/elm.cli.rst.txt new file mode 100644 index 00000000..c64e778a --- /dev/null +++ b/_sources/_autosummary/elm.cli.rst.txt @@ -0,0 +1,23 @@ +elm.cli +======= + +.. 
automodule:: elm.cli + + + + + + + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.embed.ChunkAndEmbed.rst.txt b/_sources/_autosummary/elm.embed.ChunkAndEmbed.rst.txt new file mode 100644 index 00000000..dafa5a73 --- /dev/null +++ b/_sources/_autosummary/elm.embed.ChunkAndEmbed.rst.txt @@ -0,0 +1,46 @@ +elm.embed.ChunkAndEmbed +======================= + +.. currentmodule:: elm.embed + +.. autoclass:: ChunkAndEmbed + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ChunkAndEmbed.call_api + ~ChunkAndEmbed.call_api_async + ~ChunkAndEmbed.chat + ~ChunkAndEmbed.clean_tables + ~ChunkAndEmbed.clear + ~ChunkAndEmbed.count_tokens + ~ChunkAndEmbed.generic_async_query + ~ChunkAndEmbed.generic_query + ~ChunkAndEmbed.get_embedding + ~ChunkAndEmbed.run + ~ChunkAndEmbed.run_async + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~ChunkAndEmbed.DEFAULT_MODEL + ~ChunkAndEmbed.EMBEDDING_MODEL + ~ChunkAndEmbed.EMBEDDING_URL + ~ChunkAndEmbed.HEADERS + ~ChunkAndEmbed.MODEL_ROLE + ~ChunkAndEmbed.URL + ~ChunkAndEmbed.all_messages_txt + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.embed.rst.txt b/_sources/_autosummary/elm.embed.rst.txt new file mode 100644 index 00000000..64866b78 --- /dev/null +++ b/_sources/_autosummary/elm.embed.rst.txt @@ -0,0 +1,31 @@ +elm.embed +========= + +.. automodule:: elm.embed + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + ChunkAndEmbed + + + + + + + + + diff --git a/_sources/_autosummary/elm.exceptions.ELMError.rst.txt b/_sources/_autosummary/elm.exceptions.ELMError.rst.txt new file mode 100644 index 00000000..f211d916 --- /dev/null +++ b/_sources/_autosummary/elm.exceptions.ELMError.rst.txt @@ -0,0 +1,6 @@ +elm.exceptions.ELMError +======================= + +.. currentmodule:: elm.exceptions + +.. 
autoexception:: ELMError \ No newline at end of file diff --git a/_sources/_autosummary/elm.exceptions.ELMRuntimeError.rst.txt b/_sources/_autosummary/elm.exceptions.ELMRuntimeError.rst.txt new file mode 100644 index 00000000..13aa38ae --- /dev/null +++ b/_sources/_autosummary/elm.exceptions.ELMRuntimeError.rst.txt @@ -0,0 +1,6 @@ +elm.exceptions.ELMRuntimeError +============================== + +.. currentmodule:: elm.exceptions + +.. autoexception:: ELMRuntimeError \ No newline at end of file diff --git a/_sources/_autosummary/elm.exceptions.rst.txt b/_sources/_autosummary/elm.exceptions.rst.txt new file mode 100644 index 00000000..4164853c --- /dev/null +++ b/_sources/_autosummary/elm.exceptions.rst.txt @@ -0,0 +1,31 @@ +elm.exceptions +============== + +.. automodule:: elm.exceptions + + + + + + + + + + + + + + + + .. rubric:: Exceptions + + .. autosummary:: + :toctree: + + ELMError + ELMRuntimeError + + + + + diff --git a/_sources/_autosummary/elm.ords.download.download_county_ordinance.rst.txt b/_sources/_autosummary/elm.ords.download.download_county_ordinance.rst.txt new file mode 100644 index 00000000..8f3152a3 --- /dev/null +++ b/_sources/_autosummary/elm.ords.download.download_county_ordinance.rst.txt @@ -0,0 +1,6 @@ +elm.ords.download.download\_county\_ordinance +============================================= + +.. currentmodule:: elm.ords.download + +.. autofunction:: download_county_ordinance \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.download.rst.txt b/_sources/_autosummary/elm.ords.download.rst.txt new file mode 100644 index 00000000..199dd37d --- /dev/null +++ b/_sources/_autosummary/elm.ords.download.rst.txt @@ -0,0 +1,30 @@ +elm.ords.download +================= + +.. automodule:: elm.ords.download + + + + + + + + .. rubric:: Functions + + .. 
autosummary:: + :toctree: + + download_county_ordinance + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.rst.txt b/_sources/_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.rst.txt new file mode 100644 index 00000000..27bf2841 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.apply.check\_for\_ordinance\_info +===================================================== + +.. currentmodule:: elm.ords.extraction.apply + +.. autofunction:: check_for_ordinance_info \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.rst.txt b/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.rst.txt new file mode 100644 index 00000000..0b45bd90 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.apply.extract\_ordinance\_text\_with\_llm +============================================================= + +.. currentmodule:: elm.ords.extraction.apply + +.. autofunction:: extract_ordinance_text_with_llm \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.rst.txt b/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.rst.txt new file mode 100644 index 00000000..09f92d30 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.apply.extract\_ordinance\_text\_with\_ngram\_validation +=========================================================================== + +.. currentmodule:: elm.ords.extraction.apply + +.. 
autofunction:: extract_ordinance_text_with_ngram_validation \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_values.rst.txt b/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_values.rst.txt new file mode 100644 index 00000000..cb07e365 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.apply.extract_ordinance_values.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.apply.extract\_ordinance\_values +==================================================== + +.. currentmodule:: elm.ords.extraction.apply + +.. autofunction:: extract_ordinance_values \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.apply.rst.txt b/_sources/_autosummary/elm.ords.extraction.apply.rst.txt new file mode 100644 index 00000000..82cb1832 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.apply.rst.txt @@ -0,0 +1,33 @@ +elm.ords.extraction.apply +========================= + +.. automodule:: elm.ords.extraction.apply + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + check_for_ordinance_info + extract_ordinance_text_with_llm + extract_ordinance_text_with_ngram_validation + extract_ordinance_values + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.date.DateExtractor.rst.txt b/_sources/_autosummary/elm.ords.extraction.date.DateExtractor.rst.txt new file mode 100644 index 00000000..afa4ddfb --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.date.DateExtractor.rst.txt @@ -0,0 +1,30 @@ +elm.ords.extraction.date.DateExtractor +====================================== + +.. currentmodule:: elm.ords.extraction.date + +.. autoclass:: DateExtractor + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~DateExtractor.parse + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~DateExtractor.SYSTEM_MESSAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.date.rst.txt b/_sources/_autosummary/elm.ords.extraction.date.rst.txt new file mode 100644 index 00000000..8243a350 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.date.rst.txt @@ -0,0 +1,31 @@ +elm.ords.extraction.date +======================== + +.. automodule:: elm.ords.extraction.date + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + DateExtractor + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.features.SetbackFeatures.rst.txt b/_sources/_autosummary/elm.ords.extraction.features.SetbackFeatures.rst.txt new file mode 100644 index 00000000..c75daf85 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.features.SetbackFeatures.rst.txt @@ -0,0 +1,31 @@ +elm.ords.extraction.features.SetbackFeatures +============================================ + +.. currentmodule:: elm.ords.extraction.features + +.. autoclass:: SetbackFeatures + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~SetbackFeatures.DEFAULT_FEATURE_DESCRIPTIONS + ~SetbackFeatures.FEATURES_AS_IGNORE + ~SetbackFeatures.FEATURE_CLARIFICATIONS + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.features.rst.txt b/_sources/_autosummary/elm.ords.extraction.features.rst.txt new file mode 100644 index 00000000..5240b7e6 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.features.rst.txt @@ -0,0 +1,31 @@ +elm.ords.extraction.features +============================ + +.. automodule:: elm.ords.extraction.features + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + SetbackFeatures + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.rst.txt new file mode 100644 index 00000000..f8b0b381 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.llm\_response\_does\_not\_start\_with\_no +==================================================================== + +.. currentmodule:: elm.ords.extraction.graphs + +.. autofunction:: llm_response_does_not_start_with_no \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.rst.txt new file mode 100644 index 00000000..ddf07557 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.llm\_response\_starts\_with\_no +========================================================== + +.. currentmodule:: elm.ords.extraction.graphs + +.. autofunction:: llm_response_starts_with_no \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.rst.txt new file mode 100644 index 00000000..eac8608d --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.llm\_response\_starts\_with\_yes +=========================================================== + +.. currentmodule:: elm.ords.extraction.graphs + +.. 
autofunction:: llm_response_starts_with_yes \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.rst.txt new file mode 100644 index 00000000..43403770 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.rst.txt @@ -0,0 +1,38 @@ +elm.ords.extraction.graphs +========================== + +.. automodule:: elm.ords.extraction.graphs + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + llm_response_does_not_start_with_no + llm_response_starts_with_no + llm_response_starts_with_yes + setup_base_graph + setup_conditional + setup_graph_extra_restriction + setup_graph_wes_types + setup_multiplier + setup_participating_owner + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.setup_base_graph.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.setup_base_graph.rst.txt new file mode 100644 index 00000000..82f47b7a --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.setup_base_graph.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.setup\_base\_graph +============================================= + +.. currentmodule:: elm.ords.extraction.graphs + +.. autofunction:: setup_base_graph \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.setup_conditional.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.setup_conditional.rst.txt new file mode 100644 index 00000000..de3737fc --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.setup_conditional.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.setup\_conditional +============================================= + +.. currentmodule:: elm.ords.extraction.graphs + +.. 
autofunction:: setup_conditional \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.rst.txt new file mode 100644 index 00000000..47c51888 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.setup\_graph\_extra\_restriction +=========================================================== + +.. currentmodule:: elm.ords.extraction.graphs + +.. autofunction:: setup_graph_extra_restriction \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.rst.txt new file mode 100644 index 00000000..47268b47 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.setup\_graph\_wes\_types +=================================================== + +.. currentmodule:: elm.ords.extraction.graphs + +.. autofunction:: setup_graph_wes_types \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.setup_multiplier.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.setup_multiplier.rst.txt new file mode 100644 index 00000000..27ae0b61 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.setup_multiplier.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.setup\_multiplier +============================================ + +.. currentmodule:: elm.ords.extraction.graphs + +.. 
autofunction:: setup_multiplier \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.graphs.setup_participating_owner.rst.txt b/_sources/_autosummary/elm.ords.extraction.graphs.setup_participating_owner.rst.txt new file mode 100644 index 00000000..dc4abb87 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.graphs.setup_participating_owner.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.graphs.setup\_participating\_owner +====================================================== + +.. currentmodule:: elm.ords.extraction.graphs + +.. autofunction:: setup_participating_owner \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.rst.txt b/_sources/_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.rst.txt new file mode 100644 index 00000000..e390526e --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.ngrams.convert\_text\_to\_sentence\_ngrams +============================================================== + +.. currentmodule:: elm.ords.extraction.ngrams + +.. autofunction:: convert_text_to_sentence_ngrams \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.ngrams.rst.txt b/_sources/_autosummary/elm.ords.extraction.ngrams.rst.txt new file mode 100644 index 00000000..46f32c4e --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.ngrams.rst.txt @@ -0,0 +1,31 @@ +elm.ords.extraction.ngrams +========================== + +.. automodule:: elm.ords.extraction.ngrams + + + + + + + + .. rubric:: Functions + + .. 
autosummary:: + :toctree: + + convert_text_to_sentence_ngrams + sentence_ngram_containment + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.rst.txt b/_sources/_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.rst.txt new file mode 100644 index 00000000..bad6fdfe --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.rst.txt @@ -0,0 +1,6 @@ +elm.ords.extraction.ngrams.sentence\_ngram\_containment +======================================================= + +.. currentmodule:: elm.ords.extraction.ngrams + +.. autofunction:: sentence_ngram_containment \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.rst.txt b/_sources/_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.rst.txt new file mode 100644 index 00000000..fd62bf50 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.rst.txt @@ -0,0 +1,33 @@ +elm.ords.extraction.ordinance.OrdinanceExtractor +================================================ + +.. currentmodule:: elm.ords.extraction.ordinance + +.. autoclass:: OrdinanceExtractor + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~OrdinanceExtractor.check_for_correct_size + ~OrdinanceExtractor.check_for_restrictions + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~OrdinanceExtractor.MODEL_INSTRUCTIONS_RESTRICTIONS + ~OrdinanceExtractor.MODEL_INSTRUCTIONS_SIZE + ~OrdinanceExtractor.SYSTEM_MESSAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.rst.txt b/_sources/_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.rst.txt new file mode 100644 index 00000000..b2efa0bc --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.rst.txt @@ -0,0 +1,35 @@ +elm.ords.extraction.ordinance.OrdinanceValidator +================================================ + +.. currentmodule:: elm.ords.extraction.ordinance + +.. autoclass:: OrdinanceValidator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~OrdinanceValidator.parse + ~OrdinanceValidator.parse_from_ind + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~OrdinanceValidator.CONTAINS_ORD_PROMPT + ~OrdinanceValidator.IS_LEGAL_TEXT_PROMPT + ~OrdinanceValidator.IS_UTILITY_SCALE_PROMPT + ~OrdinanceValidator.is_legal_text + ~OrdinanceValidator.ordinance_text + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.ordinance.rst.txt b/_sources/_autosummary/elm.ords.extraction.ordinance.rst.txt new file mode 100644 index 00000000..70ae6bcc --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.ordinance.rst.txt @@ -0,0 +1,32 @@ +elm.ords.extraction.ordinance +============================= + +.. automodule:: elm.ords.extraction.ordinance + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + OrdinanceExtractor + OrdinanceValidator + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.rst.txt b/_sources/_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.rst.txt new file mode 100644 index 00000000..a7707ce5 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.rst.txt @@ -0,0 +1,24 @@ +elm.ords.extraction.parse.StructuredOrdinanceParser +=================================================== + +.. currentmodule:: elm.ords.extraction.parse + +.. autoclass:: StructuredOrdinanceParser + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~StructuredOrdinanceParser.parse + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.parse.rst.txt b/_sources/_autosummary/elm.ords.extraction.parse.rst.txt new file mode 100644 index 00000000..99267434 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.parse.rst.txt @@ -0,0 +1,31 @@ +elm.ords.extraction.parse +========================= + +.. automodule:: elm.ords.extraction.parse + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + StructuredOrdinanceParser + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.extraction.rst.txt b/_sources/_autosummary/elm.ords.extraction.rst.txt new file mode 100644 index 00000000..7635c282 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.rst.txt @@ -0,0 +1,37 @@ +elm.ords.extraction +=================== + +.. automodule:: elm.ords.extraction + + + + + + + + + + + + + + + + + + + +.. 
autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.ords.extraction.apply + elm.ords.extraction.date + elm.ords.extraction.features + elm.ords.extraction.graphs + elm.ords.extraction.ngrams + elm.ords.extraction.ordinance + elm.ords.extraction.parse + elm.ords.extraction.tree + diff --git a/_sources/_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.rst.txt b/_sources/_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.rst.txt new file mode 100644 index 00000000..3a11dbb1 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.rst.txt @@ -0,0 +1,38 @@ +elm.ords.extraction.tree.AsyncDecisionTree +========================================== + +.. currentmodule:: elm.ords.extraction.tree + +.. autoclass:: AsyncDecisionTree + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~AsyncDecisionTree.async_call_node + ~AsyncDecisionTree.async_run + ~AsyncDecisionTree.call_node + ~AsyncDecisionTree.run + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~AsyncDecisionTree.all_messages_txt + ~AsyncDecisionTree.api + ~AsyncDecisionTree.chat_llm_caller + ~AsyncDecisionTree.graph + ~AsyncDecisionTree.history + ~AsyncDecisionTree.messages + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.extraction.tree.rst.txt b/_sources/_autosummary/elm.ords.extraction.tree.rst.txt new file mode 100644 index 00000000..88ef05f0 --- /dev/null +++ b/_sources/_autosummary/elm.ords.extraction.tree.rst.txt @@ -0,0 +1,31 @@ +elm.ords.extraction.tree +======================== + +.. automodule:: elm.ords.extraction.tree + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + AsyncDecisionTree + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.llm.calling.BaseLLMCaller.rst.txt b/_sources/_autosummary/elm.ords.llm.calling.BaseLLMCaller.rst.txt new file mode 100644 index 00000000..d7b0941d --- /dev/null +++ b/_sources/_autosummary/elm.ords.llm.calling.BaseLLMCaller.rst.txt @@ -0,0 +1,23 @@ +elm.ords.llm.calling.BaseLLMCaller +================================== + +.. currentmodule:: elm.ords.llm.calling + +.. autoclass:: BaseLLMCaller + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.llm.calling.ChatLLMCaller.rst.txt b/_sources/_autosummary/elm.ords.llm.calling.ChatLLMCaller.rst.txt new file mode 100644 index 00000000..823898a5 --- /dev/null +++ b/_sources/_autosummary/elm.ords.llm.calling.ChatLLMCaller.rst.txt @@ -0,0 +1,24 @@ +elm.ords.llm.calling.ChatLLMCaller +================================== + +.. currentmodule:: elm.ords.llm.calling + +.. autoclass:: ChatLLMCaller + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ChatLLMCaller.call + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.llm.calling.LLMCaller.rst.txt b/_sources/_autosummary/elm.ords.llm.calling.LLMCaller.rst.txt new file mode 100644 index 00000000..24baa529 --- /dev/null +++ b/_sources/_autosummary/elm.ords.llm.calling.LLMCaller.rst.txt @@ -0,0 +1,24 @@ +elm.ords.llm.calling.LLMCaller +============================== + +.. currentmodule:: elm.ords.llm.calling + +.. autoclass:: LLMCaller + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. 
autosummary:: + + ~LLMCaller.call + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.llm.calling.StructuredLLMCaller.rst.txt b/_sources/_autosummary/elm.ords.llm.calling.StructuredLLMCaller.rst.txt new file mode 100644 index 00000000..9188ea4f --- /dev/null +++ b/_sources/_autosummary/elm.ords.llm.calling.StructuredLLMCaller.rst.txt @@ -0,0 +1,24 @@ +elm.ords.llm.calling.StructuredLLMCaller +======================================== + +.. currentmodule:: elm.ords.llm.calling + +.. autoclass:: StructuredLLMCaller + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~StructuredLLMCaller.call + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.llm.calling.rst.txt b/_sources/_autosummary/elm.ords.llm.calling.rst.txt new file mode 100644 index 00000000..62d31766 --- /dev/null +++ b/_sources/_autosummary/elm.ords.llm.calling.rst.txt @@ -0,0 +1,34 @@ +elm.ords.llm.calling +==================== + +.. automodule:: elm.ords.llm.calling + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + BaseLLMCaller + ChatLLMCaller + LLMCaller + StructuredLLMCaller + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.llm.rst.txt b/_sources/_autosummary/elm.ords.llm.rst.txt new file mode 100644 index 00000000..62ae5cae --- /dev/null +++ b/_sources/_autosummary/elm.ords.llm.rst.txt @@ -0,0 +1,30 @@ +elm.ords.llm +============ + +.. automodule:: elm.ords.llm + + + + + + + + + + + + + + + + + + + +.. 
autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.ords.llm.calling + diff --git a/_sources/_autosummary/elm.ords.process.download_doc_for_county.rst.txt b/_sources/_autosummary/elm.ords.process.download_doc_for_county.rst.txt new file mode 100644 index 00000000..4d4ed80c --- /dev/null +++ b/_sources/_autosummary/elm.ords.process.download_doc_for_county.rst.txt @@ -0,0 +1,6 @@ +elm.ords.process.download\_doc\_for\_county +=========================================== + +.. currentmodule:: elm.ords.process + +.. autofunction:: download_doc_for_county \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.process.download_docs_for_county_with_logging.rst.txt b/_sources/_autosummary/elm.ords.process.download_docs_for_county_with_logging.rst.txt new file mode 100644 index 00000000..99d6b8d4 --- /dev/null +++ b/_sources/_autosummary/elm.ords.process.download_docs_for_county_with_logging.rst.txt @@ -0,0 +1,6 @@ +elm.ords.process.download\_docs\_for\_county\_with\_logging +=========================================================== + +.. currentmodule:: elm.ords.process + +.. autofunction:: download_docs_for_county_with_logging \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.process.process_counties_with_openai.rst.txt b/_sources/_autosummary/elm.ords.process.process_counties_with_openai.rst.txt new file mode 100644 index 00000000..8bcba92a --- /dev/null +++ b/_sources/_autosummary/elm.ords.process.process_counties_with_openai.rst.txt @@ -0,0 +1,6 @@ +elm.ords.process.process\_counties\_with\_openai +================================================ + +.. currentmodule:: elm.ords.process + +.. 
autofunction:: process_counties_with_openai \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.process.rst.txt b/_sources/_autosummary/elm.ords.process.rst.txt new file mode 100644 index 00000000..b40aafe9 --- /dev/null +++ b/_sources/_autosummary/elm.ords.process.rst.txt @@ -0,0 +1,32 @@ +elm.ords.process +================ + +.. automodule:: elm.ords.process + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + download_doc_for_county + download_docs_for_county_with_logging + process_counties_with_openai + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.rst.txt b/_sources/_autosummary/elm.ords.rst.txt new file mode 100644 index 00000000..34a8ec2f --- /dev/null +++ b/_sources/_autosummary/elm.ords.rst.txt @@ -0,0 +1,36 @@ +elm.ords +======== + +.. automodule:: elm.ords + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.ords.download + elm.ords.extraction + elm.ords.llm + elm.ords.process + elm.ords.services + elm.ords.utilities + elm.ords.validation + diff --git a/_sources/_autosummary/elm.ords.services.base.RateLimitedService.rst.txt b/_sources/_autosummary/elm.ords.services.base.RateLimitedService.rst.txt new file mode 100644 index 00000000..99a039d7 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.base.RateLimitedService.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.base.RateLimitedService +========================================= + +.. currentmodule:: elm.ords.services.base + +.. autoclass:: RateLimitedService + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~RateLimitedService.acquire_resources + ~RateLimitedService.call + ~RateLimitedService.process + ~RateLimitedService.process_using_futures + ~RateLimitedService.release_resources + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~RateLimitedService.MAX_CONCURRENT_JOBS + ~RateLimitedService.can_process + ~RateLimitedService.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.base.Service.rst.txt b/_sources/_autosummary/elm.ords.services.base.Service.rst.txt new file mode 100644 index 00000000..90ae38b9 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.base.Service.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.base.Service +============================== + +.. currentmodule:: elm.ords.services.base + +.. autoclass:: Service + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Service.acquire_resources + ~Service.call + ~Service.process + ~Service.process_using_futures + ~Service.release_resources + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~Service.MAX_CONCURRENT_JOBS + ~Service.can_process + ~Service.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.base.rst.txt b/_sources/_autosummary/elm.ords.services.base.rst.txt new file mode 100644 index 00000000..89200a66 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.base.rst.txt @@ -0,0 +1,32 @@ +elm.ords.services.base +====================== + +.. automodule:: elm.ords.services.base + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + RateLimitedService + Service + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.services.cpu.PDFLoader.rst.txt b/_sources/_autosummary/elm.ords.services.cpu.PDFLoader.rst.txt new file mode 100644 index 00000000..ec33b80f --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.cpu.PDFLoader.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.cpu.PDFLoader +=============================== + +.. currentmodule:: elm.ords.services.cpu + +.. 
autoclass:: PDFLoader + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~PDFLoader.acquire_resources + ~PDFLoader.call + ~PDFLoader.process + ~PDFLoader.process_using_futures + ~PDFLoader.release_resources + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~PDFLoader.MAX_CONCURRENT_JOBS + ~PDFLoader.can_process + ~PDFLoader.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.cpu.ProcessPoolService.rst.txt b/_sources/_autosummary/elm.ords.services.cpu.ProcessPoolService.rst.txt new file mode 100644 index 00000000..ae33cc73 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.cpu.ProcessPoolService.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.cpu.ProcessPoolService +======================================== + +.. currentmodule:: elm.ords.services.cpu + +.. autoclass:: ProcessPoolService + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ProcessPoolService.acquire_resources + ~ProcessPoolService.call + ~ProcessPoolService.process + ~ProcessPoolService.process_using_futures + ~ProcessPoolService.release_resources + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~ProcessPoolService.MAX_CONCURRENT_JOBS + ~ProcessPoolService.can_process + ~ProcessPoolService.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.cpu.read_pdf_doc.rst.txt b/_sources/_autosummary/elm.ords.services.cpu.read_pdf_doc.rst.txt new file mode 100644 index 00000000..5f29cef6 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.cpu.read_pdf_doc.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.cpu.read\_pdf\_doc +==================================== + +.. currentmodule:: elm.ords.services.cpu + +.. 
autofunction:: read_pdf_doc \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.rst.txt b/_sources/_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.rst.txt new file mode 100644 index 00000000..ac7d80f1 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.cpu.read\_pdf\_doc\_ocr +========================================= + +.. currentmodule:: elm.ords.services.cpu + +.. autofunction:: read_pdf_doc_ocr \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.cpu.rst.txt b/_sources/_autosummary/elm.ords.services.cpu.rst.txt new file mode 100644 index 00000000..458c1974 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.cpu.rst.txt @@ -0,0 +1,40 @@ +elm.ords.services.cpu +===================== + +.. automodule:: elm.ords.services.cpu + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + read_pdf_doc + read_pdf_doc_ocr + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + PDFLoader + ProcessPoolService + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.services.openai.OpenAIService.rst.txt b/_sources/_autosummary/elm.ords.services.openai.OpenAIService.rst.txt new file mode 100644 index 00000000..e44bdb9b --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.openai.OpenAIService.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.openai.OpenAIService +====================================== + +.. currentmodule:: elm.ords.services.openai + +.. autoclass:: OpenAIService + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~OpenAIService.acquire_resources + ~OpenAIService.call + ~OpenAIService.process + ~OpenAIService.process_using_futures + ~OpenAIService.release_resources + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~OpenAIService.MAX_CONCURRENT_JOBS + ~OpenAIService.can_process + ~OpenAIService.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.openai.count_tokens.rst.txt b/_sources/_autosummary/elm.ords.services.openai.count_tokens.rst.txt new file mode 100644 index 00000000..93182983 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.openai.count_tokens.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.openai.count\_tokens +====================================== + +.. currentmodule:: elm.ords.services.openai + +.. autofunction:: count_tokens \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.openai.rst.txt b/_sources/_autosummary/elm.ords.services.openai.rst.txt new file mode 100644 index 00000000..bed3d05d --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.openai.rst.txt @@ -0,0 +1,39 @@ +elm.ords.services.openai +======================== + +.. automodule:: elm.ords.services.openai + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + count_tokens + usage_from_response + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + OpenAIService + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.services.openai.usage_from_response.rst.txt b/_sources/_autosummary/elm.ords.services.openai.usage_from_response.rst.txt new file mode 100644 index 00000000..f3eba6b2 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.openai.usage_from_response.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.openai.usage\_from\_response +============================================== + +.. currentmodule:: elm.ords.services.openai + +.. 
autofunction:: usage_from_response \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.provider.RunningAsyncServices.rst.txt b/_sources/_autosummary/elm.ords.services.provider.RunningAsyncServices.rst.txt new file mode 100644 index 00000000..a1656be3 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.provider.RunningAsyncServices.rst.txt @@ -0,0 +1,23 @@ +elm.ords.services.provider.RunningAsyncServices +=============================================== + +.. currentmodule:: elm.ords.services.provider + +.. autoclass:: RunningAsyncServices + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.provider.rst.txt b/_sources/_autosummary/elm.ords.services.provider.rst.txt new file mode 100644 index 00000000..d2134d2c --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.provider.rst.txt @@ -0,0 +1,31 @@ +elm.ords.services.provider +========================== + +.. automodule:: elm.ords.services.provider + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + RunningAsyncServices + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.services.queues.get_service_queue.rst.txt b/_sources/_autosummary/elm.ords.services.queues.get_service_queue.rst.txt new file mode 100644 index 00000000..bcd8eeb5 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.queues.get_service_queue.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.queues.get\_service\_queue +============================================ + +.. currentmodule:: elm.ords.services.queues + +.. 
autofunction:: get_service_queue \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.queues.initialize_service_queue.rst.txt b/_sources/_autosummary/elm.ords.services.queues.initialize_service_queue.rst.txt new file mode 100644 index 00000000..9ec73539 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.queues.initialize_service_queue.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.queues.initialize\_service\_queue +=================================================== + +.. currentmodule:: elm.ords.services.queues + +.. autofunction:: initialize_service_queue \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.queues.rst.txt b/_sources/_autosummary/elm.ords.services.queues.rst.txt new file mode 100644 index 00000000..5415a6a7 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.queues.rst.txt @@ -0,0 +1,32 @@ +elm.ords.services.queues +======================== + +.. automodule:: elm.ords.services.queues + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + get_service_queue + initialize_service_queue + tear_down_service_queue + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.services.queues.tear_down_service_queue.rst.txt b/_sources/_autosummary/elm.ords.services.queues.tear_down_service_queue.rst.txt new file mode 100644 index 00000000..fee465c5 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.queues.tear_down_service_queue.rst.txt @@ -0,0 +1,6 @@ +elm.ords.services.queues.tear\_down\_service\_queue +=================================================== + +.. currentmodule:: elm.ords.services.queues + +.. 
autofunction:: tear_down_service_queue \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.rst.txt b/_sources/_autosummary/elm.ords.services.rst.txt new file mode 100644 index 00000000..2c003f4f --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services +================= + +.. automodule:: elm.ords.services + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.ords.services.base + elm.ords.services.cpu + elm.ords.services.openai + elm.ords.services.provider + elm.ords.services.queues + elm.ords.services.threaded + elm.ords.services.usage + diff --git a/_sources/_autosummary/elm.ords.services.threaded.CleanedFileWriter.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.CleanedFileWriter.rst.txt new file mode 100644 index 00000000..076a5b51 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.CleanedFileWriter.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.CleanedFileWriter +============================================ + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: CleanedFileWriter + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~CleanedFileWriter.acquire_resources + ~CleanedFileWriter.call + ~CleanedFileWriter.process + ~CleanedFileWriter.process_using_futures + ~CleanedFileWriter.release_resources + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~CleanedFileWriter.MAX_CONCURRENT_JOBS + ~CleanedFileWriter.can_process + ~CleanedFileWriter.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.FileMover.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.FileMover.rst.txt new file mode 100644 index 00000000..47aa03ba --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.FileMover.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.FileMover +==================================== + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: FileMover + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~FileMover.acquire_resources + ~FileMover.call + ~FileMover.process + ~FileMover.process_using_futures + ~FileMover.release_resources + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~FileMover.MAX_CONCURRENT_JOBS + ~FileMover.can_process + ~FileMover.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.OrdDBFileWriter.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.OrdDBFileWriter.rst.txt new file mode 100644 index 00000000..173fdaea --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.OrdDBFileWriter.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.OrdDBFileWriter +========================================== + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: OrdDBFileWriter + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~OrdDBFileWriter.acquire_resources + ~OrdDBFileWriter.call + ~OrdDBFileWriter.process + ~OrdDBFileWriter.process_using_futures + ~OrdDBFileWriter.release_resources + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~OrdDBFileWriter.MAX_CONCURRENT_JOBS + ~OrdDBFileWriter.can_process + ~OrdDBFileWriter.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.StoreFileOnDisk.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.StoreFileOnDisk.rst.txt new file mode 100644 index 00000000..59975726 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.StoreFileOnDisk.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.StoreFileOnDisk +========================================== + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: StoreFileOnDisk + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~StoreFileOnDisk.acquire_resources + ~StoreFileOnDisk.call + ~StoreFileOnDisk.process + ~StoreFileOnDisk.process_using_futures + ~StoreFileOnDisk.release_resources + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~StoreFileOnDisk.MAX_CONCURRENT_JOBS + ~StoreFileOnDisk.can_process + ~StoreFileOnDisk.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.TempFileCache.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.TempFileCache.rst.txt new file mode 100644 index 00000000..9cb2620c --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.TempFileCache.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.TempFileCache +======================================== + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: TempFileCache + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~TempFileCache.acquire_resources + ~TempFileCache.call + ~TempFileCache.process + ~TempFileCache.process_using_futures + ~TempFileCache.release_resources + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~TempFileCache.MAX_CONCURRENT_JOBS + ~TempFileCache.can_process + ~TempFileCache.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.ThreadedService.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.ThreadedService.rst.txt new file mode 100644 index 00000000..83fe4929 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.ThreadedService.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.ThreadedService +========================================== + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: ThreadedService + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ThreadedService.acquire_resources + ~ThreadedService.call + ~ThreadedService.process + ~ThreadedService.process_using_futures + ~ThreadedService.release_resources + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~ThreadedService.MAX_CONCURRENT_JOBS + ~ThreadedService.can_process + ~ThreadedService.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.UsageUpdater.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.UsageUpdater.rst.txt new file mode 100644 index 00000000..3e7f0b07 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.UsageUpdater.rst.txt @@ -0,0 +1,36 @@ +elm.ords.services.threaded.UsageUpdater +======================================= + +.. currentmodule:: elm.ords.services.threaded + +.. autoclass:: UsageUpdater + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~UsageUpdater.acquire_resources + ~UsageUpdater.call + ~UsageUpdater.process + ~UsageUpdater.process_using_futures + ~UsageUpdater.release_resources + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~UsageUpdater.MAX_CONCURRENT_JOBS + ~UsageUpdater.can_process + ~UsageUpdater.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.threaded.rst.txt b/_sources/_autosummary/elm.ords.services.threaded.rst.txt new file mode 100644 index 00000000..e3d00b38 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.threaded.rst.txt @@ -0,0 +1,37 @@ +elm.ords.services.threaded +========================== + +.. automodule:: elm.ords.services.threaded + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + CleanedFileWriter + FileMover + OrdDBFileWriter + StoreFileOnDisk + TempFileCache + ThreadedService + UsageUpdater + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.rst.txt b/_sources/_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.rst.txt new file mode 100644 index 00000000..a2f89519 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.rst.txt @@ -0,0 +1,30 @@ +elm.ords.services.usage.TimeBoundedUsageTracker +=============================================== + +.. currentmodule:: elm.ords.services.usage + +.. autoclass:: TimeBoundedUsageTracker + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~TimeBoundedUsageTracker.add + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~TimeBoundedUsageTracker.total + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.usage.TimedEntry.rst.txt b/_sources/_autosummary/elm.ords.services.usage.TimedEntry.rst.txt new file mode 100644 index 00000000..decd31e2 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.usage.TimedEntry.rst.txt @@ -0,0 +1,23 @@ +elm.ords.services.usage.TimedEntry +================================== + +.. currentmodule:: elm.ords.services.usage + +.. 
autoclass:: TimedEntry + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.usage.UsageTracker.rst.txt b/_sources/_autosummary/elm.ords.services.usage.UsageTracker.rst.txt new file mode 100644 index 00000000..513bbb01 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.usage.UsageTracker.rst.txt @@ -0,0 +1,42 @@ +elm.ords.services.usage.UsageTracker +==================================== + +.. currentmodule:: elm.ords.services.usage + +.. autoclass:: UsageTracker + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~UsageTracker.add_to + ~UsageTracker.clear + ~UsageTracker.copy + ~UsageTracker.fromkeys + ~UsageTracker.get + ~UsageTracker.items + ~UsageTracker.keys + ~UsageTracker.pop + ~UsageTracker.popitem + ~UsageTracker.setdefault + ~UsageTracker.update + ~UsageTracker.update_from_model + ~UsageTracker.values + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~UsageTracker.totals + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.services.usage.rst.txt b/_sources/_autosummary/elm.ords.services.usage.rst.txt new file mode 100644 index 00000000..7e02fc19 --- /dev/null +++ b/_sources/_autosummary/elm.ords.services.usage.rst.txt @@ -0,0 +1,33 @@ +elm.ords.services.usage +======================= + +.. automodule:: elm.ords.services.usage + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + TimeBoundedUsageTracker + TimedEntry + UsageTracker + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.utilities.counties.county_websites.rst.txt b/_sources/_autosummary/elm.ords.utilities.counties.county_websites.rst.txt new file mode 100644 index 00000000..8982dcd3 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.counties.county_websites.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.counties.county\_websites +============================================ + +.. currentmodule:: elm.ords.utilities.counties + +.. autofunction:: county_websites \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.counties.load_all_county_info.rst.txt b/_sources/_autosummary/elm.ords.utilities.counties.load_all_county_info.rst.txt new file mode 100644 index 00000000..5c50c11b --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.counties.load_all_county_info.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.counties.load\_all\_county\_info +=================================================== + +.. currentmodule:: elm.ords.utilities.counties + +.. autofunction:: load_all_county_info \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.counties.load_counties_from_fp.rst.txt b/_sources/_autosummary/elm.ords.utilities.counties.load_counties_from_fp.rst.txt new file mode 100644 index 00000000..14838dcd --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.counties.load_counties_from_fp.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.counties.load\_counties\_from\_fp +==================================================== + +.. currentmodule:: elm.ords.utilities.counties + +.. 
autofunction:: load_counties_from_fp \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.counties.rst.txt b/_sources/_autosummary/elm.ords.utilities.counties.rst.txt new file mode 100644 index 00000000..3b047072 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.counties.rst.txt @@ -0,0 +1,32 @@ +elm.ords.utilities.counties +=========================== + +.. automodule:: elm.ords.utilities.counties + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + county_websites + load_all_county_info + load_counties_from_fp + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.rst.txt b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.rst.txt new file mode 100644 index 00000000..4c0ea902 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.exceptions.ELMOrdsError +========================================== + +.. currentmodule:: elm.ords.utilities.exceptions + +.. autoexception:: ELMOrdsError \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.rst.txt b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.rst.txt new file mode 100644 index 00000000..32dc02db --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.exceptions.ELMOrdsNotInitializedError +======================================================== + +.. currentmodule:: elm.ords.utilities.exceptions + +.. 
autoexception:: ELMOrdsNotInitializedError \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.rst.txt b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.rst.txt new file mode 100644 index 00000000..50302e3d --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.exceptions.ELMOrdsRuntimeError +================================================= + +.. currentmodule:: elm.ords.utilities.exceptions + +.. autoexception:: ELMOrdsRuntimeError \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.rst.txt b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.rst.txt new file mode 100644 index 00000000..4d9267c2 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.exceptions.ELMOrdsValueError +=============================================== + +.. currentmodule:: elm.ords.utilities.exceptions + +.. autoexception:: ELMOrdsValueError \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.exceptions.rst.txt b/_sources/_autosummary/elm.ords.utilities.exceptions.rst.txt new file mode 100644 index 00000000..119da9dc --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.exceptions.rst.txt @@ -0,0 +1,33 @@ +elm.ords.utilities.exceptions +============================= + +.. automodule:: elm.ords.utilities.exceptions + + + + + + + + + + + + + + + + .. rubric:: Exceptions + + .. 
autosummary:: + :toctree: + + ELMOrdsError + ELMOrdsNotInitializedError + ELMOrdsRuntimeError + ELMOrdsValueError + + + + + diff --git a/_sources/_autosummary/elm.ords.utilities.location.County.rst.txt b/_sources/_autosummary/elm.ords.utilities.location.County.rst.txt new file mode 100644 index 00000000..96c1a3f0 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.location.County.rst.txt @@ -0,0 +1,29 @@ +elm.ords.utilities.location.County +================================== + +.. currentmodule:: elm.ords.utilities.location + +.. autoclass:: County + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~County.full_name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.location.Location.rst.txt b/_sources/_autosummary/elm.ords.utilities.location.Location.rst.txt new file mode 100644 index 00000000..e9da1c44 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.location.Location.rst.txt @@ -0,0 +1,29 @@ +elm.ords.utilities.location.Location +==================================== + +.. currentmodule:: elm.ords.utilities.location + +.. autoclass:: Location + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~Location.full_name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.location.rst.txt b/_sources/_autosummary/elm.ords.utilities.location.rst.txt new file mode 100644 index 00000000..fd0ea5ae --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.location.rst.txt @@ -0,0 +1,32 @@ +elm.ords.utilities.location +=========================== + +.. automodule:: elm.ords.utilities.location + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + County + Location + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.utilities.parsing.llm_response_as_json.rst.txt b/_sources/_autosummary/elm.ords.utilities.parsing.llm_response_as_json.rst.txt new file mode 100644 index 00000000..506ed730 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.parsing.llm_response_as_json.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.parsing.llm\_response\_as\_json +================================================== + +.. currentmodule:: elm.ords.utilities.parsing + +.. autofunction:: llm_response_as_json \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.rst.txt b/_sources/_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.rst.txt new file mode 100644 index 00000000..c393e6fd --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.rst.txt @@ -0,0 +1,6 @@ +elm.ords.utilities.parsing.merge\_overlapping\_texts +==================================================== + +.. currentmodule:: elm.ords.utilities.parsing + +.. autofunction:: merge_overlapping_texts \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.parsing.rst.txt b/_sources/_autosummary/elm.ords.utilities.parsing.rst.txt new file mode 100644 index 00000000..54ec6b78 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.parsing.rst.txt @@ -0,0 +1,31 @@ +elm.ords.utilities.parsing +========================== + +.. automodule:: elm.ords.utilities.parsing + + + + + + + + .. rubric:: Functions + + .. 
autosummary:: + :toctree: + + llm_response_as_json + merge_overlapping_texts + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.rst.txt b/_sources/_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.rst.txt new file mode 100644 index 00000000..1e1ec74c --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.rst.txt @@ -0,0 +1,47 @@ +elm.ords.utilities.queued\_logging.LocalProcessQueueHandler +=========================================================== + +.. currentmodule:: elm.ords.utilities.queued_logging + +.. autoclass:: LocalProcessQueueHandler + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~LocalProcessQueueHandler.acquire + ~LocalProcessQueueHandler.addFilter + ~LocalProcessQueueHandler.close + ~LocalProcessQueueHandler.createLock + ~LocalProcessQueueHandler.emit + ~LocalProcessQueueHandler.enqueue + ~LocalProcessQueueHandler.filter + ~LocalProcessQueueHandler.flush + ~LocalProcessQueueHandler.format + ~LocalProcessQueueHandler.get_name + ~LocalProcessQueueHandler.handle + ~LocalProcessQueueHandler.handleError + ~LocalProcessQueueHandler.prepare + ~LocalProcessQueueHandler.release + ~LocalProcessQueueHandler.removeFilter + ~LocalProcessQueueHandler.setFormatter + ~LocalProcessQueueHandler.setLevel + ~LocalProcessQueueHandler.set_name + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~LocalProcessQueueHandler.name + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.rst.txt b/_sources/_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.rst.txt new file mode 100644 index 00000000..76601af5 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.rst.txt @@ -0,0 +1,23 @@ +elm.ords.utilities.queued\_logging.LocationFileLog +================================================== + +.. currentmodule:: elm.ords.utilities.queued_logging + +.. autoclass:: LocationFileLog + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.queued_logging.LocationFilter.rst.txt b/_sources/_autosummary/elm.ords.utilities.queued_logging.LocationFilter.rst.txt new file mode 100644 index 00000000..8fb5f213 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.queued_logging.LocationFilter.rst.txt @@ -0,0 +1,24 @@ +elm.ords.utilities.queued\_logging.LocationFilter +================================================= + +.. currentmodule:: elm.ords.utilities.queued_logging + +.. autoclass:: LocationFilter + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~LocationFilter.filter + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.queued_logging.LogListener.rst.txt b/_sources/_autosummary/elm.ords.utilities.queued_logging.LogListener.rst.txt new file mode 100644 index 00000000..9152e156 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.queued_logging.LogListener.rst.txt @@ -0,0 +1,25 @@ +elm.ords.utilities.queued\_logging.LogListener +============================================== + +.. 
currentmodule:: elm.ords.utilities.queued_logging + +.. autoclass:: LogListener + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~LogListener.addHandler + ~LogListener.removeHandler + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.rst.txt b/_sources/_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.rst.txt new file mode 100644 index 00000000..4c307618 --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.rst.txt @@ -0,0 +1,24 @@ +elm.ords.utilities.queued\_logging.NoLocationFilter +=================================================== + +.. currentmodule:: elm.ords.utilities.queued_logging + +.. autoclass:: NoLocationFilter + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~NoLocationFilter.filter + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.utilities.queued_logging.rst.txt b/_sources/_autosummary/elm.ords.utilities.queued_logging.rst.txt new file mode 100644 index 00000000..3679b36b --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.queued_logging.rst.txt @@ -0,0 +1,35 @@ +elm.ords.utilities.queued\_logging +================================== + +.. automodule:: elm.ords.utilities.queued_logging + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + LocalProcessQueueHandler + LocationFileLog + LocationFilter + LogListener + NoLocationFilter + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.utilities.rst.txt b/_sources/_autosummary/elm.ords.utilities.rst.txt new file mode 100644 index 00000000..a0a9188b --- /dev/null +++ b/_sources/_autosummary/elm.ords.utilities.rst.txt @@ -0,0 +1,34 @@ +elm.ords.utilities +================== + +.. automodule:: elm.ords.utilities + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.ords.utilities.counties + elm.ords.utilities.exceptions + elm.ords.utilities.location + elm.ords.utilities.parsing + elm.ords.utilities.queued_logging + diff --git a/_sources/_autosummary/elm.ords.validation.content.ValidationWithMemory.rst.txt b/_sources/_autosummary/elm.ords.validation.content.ValidationWithMemory.rst.txt new file mode 100644 index 00000000..07ff3e21 --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.content.ValidationWithMemory.rst.txt @@ -0,0 +1,24 @@ +elm.ords.validation.content.ValidationWithMemory +================================================ + +.. currentmodule:: elm.ords.validation.content + +.. autoclass:: ValidationWithMemory + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~ValidationWithMemory.parse_from_ind + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.content.possibly_mentions_wind.rst.txt b/_sources/_autosummary/elm.ords.validation.content.possibly_mentions_wind.rst.txt new file mode 100644 index 00000000..2a3d2bbd --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.content.possibly_mentions_wind.rst.txt @@ -0,0 +1,6 @@ +elm.ords.validation.content.possibly\_mentions\_wind +==================================================== + +.. 
currentmodule:: elm.ords.validation.content + +.. autofunction:: possibly_mentions_wind \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.content.rst.txt b/_sources/_autosummary/elm.ords.validation.content.rst.txt new file mode 100644 index 00000000..d436c81d --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.content.rst.txt @@ -0,0 +1,38 @@ +elm.ords.validation.content +=========================== + +.. automodule:: elm.ords.validation.content + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + possibly_mentions_wind + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + ValidationWithMemory + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.rst.txt b/_sources/_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.rst.txt new file mode 100644 index 00000000..0e95623a --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.rst.txt @@ -0,0 +1,30 @@ +elm.ords.validation.location.CountyJurisdictionValidator +======================================================== + +.. currentmodule:: elm.ords.validation.location + +.. autoclass:: CountyJurisdictionValidator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~CountyJurisdictionValidator.check + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~CountyJurisdictionValidator.SYSTEM_MESSAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.location.CountyNameValidator.rst.txt b/_sources/_autosummary/elm.ords.validation.location.CountyNameValidator.rst.txt new file mode 100644 index 00000000..fd8d7da8 --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.location.CountyNameValidator.rst.txt @@ -0,0 +1,30 @@ +elm.ords.validation.location.CountyNameValidator +================================================ + +.. currentmodule:: elm.ords.validation.location + +.. autoclass:: CountyNameValidator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~CountyNameValidator.check + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~CountyNameValidator.SYSTEM_MESSAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.location.CountyValidator.rst.txt b/_sources/_autosummary/elm.ords.validation.location.CountyValidator.rst.txt new file mode 100644 index 00000000..22a2829f --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.location.CountyValidator.rst.txt @@ -0,0 +1,24 @@ +elm.ords.validation.location.CountyValidator +============================================ + +.. currentmodule:: elm.ords.validation.location + +.. autoclass:: CountyValidator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. 
autosummary:: + + ~CountyValidator.check + + + + + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.location.FixedMessageValidator.rst.txt b/_sources/_autosummary/elm.ords.validation.location.FixedMessageValidator.rst.txt new file mode 100644 index 00000000..6d6720b4 --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.location.FixedMessageValidator.rst.txt @@ -0,0 +1,30 @@ +elm.ords.validation.location.FixedMessageValidator +================================================== + +.. currentmodule:: elm.ords.validation.location + +.. autoclass:: FixedMessageValidator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~FixedMessageValidator.check + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~FixedMessageValidator.SYSTEM_MESSAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.location.URLValidator.rst.txt b/_sources/_autosummary/elm.ords.validation.location.URLValidator.rst.txt new file mode 100644 index 00000000..4b62dbb3 --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.location.URLValidator.rst.txt @@ -0,0 +1,30 @@ +elm.ords.validation.location.URLValidator +========================================= + +.. currentmodule:: elm.ords.validation.location + +.. autoclass:: URLValidator + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~URLValidator.check + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~URLValidator.SYSTEM_MESSAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.ords.validation.location.rst.txt b/_sources/_autosummary/elm.ords.validation.location.rst.txt new file mode 100644 index 00000000..bbfdfa0d --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.location.rst.txt @@ -0,0 +1,35 @@ +elm.ords.validation.location +============================ + +.. automodule:: elm.ords.validation.location + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + CountyJurisdictionValidator + CountyNameValidator + CountyValidator + FixedMessageValidator + URLValidator + + + + + + + + + diff --git a/_sources/_autosummary/elm.ords.validation.rst.txt b/_sources/_autosummary/elm.ords.validation.rst.txt new file mode 100644 index 00000000..6803b842 --- /dev/null +++ b/_sources/_autosummary/elm.ords.validation.rst.txt @@ -0,0 +1,31 @@ +elm.ords.validation +=================== + +.. automodule:: elm.ords.validation + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.ords.validation.content + elm.ords.validation.location + diff --git a/_sources/_autosummary/elm.osti.OstiList.rst.txt b/_sources/_autosummary/elm.osti.OstiList.rst.txt new file mode 100644 index 00000000..b5471bf7 --- /dev/null +++ b/_sources/_autosummary/elm.osti.OstiList.rst.txt @@ -0,0 +1,43 @@ +elm.osti.OstiList +================= + +.. currentmodule:: elm.osti + +.. autoclass:: OstiList + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~OstiList.append + ~OstiList.clear + ~OstiList.copy + ~OstiList.count + ~OstiList.download + ~OstiList.extend + ~OstiList.from_osti_ids + ~OstiList.index + ~OstiList.insert + ~OstiList.pop + ~OstiList.remove + ~OstiList.reverse + ~OstiList.sort + + + + + + .. 
rubric:: Attributes + + .. autosummary:: + + ~OstiList.BASE_URL + ~OstiList.meta + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.osti.OstiRecord.rst.txt b/_sources/_autosummary/elm.osti.OstiRecord.rst.txt new file mode 100644 index 00000000..89f3bf54 --- /dev/null +++ b/_sources/_autosummary/elm.osti.OstiRecord.rst.txt @@ -0,0 +1,48 @@ +elm.osti.OstiRecord +=================== + +.. currentmodule:: elm.osti + +.. autoclass:: OstiRecord + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~OstiRecord.clear + ~OstiRecord.copy + ~OstiRecord.download + ~OstiRecord.fromkeys + ~OstiRecord.get + ~OstiRecord.items + ~OstiRecord.keys + ~OstiRecord.pop + ~OstiRecord.popitem + ~OstiRecord.setdefault + ~OstiRecord.strip_nested_brackets + ~OstiRecord.update + ~OstiRecord.values + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~OstiRecord.authors + ~OstiRecord.date + ~OstiRecord.doi + ~OstiRecord.osti_id + ~OstiRecord.title + ~OstiRecord.url + ~OstiRecord.year + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.osti.rst.txt b/_sources/_autosummary/elm.osti.rst.txt new file mode 100644 index 00000000..0f490ba7 --- /dev/null +++ b/_sources/_autosummary/elm.osti.rst.txt @@ -0,0 +1,32 @@ +elm.osti +======== + +.. automodule:: elm.osti + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + OstiList + OstiRecord + + + + + + + + + diff --git a/_sources/_autosummary/elm.pdf.PDFtoTXT.rst.txt b/_sources/_autosummary/elm.pdf.PDFtoTXT.rst.txt new file mode 100644 index 00000000..e81befdb --- /dev/null +++ b/_sources/_autosummary/elm.pdf.PDFtoTXT.rst.txt @@ -0,0 +1,52 @@ +elm.pdf.PDFtoTXT +================ + +.. currentmodule:: elm.pdf + +.. autoclass:: PDFtoTXT + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. 
rubric:: Methods + + .. autosummary:: + + ~PDFtoTXT.call_api + ~PDFtoTXT.call_api_async + ~PDFtoTXT.chat + ~PDFtoTXT.clean_headers + ~PDFtoTXT.clean_poppler + ~PDFtoTXT.clean_txt + ~PDFtoTXT.clean_txt_async + ~PDFtoTXT.clear + ~PDFtoTXT.count_tokens + ~PDFtoTXT.generic_async_query + ~PDFtoTXT.generic_query + ~PDFtoTXT.get_embedding + ~PDFtoTXT.is_double_col + ~PDFtoTXT.load_pdf + ~PDFtoTXT.make_gpt_messages + ~PDFtoTXT.validate_clean + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~PDFtoTXT.DEFAULT_MODEL + ~PDFtoTXT.EMBEDDING_MODEL + ~PDFtoTXT.EMBEDDING_URL + ~PDFtoTXT.HEADERS + ~PDFtoTXT.MODEL_INSTRUCTION + ~PDFtoTXT.MODEL_ROLE + ~PDFtoTXT.URL + ~PDFtoTXT.all_messages_txt + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.pdf.rst.txt b/_sources/_autosummary/elm.pdf.rst.txt new file mode 100644 index 00000000..892a60a3 --- /dev/null +++ b/_sources/_autosummary/elm.pdf.rst.txt @@ -0,0 +1,31 @@ +elm.pdf +======= + +.. automodule:: elm.pdf + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + PDFtoTXT + + + + + + + + + diff --git a/_sources/_autosummary/elm.rst.txt b/_sources/_autosummary/elm.rst.txt new file mode 100644 index 00000000..4d473a00 --- /dev/null +++ b/_sources/_autosummary/elm.rst.txt @@ -0,0 +1,43 @@ +elm +=== + +.. automodule:: elm + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.base + elm.chunk + elm.cli + elm.embed + elm.exceptions + elm.ords + elm.osti + elm.pdf + elm.summary + elm.tree + elm.utilities + elm.version + elm.web + elm.wizard + diff --git a/_sources/_autosummary/elm.summary.Summary.rst.txt b/_sources/_autosummary/elm.summary.Summary.rst.txt new file mode 100644 index 00000000..9ac3d432 --- /dev/null +++ b/_sources/_autosummary/elm.summary.Summary.rst.txt @@ -0,0 +1,47 @@ +elm.summary.Summary +=================== + +.. currentmodule:: elm.summary + +.. 
autoclass:: Summary + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~Summary.call_api + ~Summary.call_api_async + ~Summary.chat + ~Summary.clear + ~Summary.combine + ~Summary.count_tokens + ~Summary.generic_async_query + ~Summary.generic_query + ~Summary.get_embedding + ~Summary.run + ~Summary.run_async + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~Summary.DEFAULT_MODEL + ~Summary.EMBEDDING_MODEL + ~Summary.EMBEDDING_URL + ~Summary.HEADERS + ~Summary.MODEL_INSTRUCTION + ~Summary.MODEL_ROLE + ~Summary.URL + ~Summary.all_messages_txt + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.summary.rst.txt b/_sources/_autosummary/elm.summary.rst.txt new file mode 100644 index 00000000..9b7ba323 --- /dev/null +++ b/_sources/_autosummary/elm.summary.rst.txt @@ -0,0 +1,31 @@ +elm.summary +=========== + +.. automodule:: elm.summary + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + Summary + + + + + + + + + diff --git a/_sources/_autosummary/elm.tree.DecisionTree.rst.txt b/_sources/_autosummary/elm.tree.DecisionTree.rst.txt new file mode 100644 index 00000000..3d88e8d7 --- /dev/null +++ b/_sources/_autosummary/elm.tree.DecisionTree.rst.txt @@ -0,0 +1,35 @@ +elm.tree.DecisionTree +===================== + +.. currentmodule:: elm.tree + +.. autoclass:: DecisionTree + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~DecisionTree.call_node + ~DecisionTree.run + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~DecisionTree.all_messages_txt + ~DecisionTree.api + ~DecisionTree.graph + ~DecisionTree.history + ~DecisionTree.messages + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.tree.rst.txt b/_sources/_autosummary/elm.tree.rst.txt new file mode 100644 index 00000000..4b6a4abf --- /dev/null +++ b/_sources/_autosummary/elm.tree.rst.txt @@ -0,0 +1,31 @@ +elm.tree +======== + +.. automodule:: elm.tree + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + DecisionTree + + + + + + + + + diff --git a/_sources/_autosummary/elm.utilities.parse.clean_headers.rst.txt b/_sources/_autosummary/elm.utilities.parse.clean_headers.rst.txt new file mode 100644 index 00000000..f1d129a0 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.clean_headers.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.clean\_headers +================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: clean_headers \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.combine_pages.rst.txt b/_sources/_autosummary/elm.utilities.parse.combine_pages.rst.txt new file mode 100644 index 00000000..b511870e --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.combine_pages.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.combine\_pages +================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: combine_pages \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.format_html_tables.rst.txt b/_sources/_autosummary/elm.utilities.parse.format_html_tables.rst.txt new file mode 100644 index 00000000..fae53c70 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.format_html_tables.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.format\_html\_tables +======================================== + +.. currentmodule:: elm.utilities.parse + +.. 
autofunction:: format_html_tables \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.html_to_text.rst.txt b/_sources/_autosummary/elm.utilities.parse.html_to_text.rst.txt new file mode 100644 index 00000000..57fc20e3 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.html_to_text.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.html\_to\_text +================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: html_to_text \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.is_multi_col.rst.txt b/_sources/_autosummary/elm.utilities.parse.is_multi_col.rst.txt new file mode 100644 index 00000000..db8ec933 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.is_multi_col.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.is\_multi\_col +================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: is_multi_col \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.read_pdf.rst.txt b/_sources/_autosummary/elm.utilities.parse.read_pdf.rst.txt new file mode 100644 index 00000000..4b086eb3 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.read_pdf.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.read\_pdf +============================= + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: read_pdf \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.read_pdf_ocr.rst.txt b/_sources/_autosummary/elm.utilities.parse.read_pdf_ocr.rst.txt new file mode 100644 index 00000000..b176d9fe --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.read_pdf_ocr.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.read\_pdf\_ocr +================================== + +.. currentmodule:: elm.utilities.parse + +.. 
autofunction:: read_pdf_ocr \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.remove_blank_pages.rst.txt b/_sources/_autosummary/elm.utilities.parse.remove_blank_pages.rst.txt new file mode 100644 index 00000000..720a4b36 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.remove_blank_pages.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.remove\_blank\_pages +======================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: remove_blank_pages \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.rst.txt b/_sources/_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.rst.txt new file mode 100644 index 00000000..24dc07d2 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.remove\_empty\_lines\_or\_page\_footers +=========================================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: remove_empty_lines_or_page_footers \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.rst.txt b/_sources/_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.rst.txt new file mode 100644 index 00000000..2b16fb10 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.replace\_common\_pdf\_conversion\_chars +=========================================================== + +.. currentmodule:: elm.utilities.parse + +.. 
autofunction:: replace_common_pdf_conversion_chars \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.replace_excessive_newlines.rst.txt b/_sources/_autosummary/elm.utilities.parse.replace_excessive_newlines.rst.txt new file mode 100644 index 00000000..f79426c7 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.replace_excessive_newlines.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.replace\_excessive\_newlines +================================================ + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: replace_excessive_newlines \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.replace_multi_dot_lines.rst.txt b/_sources/_autosummary/elm.utilities.parse.replace_multi_dot_lines.rst.txt new file mode 100644 index 00000000..f2e41a99 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.replace_multi_dot_lines.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.parse.replace\_multi\_dot\_lines +============================================== + +.. currentmodule:: elm.utilities.parse + +.. autofunction:: replace_multi_dot_lines \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.parse.rst.txt b/_sources/_autosummary/elm.utilities.parse.rst.txt new file mode 100644 index 00000000..da2ac063 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.parse.rst.txt @@ -0,0 +1,41 @@ +elm.utilities.parse +=================== + +.. automodule:: elm.utilities.parse + + + + + + + + .. rubric:: Functions + + .. 
autosummary:: + :toctree: + + clean_headers + combine_pages + format_html_tables + html_to_text + is_multi_col + read_pdf + read_pdf_ocr + remove_blank_pages + remove_empty_lines_or_page_footers + replace_common_pdf_conversion_chars + replace_excessive_newlines + replace_multi_dot_lines + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.rst.txt b/_sources/_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.rst.txt new file mode 100644 index 00000000..12171381 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.retry.async\_retry\_with\_exponential\_backoff +============================================================ + +.. currentmodule:: elm.utilities.retry + +.. autofunction:: async_retry_with_exponential_backoff \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.retry.retry_with_exponential_backoff.rst.txt b/_sources/_autosummary/elm.utilities.retry.retry_with_exponential_backoff.rst.txt new file mode 100644 index 00000000..ee2344c5 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.retry.retry_with_exponential_backoff.rst.txt @@ -0,0 +1,6 @@ +elm.utilities.retry.retry\_with\_exponential\_backoff +===================================================== + +.. currentmodule:: elm.utilities.retry + +.. autofunction:: retry_with_exponential_backoff \ No newline at end of file diff --git a/_sources/_autosummary/elm.utilities.retry.rst.txt b/_sources/_autosummary/elm.utilities.retry.rst.txt new file mode 100644 index 00000000..a5877ea7 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.retry.rst.txt @@ -0,0 +1,31 @@ +elm.utilities.retry +=================== + +.. automodule:: elm.utilities.retry + + + + + + + + .. rubric:: Functions + + .. 
autosummary:: + :toctree: + + async_retry_with_exponential_backoff + retry_with_exponential_backoff + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.utilities.rst.txt b/_sources/_autosummary/elm.utilities.rst.txt new file mode 100644 index 00000000..f3064927 --- /dev/null +++ b/_sources/_autosummary/elm.utilities.rst.txt @@ -0,0 +1,31 @@ +elm.utilities +============= + +.. automodule:: elm.utilities + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.utilities.parse + elm.utilities.retry + diff --git a/_sources/_autosummary/elm.version.rst.txt b/_sources/_autosummary/elm.version.rst.txt new file mode 100644 index 00000000..dbddecd2 --- /dev/null +++ b/_sources/_autosummary/elm.version.rst.txt @@ -0,0 +1,23 @@ +elm.version +=========== + +.. automodule:: elm.version + + + + + + + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.web.document.BaseDocument.rst.txt b/_sources/_autosummary/elm.web.document.BaseDocument.rst.txt new file mode 100644 index 00000000..a211d639 --- /dev/null +++ b/_sources/_autosummary/elm.web.document.BaseDocument.rst.txt @@ -0,0 +1,33 @@ +elm.web.document.BaseDocument +============================= + +.. currentmodule:: elm.web.document + +.. autoclass:: BaseDocument + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~BaseDocument.FILE_EXTENSION + ~BaseDocument.WRITE_KWARGS + ~BaseDocument.empty + ~BaseDocument.raw_pages + ~BaseDocument.text + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.document.HTMLDocument.rst.txt b/_sources/_autosummary/elm.web.document.HTMLDocument.rst.txt new file mode 100644 index 00000000..558c9a9f --- /dev/null +++ b/_sources/_autosummary/elm.web.document.HTMLDocument.rst.txt @@ -0,0 +1,34 @@ +elm.web.document.HTMLDocument +============================= + +.. currentmodule:: elm.web.document + +.. autoclass:: HTMLDocument + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~HTMLDocument.FILE_EXTENSION + ~HTMLDocument.HTML_TABLE_TO_MARKDOWN_KWARGS + ~HTMLDocument.WRITE_KWARGS + ~HTMLDocument.empty + ~HTMLDocument.raw_pages + ~HTMLDocument.text + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.document.PDFDocument.rst.txt b/_sources/_autosummary/elm.web.document.PDFDocument.rst.txt new file mode 100644 index 00000000..ea6fecbf --- /dev/null +++ b/_sources/_autosummary/elm.web.document.PDFDocument.rst.txt @@ -0,0 +1,35 @@ +elm.web.document.PDFDocument +============================ + +.. currentmodule:: elm.web.document + +.. autoclass:: PDFDocument + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~PDFDocument.CLEAN_HEADER_KWARGS + ~PDFDocument.FILE_EXTENSION + ~PDFDocument.WRITE_KWARGS + ~PDFDocument.empty + ~PDFDocument.num_raw_pages_to_keep + ~PDFDocument.raw_pages + ~PDFDocument.text + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.document.rst.txt b/_sources/_autosummary/elm.web.document.rst.txt new file mode 100644 index 00000000..5b6f9b3b --- /dev/null +++ b/_sources/_autosummary/elm.web.document.rst.txt @@ -0,0 +1,33 @@ +elm.web.document +================ + +.. automodule:: elm.web.document + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + BaseDocument + HTMLDocument + PDFDocument + + + + + + + + + diff --git a/_sources/_autosummary/elm.web.file_loader.AsyncFileLoader.rst.txt b/_sources/_autosummary/elm.web.file_loader.AsyncFileLoader.rst.txt new file mode 100644 index 00000000..9883e3da --- /dev/null +++ b/_sources/_autosummary/elm.web.file_loader.AsyncFileLoader.rst.txt @@ -0,0 +1,31 @@ +elm.web.file\_loader.AsyncFileLoader +==================================== + +.. currentmodule:: elm.web.file_loader + +.. autoclass:: AsyncFileLoader + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~AsyncFileLoader.fetch + ~AsyncFileLoader.fetch_all + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~AsyncFileLoader.DEFAULT_HEADER_TEMPLATE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.file_loader.rst.txt b/_sources/_autosummary/elm.web.file_loader.rst.txt new file mode 100644 index 00000000..21d9e243 --- /dev/null +++ b/_sources/_autosummary/elm.web.file_loader.rst.txt @@ -0,0 +1,31 @@ +elm.web.file\_loader +==================== + +.. automodule:: elm.web.file_loader + + + + + + + + + + + + .. rubric:: Classes + + .. 
autosummary:: + :toctree: + :template: custom-class-template.rst + + AsyncFileLoader + + + + + + + + + diff --git a/_sources/_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.rst.txt b/_sources/_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.rst.txt new file mode 100644 index 00000000..4771f78a --- /dev/null +++ b/_sources/_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.rst.txt @@ -0,0 +1,30 @@ +elm.web.google\_search.PlaywrightGoogleLinkSearch +================================================= + +.. currentmodule:: elm.web.google_search + +.. autoclass:: PlaywrightGoogleLinkSearch + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~PlaywrightGoogleLinkSearch.results + + + + + + .. rubric:: Attributes + + .. autosummary:: + + ~PlaywrightGoogleLinkSearch.EXPECTED_RESULTS_PER_PAGE + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.google_search.rst.txt b/_sources/_autosummary/elm.web.google_search.rst.txt new file mode 100644 index 00000000..4c581233 --- /dev/null +++ b/_sources/_autosummary/elm.web.google_search.rst.txt @@ -0,0 +1,31 @@ +elm.web.google\_search +====================== + +.. automodule:: elm.web.google_search + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + PlaywrightGoogleLinkSearch + + + + + + + + + diff --git a/_sources/_autosummary/elm.web.html_pw.load_html_with_pw.rst.txt b/_sources/_autosummary/elm.web.html_pw.load_html_with_pw.rst.txt new file mode 100644 index 00000000..668ea030 --- /dev/null +++ b/_sources/_autosummary/elm.web.html_pw.load_html_with_pw.rst.txt @@ -0,0 +1,6 @@ +elm.web.html\_pw.load\_html\_with\_pw +===================================== + +.. currentmodule:: elm.web.html_pw + +.. 
autofunction:: load_html_with_pw \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.html_pw.rst.txt b/_sources/_autosummary/elm.web.html_pw.rst.txt new file mode 100644 index 00000000..dbafd6de --- /dev/null +++ b/_sources/_autosummary/elm.web.html_pw.rst.txt @@ -0,0 +1,30 @@ +elm.web.html\_pw +================ + +.. automodule:: elm.web.html_pw + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + load_html_with_pw + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.web.rst.txt b/_sources/_autosummary/elm.web.rst.txt new file mode 100644 index 00000000..ed775a2d --- /dev/null +++ b/_sources/_autosummary/elm.web.rst.txt @@ -0,0 +1,34 @@ +elm.web +======= + +.. automodule:: elm.web + + + + + + + + + + + + + + + + + + + +.. autosummary:: + :toctree: + :template: custom-module-template.rst + :recursive: + + elm.web.document + elm.web.file_loader + elm.web.google_search + elm.web.html_pw + elm.web.utilities + diff --git a/_sources/_autosummary/elm.web.utilities.clean_search_query.rst.txt b/_sources/_autosummary/elm.web.utilities.clean_search_query.rst.txt new file mode 100644 index 00000000..3bcf487f --- /dev/null +++ b/_sources/_autosummary/elm.web.utilities.clean_search_query.rst.txt @@ -0,0 +1,6 @@ +elm.web.utilities.clean\_search\_query +====================================== + +.. currentmodule:: elm.web.utilities + +.. autofunction:: clean_search_query \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.utilities.compute_fn_from_url.rst.txt b/_sources/_autosummary/elm.web.utilities.compute_fn_from_url.rst.txt new file mode 100644 index 00000000..3fe96edf --- /dev/null +++ b/_sources/_autosummary/elm.web.utilities.compute_fn_from_url.rst.txt @@ -0,0 +1,6 @@ +elm.web.utilities.compute\_fn\_from\_url +======================================== + +.. currentmodule:: elm.web.utilities + +.. 
autofunction:: compute_fn_from_url \ No newline at end of file diff --git a/_sources/_autosummary/elm.web.utilities.rst.txt b/_sources/_autosummary/elm.web.utilities.rst.txt new file mode 100644 index 00000000..0fa3732c --- /dev/null +++ b/_sources/_autosummary/elm.web.utilities.rst.txt @@ -0,0 +1,32 @@ +elm.web.utilities +================= + +.. automodule:: elm.web.utilities + + + + + + + + .. rubric:: Functions + + .. autosummary:: + :toctree: + + clean_search_query + compute_fn_from_url + write_url_doc_to_file + + + + + + + + + + + + + diff --git a/_sources/_autosummary/elm.web.utilities.write_url_doc_to_file.rst.txt b/_sources/_autosummary/elm.web.utilities.write_url_doc_to_file.rst.txt new file mode 100644 index 00000000..6982556f --- /dev/null +++ b/_sources/_autosummary/elm.web.utilities.write_url_doc_to_file.rst.txt @@ -0,0 +1,6 @@ +elm.web.utilities.write\_url\_doc\_to\_file +=========================================== + +.. currentmodule:: elm.web.utilities + +.. autofunction:: write_url_doc_to_file \ No newline at end of file diff --git a/_sources/_autosummary/elm.wizard.EnergyWizard.rst.txt b/_sources/_autosummary/elm.wizard.EnergyWizard.rst.txt new file mode 100644 index 00000000..9c341433 --- /dev/null +++ b/_sources/_autosummary/elm.wizard.EnergyWizard.rst.txt @@ -0,0 +1,49 @@ +elm.wizard.EnergyWizard +======================= + +.. currentmodule:: elm.wizard + +.. autoclass:: EnergyWizard + :members: + :show-inheritance: + :inherited-members: + :special-members: __call__, __add__, __mul__ + + + + .. rubric:: Methods + + .. autosummary:: + + ~EnergyWizard.call_api + ~EnergyWizard.call_api_async + ~EnergyWizard.chat + ~EnergyWizard.clear + ~EnergyWizard.cosine_dist + ~EnergyWizard.count_tokens + ~EnergyWizard.engineer_query + ~EnergyWizard.generic_async_query + ~EnergyWizard.generic_query + ~EnergyWizard.get_embedding + ~EnergyWizard.make_ref_list + ~EnergyWizard.preflight_corpus + ~EnergyWizard.rank_strings + + + + + + .. rubric:: Attributes + + .. 
autosummary:: + + ~EnergyWizard.DEFAULT_MODEL + ~EnergyWizard.EMBEDDING_MODEL + ~EnergyWizard.EMBEDDING_URL + ~EnergyWizard.HEADERS + ~EnergyWizard.MODEL_INSTRUCTION + ~EnergyWizard.MODEL_ROLE + ~EnergyWizard.URL + ~EnergyWizard.all_messages_txt + + \ No newline at end of file diff --git a/_sources/_autosummary/elm.wizard.rst.txt b/_sources/_autosummary/elm.wizard.rst.txt new file mode 100644 index 00000000..020f7693 --- /dev/null +++ b/_sources/_autosummary/elm.wizard.rst.txt @@ -0,0 +1,31 @@ +elm.wizard +========== + +.. automodule:: elm.wizard + + + + + + + + + + + + .. rubric:: Classes + + .. autosummary:: + :toctree: + :template: custom-class-template.rst + + EnergyWizard + + + + + + + + + diff --git a/_sources/_cli/cli.rst.txt b/_sources/_cli/cli.rst.txt new file mode 100644 index 00000000..a566d657 --- /dev/null +++ b/_sources/_cli/cli.rst.txt @@ -0,0 +1,8 @@ +.. _cli-docs: + +Command Line Interfaces (CLIs) +============================== + +.. toctree:: + + elm diff --git a/_sources/_cli/elm.rst.txt b/_sources/_cli/elm.rst.txt new file mode 100644 index 00000000..aa55997f --- /dev/null +++ b/_sources/_cli/elm.rst.txt @@ -0,0 +1,3 @@ +.. click:: elm.cli:main + :prog: elm + :nested: full \ No newline at end of file diff --git a/_sources/api.rst.txt b/_sources/api.rst.txt new file mode 100644 index 00000000..14895b3d --- /dev/null +++ b/_sources/api.rst.txt @@ -0,0 +1,6 @@ +.. autosummary:: + :toctree: _autosummary + :template: custom-module-template.rst + :recursive: + + elm diff --git a/_sources/examples.energy_wizard.rst.txt b/_sources/examples.energy_wizard.rst.txt new file mode 100644 index 00000000..83b156b2 --- /dev/null +++ b/_sources/examples.energy_wizard.rst.txt @@ -0,0 +1,2 @@ +.. 
include:: ../../examples/energy_wizard/README.rst + :start-line: 0 diff --git a/_sources/examples.ordinance_gpt.rst.txt b/_sources/examples.ordinance_gpt.rst.txt new file mode 100644 index 00000000..2cb4845c --- /dev/null +++ b/_sources/examples.ordinance_gpt.rst.txt @@ -0,0 +1,2 @@ +.. include:: ../../examples/ordinance_gpt/README.rst + :start-line: 0 diff --git a/_sources/examples.rst.txt b/_sources/examples.rst.txt new file mode 100644 index 00000000..84582d5e --- /dev/null +++ b/_sources/examples.rst.txt @@ -0,0 +1,6 @@ +Examples +======== +.. toctree:: + + examples.energy_wizard.rst + examples.ordinance_gpt.rst diff --git a/_sources/index.rst.txt b/_sources/index.rst.txt new file mode 100644 index 00000000..998fc741 --- /dev/null +++ b/_sources/index.rst.txt @@ -0,0 +1,10 @@ +.. toctree:: + :hidden: + + Home page + Installation + Examples + API reference <_autosummary/elm> + CLI reference <_cli/cli> + +.. include:: ../../README.rst diff --git a/_sources/installation.rst.txt b/_sources/installation.rst.txt new file mode 100644 index 00000000..6b45526f --- /dev/null +++ b/_sources/installation.rst.txt @@ -0,0 +1,6 @@ +Installation +============ + +.. include:: ../../README.rst + :start-after: install + :end-before: acknowledgements diff --git a/_static/_sphinx_javascript_frameworks_compat.js b/_static/_sphinx_javascript_frameworks_compat.js new file mode 100644 index 00000000..81415803 --- /dev/null +++ b/_static/_sphinx_javascript_frameworks_compat.js @@ -0,0 +1,123 @@ +/* Compatability shim for jQuery and underscores.js. 
+ * + * Copyright Sphinx contributors + * Released under the two clause BSD licence + */ + +/** + * small helper function to urldecode strings + * + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/decodeURIComponent#Decoding_query_parameters_from_a_URL + */ +jQuery.urldecode = function(x) { + if (!x) { + return x + } + return decodeURIComponent(x.replace(/\+/g, ' ')); +}; + +/** + * small helper function to urlencode strings + */ +jQuery.urlencode = encodeURIComponent; + +/** + * This function returns the parsed url parameters of the + * current request. Multiple values per key are supported, + * it will always return arrays of strings for the value parts. + */ +jQuery.getQueryParameters = function(s) { + if (typeof s === 'undefined') + s = document.location.search; + var parts = s.substr(s.indexOf('?') + 1).split('&'); + var result = {}; + for (var i = 0; i < parts.length; i++) { + var tmp = parts[i].split('=', 2); + var key = jQuery.urldecode(tmp[0]); + var value = jQuery.urldecode(tmp[1]); + if (key in result) + result[key].push(value); + else + result[key] = [value]; + } + return result; +}; + +/** + * highlight a given string on a jquery object by wrapping it in + * span elements with the given class name. 
+ */ +jQuery.fn.highlightText = function(text, className) { + function highlight(node, addItems) { + if (node.nodeType === 3) { + var val = node.nodeValue; + var pos = val.toLowerCase().indexOf(text); + if (pos >= 0 && + !jQuery(node.parentNode).hasClass(className) && + !jQuery(node.parentNode).hasClass("nohighlight")) { + var span; + var isInSVG = jQuery(node).closest("body, svg, foreignObject").is("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.className = className; + } + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + node.parentNode.insertBefore(span, node.parentNode.insertBefore( + document.createTextNode(val.substr(pos + text.length)), + node.nextSibling)); + node.nodeValue = val.substr(0, pos); + if (isInSVG) { + var rect = document.createElementNS("http://www.w3.org/2000/svg", "rect"); + var bbox = node.parentElement.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute('class', className); + addItems.push({ + "parent": node.parentNode, + "target": rect}); + } + } + } + else if (!jQuery(node).is("button, select, textarea")) { + jQuery.each(node.childNodes, function() { + highlight(this, addItems); + }); + } + } + var addItems = []; + var result = this.each(function() { + highlight(this, addItems); + }); + for (var i = 0; i < addItems.length; ++i) { + jQuery(addItems[i].parent).before(addItems[i].target); + } + return result; +}; + +/* + * backward compatibility for jQuery.browser + * This will be supported until firefox bug is fixed. 
+ */ +if (!jQuery.browser) { + jQuery.uaMatch = function(ua) { + ua = ua.toLowerCase(); + + var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || + /(webkit)[ \/]([\w.]+)/.exec(ua) || + /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || + /(msie) ([\w.]+)/.exec(ua) || + ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? rv:([\w.]+)|)/.exec(ua) || + []; + + return { + browser: match[ 1 ] || "", + version: match[ 2 ] || "0" + }; + }; + jQuery.browser = {}; + jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; +} diff --git a/_static/basic.css b/_static/basic.css new file mode 100644 index 00000000..f316efcb --- /dev/null +++ b/_static/basic.css @@ -0,0 +1,925 @@ +/* + * basic.css + * ~~~~~~~~~ + * + * Sphinx stylesheet -- basic theme. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ + +/* -- main layout ----------------------------------------------------------- */ + +div.clearer { + clear: both; +} + +div.section::after { + display: block; + content: ''; + clear: left; +} + +/* -- relbar ---------------------------------------------------------------- */ + +div.related { + width: 100%; + font-size: 90%; +} + +div.related h3 { + display: none; +} + +div.related ul { + margin: 0; + padding: 0 0 0 10px; + list-style: none; +} + +div.related li { + display: inline; +} + +div.related li.right { + float: right; + margin-right: 5px; +} + +/* -- sidebar --------------------------------------------------------------- */ + +div.sphinxsidebarwrapper { + padding: 10px 5px 0 10px; +} + +div.sphinxsidebar { + float: left; + width: 230px; + margin-left: -100%; + font-size: 90%; + word-wrap: break-word; + overflow-wrap : break-word; +} + +div.sphinxsidebar ul { + list-style: none; +} + +div.sphinxsidebar ul ul, +div.sphinxsidebar ul.want-points { + margin-left: 20px; + list-style: square; +} + +div.sphinxsidebar ul ul { + margin-top: 0; + margin-bottom: 0; +} + +div.sphinxsidebar form { + margin-top: 
10px; +} + +div.sphinxsidebar input { + border: 1px solid #98dbcc; + font-family: sans-serif; + font-size: 1em; +} + +div.sphinxsidebar #searchbox form.search { + overflow: hidden; +} + +div.sphinxsidebar #searchbox input[type="text"] { + float: left; + width: 80%; + padding: 0.25em; + box-sizing: border-box; +} + +div.sphinxsidebar #searchbox input[type="submit"] { + float: left; + width: 20%; + border-left: none; + padding: 0.25em; + box-sizing: border-box; +} + + +img { + border: 0; + max-width: 100%; +} + +/* -- search page ----------------------------------------------------------- */ + +ul.search { + margin: 10px 0 0 20px; + padding: 0; +} + +ul.search li { + padding: 5px 0 5px 20px; + background-image: url(file.png); + background-repeat: no-repeat; + background-position: 0 7px; +} + +ul.search li a { + font-weight: bold; +} + +ul.search li p.context { + color: #888; + margin: 2px 0 0 30px; + text-align: left; +} + +ul.keywordmatches li.goodmatch a { + font-weight: bold; +} + +/* -- index page ------------------------------------------------------------ */ + +table.contentstable { + width: 90%; + margin-left: auto; + margin-right: auto; +} + +table.contentstable p.biglink { + line-height: 150%; +} + +a.biglink { + font-size: 1.3em; +} + +span.linkdescr { + font-style: italic; + padding-top: 5px; + font-size: 90%; +} + +/* -- general index --------------------------------------------------------- */ + +table.indextable { + width: 100%; +} + +table.indextable td { + text-align: left; + vertical-align: top; +} + +table.indextable ul { + margin-top: 0; + margin-bottom: 0; + list-style-type: none; +} + +table.indextable > tbody > tr > td > ul { + padding-left: 0em; +} + +table.indextable tr.pcap { + height: 10px; +} + +table.indextable tr.cap { + margin-top: 10px; + background-color: #f2f2f2; +} + +img.toggler { + margin-right: 3px; + margin-top: 3px; + cursor: pointer; +} + +div.modindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + 
margin: 1em 0 1em 0; + padding: 0.4em; +} + +div.genindex-jumpbox { + border-top: 1px solid #ddd; + border-bottom: 1px solid #ddd; + margin: 1em 0 1em 0; + padding: 0.4em; +} + +/* -- domain module index --------------------------------------------------- */ + +table.modindextable td { + padding: 2px; + border-collapse: collapse; +} + +/* -- general body styles --------------------------------------------------- */ + +div.body { + min-width: 360px; + max-width: 800px; +} + +div.body p, div.body dd, div.body li, div.body blockquote { + -moz-hyphens: auto; + -ms-hyphens: auto; + -webkit-hyphens: auto; + hyphens: auto; +} + +a.headerlink { + visibility: hidden; +} + +a:visited { + color: #551A8B; +} + +h1:hover > a.headerlink, +h2:hover > a.headerlink, +h3:hover > a.headerlink, +h4:hover > a.headerlink, +h5:hover > a.headerlink, +h6:hover > a.headerlink, +dt:hover > a.headerlink, +caption:hover > a.headerlink, +p.caption:hover > a.headerlink, +div.code-block-caption:hover > a.headerlink { + visibility: visible; +} + +div.body p.caption { + text-align: inherit; +} + +div.body td { + text-align: left; +} + +.first { + margin-top: 0 !important; +} + +p.rubric { + margin-top: 30px; + font-weight: bold; +} + +img.align-left, figure.align-left, .figure.align-left, object.align-left { + clear: left; + float: left; + margin-right: 1em; +} + +img.align-right, figure.align-right, .figure.align-right, object.align-right { + clear: right; + float: right; + margin-left: 1em; +} + +img.align-center, figure.align-center, .figure.align-center, object.align-center { + display: block; + margin-left: auto; + margin-right: auto; +} + +img.align-default, figure.align-default, .figure.align-default { + display: block; + margin-left: auto; + margin-right: auto; +} + +.align-left { + text-align: left; +} + +.align-center { + text-align: center; +} + +.align-default { + text-align: center; +} + +.align-right { + text-align: right; +} + +/* -- sidebars 
-------------------------------------------------------------- */ + +div.sidebar, +aside.sidebar { + margin: 0 0 0.5em 1em; + border: 1px solid #ddb; + padding: 7px; + background-color: #ffe; + width: 40%; + float: right; + clear: right; + overflow-x: auto; +} + +p.sidebar-title { + font-weight: bold; +} + +nav.contents, +aside.topic, +div.admonition, div.topic, blockquote { + clear: left; +} + +/* -- topics ---------------------------------------------------------------- */ + +nav.contents, +aside.topic, +div.topic { + border: 1px solid #ccc; + padding: 7px; + margin: 10px 0 10px 0; +} + +p.topic-title { + font-size: 1.1em; + font-weight: bold; + margin-top: 10px; +} + +/* -- admonitions ----------------------------------------------------------- */ + +div.admonition { + margin-top: 10px; + margin-bottom: 10px; + padding: 7px; +} + +div.admonition dt { + font-weight: bold; +} + +p.admonition-title { + margin: 0px 10px 5px 0px; + font-weight: bold; +} + +div.body p.centered { + text-align: center; + margin-top: 25px; +} + +/* -- content of sidebars/topics/admonitions -------------------------------- */ + +div.sidebar > :last-child, +aside.sidebar > :last-child, +nav.contents > :last-child, +aside.topic > :last-child, +div.topic > :last-child, +div.admonition > :last-child { + margin-bottom: 0; +} + +div.sidebar::after, +aside.sidebar::after, +nav.contents::after, +aside.topic::after, +div.topic::after, +div.admonition::after, +blockquote::after { + display: block; + content: ''; + clear: both; +} + +/* -- tables ---------------------------------------------------------------- */ + +table.docutils { + margin-top: 10px; + margin-bottom: 10px; + border: 0; + border-collapse: collapse; +} + +table.align-center { + margin-left: auto; + margin-right: auto; +} + +table.align-default { + margin-left: auto; + margin-right: auto; +} + +table caption span.caption-number { + font-style: italic; +} + +table caption span.caption-text { +} + +table.docutils td, table.docutils th 
{ + padding: 1px 8px 1px 5px; + border-top: 0; + border-left: 0; + border-right: 0; + border-bottom: 1px solid #aaa; +} + +th { + text-align: left; + padding-right: 5px; +} + +table.citation { + border-left: solid 1px gray; + margin-left: 1px; +} + +table.citation td { + border-bottom: none; +} + +th > :first-child, +td > :first-child { + margin-top: 0px; +} + +th > :last-child, +td > :last-child { + margin-bottom: 0px; +} + +/* -- figures --------------------------------------------------------------- */ + +div.figure, figure { + margin: 0.5em; + padding: 0.5em; +} + +div.figure p.caption, figcaption { + padding: 0.3em; +} + +div.figure p.caption span.caption-number, +figcaption span.caption-number { + font-style: italic; +} + +div.figure p.caption span.caption-text, +figcaption span.caption-text { +} + +/* -- field list styles ----------------------------------------------------- */ + +table.field-list td, table.field-list th { + border: 0 !important; +} + +.field-list ul { + margin: 0; + padding-left: 1em; +} + +.field-list p { + margin: 0; +} + +.field-name { + -moz-hyphens: manual; + -ms-hyphens: manual; + -webkit-hyphens: manual; + hyphens: manual; +} + +/* -- hlist styles ---------------------------------------------------------- */ + +table.hlist { + margin: 1em 0; +} + +table.hlist td { + vertical-align: top; +} + +/* -- object description styles --------------------------------------------- */ + +.sig { + font-family: 'Consolas', 'Menlo', 'DejaVu Sans Mono', 'Bitstream Vera Sans Mono', monospace; +} + +.sig-name, code.descname { + background-color: transparent; + font-weight: bold; +} + +.sig-name { + font-size: 1.1em; +} + +code.descname { + font-size: 1.2em; +} + +.sig-prename, code.descclassname { + background-color: transparent; +} + +.optional { + font-size: 1.3em; +} + +.sig-paren { + font-size: larger; +} + +.sig-param.n { + font-style: italic; +} + +/* C++ specific styling */ + +.sig-inline.c-texpr, +.sig-inline.cpp-texpr { + font-family: unset; 
+} + +.sig.c .k, .sig.c .kt, +.sig.cpp .k, .sig.cpp .kt { + color: #0033B3; +} + +.sig.c .m, +.sig.cpp .m { + color: #1750EB; +} + +.sig.c .s, .sig.c .sc, +.sig.cpp .s, .sig.cpp .sc { + color: #067D17; +} + + +/* -- other body styles ----------------------------------------------------- */ + +ol.arabic { + list-style: decimal; +} + +ol.loweralpha { + list-style: lower-alpha; +} + +ol.upperalpha { + list-style: upper-alpha; +} + +ol.lowerroman { + list-style: lower-roman; +} + +ol.upperroman { + list-style: upper-roman; +} + +:not(li) > ol > li:first-child > :first-child, +:not(li) > ul > li:first-child > :first-child { + margin-top: 0px; +} + +:not(li) > ol > li:last-child > :last-child, +:not(li) > ul > li:last-child > :last-child { + margin-bottom: 0px; +} + +ol.simple ol p, +ol.simple ul p, +ul.simple ol p, +ul.simple ul p { + margin-top: 0; +} + +ol.simple > li:not(:first-child) > p, +ul.simple > li:not(:first-child) > p { + margin-top: 0; +} + +ol.simple p, +ul.simple p { + margin-bottom: 0; +} + +aside.footnote > span, +div.citation > span { + float: left; +} +aside.footnote > span:last-of-type, +div.citation > span:last-of-type { + padding-right: 0.5em; +} +aside.footnote > p { + margin-left: 2em; +} +div.citation > p { + margin-left: 4em; +} +aside.footnote > p:last-of-type, +div.citation > p:last-of-type { + margin-bottom: 0em; +} +aside.footnote > p:last-of-type:after, +div.citation > p:last-of-type:after { + content: ""; + clear: both; +} + +dl.field-list { + display: grid; + grid-template-columns: fit-content(30%) auto; +} + +dl.field-list > dt { + font-weight: bold; + word-break: break-word; + padding-left: 0.5em; + padding-right: 5px; +} + +dl.field-list > dd { + padding-left: 0.5em; + margin-top: 0em; + margin-left: 0em; + margin-bottom: 0em; +} + +dl { + margin-bottom: 15px; +} + +dd > :first-child { + margin-top: 0px; +} + +dd ul, dd table { + margin-bottom: 10px; +} + +dd { + margin-top: 3px; + margin-bottom: 10px; + margin-left: 30px; +} + +.sig 
dd { + margin-top: 0px; + margin-bottom: 0px; +} + +.sig dl { + margin-top: 0px; + margin-bottom: 0px; +} + +dl > dd:last-child, +dl > dd:last-child > :last-child { + margin-bottom: 0; +} + +dt:target, span.highlighted { + background-color: #fbe54e; +} + +rect.highlighted { + fill: #fbe54e; +} + +dl.glossary dt { + font-weight: bold; + font-size: 1.1em; +} + +.versionmodified { + font-style: italic; +} + +.system-message { + background-color: #fda; + padding: 5px; + border: 3px solid red; +} + +.footnote:target { + background-color: #ffa; +} + +.line-block { + display: block; + margin-top: 1em; + margin-bottom: 1em; +} + +.line-block .line-block { + margin-top: 0; + margin-bottom: 0; + margin-left: 1.5em; +} + +.guilabel, .menuselection { + font-family: sans-serif; +} + +.accelerator { + text-decoration: underline; +} + +.classifier { + font-style: oblique; +} + +.classifier:before { + font-style: normal; + margin: 0 0.5em; + content: ":"; + display: inline-block; +} + +abbr, acronym { + border-bottom: dotted 1px; + cursor: help; +} + +.translated { + background-color: rgba(207, 255, 207, 0.2) +} + +.untranslated { + background-color: rgba(255, 207, 207, 0.2) +} + +/* -- code displays --------------------------------------------------------- */ + +pre { + overflow: auto; + overflow-y: hidden; /* fixes display issues on Chrome browsers */ +} + +pre, div[class*="highlight-"] { + clear: both; +} + +span.pre { + -moz-hyphens: none; + -ms-hyphens: none; + -webkit-hyphens: none; + hyphens: none; + white-space: nowrap; +} + +div[class*="highlight-"] { + margin: 1em 0; +} + +td.linenos pre { + border: 0; + background-color: transparent; + color: #aaa; +} + +table.highlighttable { + display: block; +} + +table.highlighttable tbody { + display: block; +} + +table.highlighttable tr { + display: flex; +} + +table.highlighttable td { + margin: 0; + padding: 0; +} + +table.highlighttable td.linenos { + padding-right: 0.5em; +} + +table.highlighttable td.code { + flex: 1; + 
overflow: hidden; +} + +.highlight .hll { + display: block; +} + +div.highlight pre, +table.highlighttable pre { + margin: 0; +} + +div.code-block-caption + div { + margin-top: 0; +} + +div.code-block-caption { + margin-top: 1em; + padding: 2px 5px; + font-size: small; +} + +div.code-block-caption code { + background-color: transparent; +} + +table.highlighttable td.linenos, +span.linenos, +div.highlight span.gp { /* gp: Generic.Prompt */ + user-select: none; + -webkit-user-select: text; /* Safari fallback only */ + -webkit-user-select: none; /* Chrome/Safari */ + -moz-user-select: none; /* Firefox */ + -ms-user-select: none; /* IE10+ */ +} + +div.code-block-caption span.caption-number { + padding: 0.1em 0.3em; + font-style: italic; +} + +div.code-block-caption span.caption-text { +} + +div.literal-block-wrapper { + margin: 1em 0; +} + +code.xref, a code { + background-color: transparent; + font-weight: bold; +} + +h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { + background-color: transparent; +} + +.viewcode-link { + float: right; +} + +.viewcode-back { + float: right; + font-family: sans-serif; +} + +div.viewcode-block:target { + margin: -1px -10px; + padding: 0 10px; +} + +/* -- math display ---------------------------------------------------------- */ + +img.math { + vertical-align: middle; +} + +div.body div.math p { + text-align: center; +} + +span.eqno { + float: right; +} + +span.eqno a.headerlink { + position: absolute; + z-index: 1; +} + +div.math:hover a.headerlink { + visibility: visible; +} + +/* -- printout stylesheet --------------------------------------------------- */ + +@media print { + div.document, + div.documentwrapper, + div.bodywrapper { + margin: 0 !important; + width: 100%; + } + + div.sphinxsidebar, + div.related, + div.footer, + #top-link { + display: none; + } +} \ No newline at end of file diff --git a/_static/css/badge_only.css b/_static/css/badge_only.css new file mode 100644 index 00000000..c718cee4 --- /dev/null +++ 
b/_static/css/badge_only.css @@ -0,0 +1 @@ +.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}@font-face{font-family:FontAwesome;font-style:normal;font-weight:400;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#FontAwesome) format("svg")}.fa:before{font-family:FontAwesome;font-style:normal;font-weight:400;line-height:1}.fa:before,a .fa{text-decoration:inherit}.fa:before,a .fa,li .fa{display:inline-block}li .fa-large:before{width:1.875em}ul.fas{list-style-type:none;margin-left:2em;text-indent:-.8em}ul.fas li .fa{width:.8em}ul.fas li .fa-large:before{vertical-align:baseline}.fa-book:before,.icon-book:before{content:"\f02d"}.fa-caret-down:before,.icon-caret-down:before{content:"\f0d7"}.fa-caret-up:before,.icon-caret-up:before{content:"\f0d8"}.fa-caret-left:before,.icon-caret-left:before{content:"\f0d9"}.fa-caret-right:before,.icon-caret-right:before{content:"\f0da"}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60}.rst-versions .rst-current-version:after{clear:both;content:"";display:block}.rst-versions .rst-current-version .fa{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions 
.rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}} \ No newline at end of file diff --git a/_static/css/fonts/Roboto-Slab-Bold.woff b/_static/css/fonts/Roboto-Slab-Bold.woff new file mode 100644 index 00000000..6cb60000 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Bold.woff differ diff --git a/_static/css/fonts/Roboto-Slab-Bold.woff2 b/_static/css/fonts/Roboto-Slab-Bold.woff2 new file mode 100644 index 00000000..7059e231 Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Bold.woff2 differ diff --git a/_static/css/fonts/Roboto-Slab-Regular.woff b/_static/css/fonts/Roboto-Slab-Regular.woff new file mode 100644 index 00000000..f815f63f Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Regular.woff 
differ diff --git a/_static/css/fonts/Roboto-Slab-Regular.woff2 b/_static/css/fonts/Roboto-Slab-Regular.woff2 new file mode 100644 index 00000000..f2c76e5b Binary files /dev/null and b/_static/css/fonts/Roboto-Slab-Regular.woff2 differ diff --git a/_static/css/fonts/fontawesome-webfont.eot b/_static/css/fonts/fontawesome-webfont.eot new file mode 100644 index 00000000..e9f60ca9 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.eot differ diff --git a/_static/css/fonts/fontawesome-webfont.svg b/_static/css/fonts/fontawesome-webfont.svg new file mode 100644 index 00000000..855c845e --- /dev/null +++ b/_static/css/fonts/fontawesome-webfont.svg @@ -0,0 +1,2671 @@ + + + + +Created by FontForge 20120731 at Mon Oct 24 17:37:40 2016 + By ,,, +Copyright Dave Gandy 2016. All rights reserved. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/_static/css/fonts/fontawesome-webfont.ttf b/_static/css/fonts/fontawesome-webfont.ttf new file mode 100644 index 00000000..35acda2f Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.ttf differ diff --git a/_static/css/fonts/fontawesome-webfont.woff b/_static/css/fonts/fontawesome-webfont.woff new file mode 100644 index 00000000..400014a4 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.woff differ diff --git a/_static/css/fonts/fontawesome-webfont.woff2 b/_static/css/fonts/fontawesome-webfont.woff2 new file mode 100644 index 00000000..4d13fc60 Binary files /dev/null and b/_static/css/fonts/fontawesome-webfont.woff2 differ diff --git a/_static/css/fonts/lato-bold-italic.woff b/_static/css/fonts/lato-bold-italic.woff new file mode 100644 index 00000000..88ad05b9 Binary files /dev/null and b/_static/css/fonts/lato-bold-italic.woff differ diff --git a/_static/css/fonts/lato-bold-italic.woff2 b/_static/css/fonts/lato-bold-italic.woff2 new file mode 100644 index 00000000..c4e3d804 Binary files /dev/null and b/_static/css/fonts/lato-bold-italic.woff2 differ diff --git a/_static/css/fonts/lato-bold.woff b/_static/css/fonts/lato-bold.woff new file mode 100644 index 00000000..c6dff51f Binary files /dev/null and b/_static/css/fonts/lato-bold.woff differ diff --git a/_static/css/fonts/lato-bold.woff2 b/_static/css/fonts/lato-bold.woff2 new file mode 100644 index 00000000..bb195043 Binary files /dev/null and b/_static/css/fonts/lato-bold.woff2 differ diff --git a/_static/css/fonts/lato-normal-italic.woff b/_static/css/fonts/lato-normal-italic.woff new file mode 100644 index 00000000..76114bc0 Binary files /dev/null and b/_static/css/fonts/lato-normal-italic.woff differ diff --git 
a/_static/css/fonts/lato-normal-italic.woff2 b/_static/css/fonts/lato-normal-italic.woff2 new file mode 100644 index 00000000..3404f37e Binary files /dev/null and b/_static/css/fonts/lato-normal-italic.woff2 differ diff --git a/_static/css/fonts/lato-normal.woff b/_static/css/fonts/lato-normal.woff new file mode 100644 index 00000000..ae1307ff Binary files /dev/null and b/_static/css/fonts/lato-normal.woff differ diff --git a/_static/css/fonts/lato-normal.woff2 b/_static/css/fonts/lato-normal.woff2 new file mode 100644 index 00000000..3bf98433 Binary files /dev/null and b/_static/css/fonts/lato-normal.woff2 differ diff --git a/_static/css/theme.css b/_static/css/theme.css new file mode 100644 index 00000000..19a446a0 --- /dev/null +++ b/_static/css/theme.css @@ -0,0 +1,4 @@ +html{box-sizing:border-box}*,:after,:before{box-sizing:inherit}article,aside,details,figcaption,figure,footer,header,hgroup,nav,section{display:block}audio,canvas,video{display:inline-block;*display:inline;*zoom:1}[hidden],audio:not([controls]){display:none}*{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}html{font-size:100%;-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%}body{margin:0}a:active,a:hover{outline:0}abbr[title]{border-bottom:1px dotted}b,strong{font-weight:700}blockquote{margin:0}dfn{font-style:italic}ins{background:#ff9;text-decoration:none}ins,mark{color:#000}mark{background:#ff0;font-style:italic;font-weight:700}.rst-content code,.rst-content tt,code,kbd,pre,samp{font-family:monospace,serif;_font-family:courier 
new,monospace;font-size:1em}pre{white-space:pre}q{quotes:none}q:after,q:before{content:"";content:none}small{font-size:85%}sub,sup{font-size:75%;line-height:0;position:relative;vertical-align:baseline}sup{top:-.5em}sub{bottom:-.25em}dl,ol,ul{margin:0;padding:0;list-style:none;list-style-image:none}li{list-style:none}dd{margin:0}img{border:0;-ms-interpolation-mode:bicubic;vertical-align:middle;max-width:100%}svg:not(:root){overflow:hidden}figure,form{margin:0}label{cursor:pointer}button,input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}button,input{line-height:normal}button,input[type=button],input[type=reset],input[type=submit]{cursor:pointer;-webkit-appearance:button;*overflow:visible}button[disabled],input[disabled]{cursor:default}input[type=search]{-webkit-appearance:textfield;-moz-box-sizing:content-box;-webkit-box-sizing:content-box;box-sizing:content-box}textarea{resize:vertical}table{border-collapse:collapse;border-spacing:0}td{vertical-align:top}.chromeframe{margin:.2em 0;background:#ccc;color:#000;padding:.2em 0}.ir{display:block;border:0;text-indent:-999em;overflow:hidden;background-color:transparent;background-repeat:no-repeat;text-align:left;direction:ltr;*line-height:0}.ir br{display:none}.hidden{display:none!important;visibility:hidden}.visuallyhidden{border:0;clip:rect(0 0 0 0);height:1px;margin:-1px;overflow:hidden;padding:0;position:absolute;width:1px}.visuallyhidden.focusable:active,.visuallyhidden.focusable:focus{clip:auto;height:auto;margin:0;overflow:visible;position:static;width:auto}.invisible{visibility:hidden}.relative{position:relative}big,small{font-size:100%}@media print{body,html,section{background:none!important}*{box-shadow:none!important;text-shadow:none!important;filter:none!important;-ms-filter:none!important}a,a:visited{text-decoration:underline}.ir 
a:after,a[href^="#"]:after,a[href^="javascript:"]:after{content:""}blockquote,pre{page-break-inside:avoid}thead{display:table-header-group}img,tr{page-break-inside:avoid}img{max-width:100%!important}@page{margin:.5cm}.rst-content .toctree-wrapper>p.caption,h2,h3,p{orphans:3;widows:3}.rst-content .toctree-wrapper>p.caption,h2,h3{page-break-after:avoid}}.btn,.fa:before,.icon:before,.rst-content .admonition,.rst-content .admonition-title:before,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .code-block-caption .headerlink:before,.rst-content .danger,.rst-content .eqno .headerlink:before,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-alert,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li 
button.toctree-expand:before,input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week],select,textarea{-webkit-font-smoothing:antialiased}.clearfix{*zoom:1}.clearfix:after,.clearfix:before{display:table;content:""}.clearfix:after{clear:both}/*! + * Font Awesome 4.7.0 by @davegandy - http://fontawesome.io - @fontawesome + * License - http://fontawesome.io/license (Font: SIL OFL 1.1, CSS: MIT License) + */@font-face{font-family:FontAwesome;src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713);src:url(fonts/fontawesome-webfont.eot?674f50d287a8c48dc19ba404d20fe713?#iefix&v=4.7.0) format("embedded-opentype"),url(fonts/fontawesome-webfont.woff2?af7ae505a9eed503f8b8e6982036873e) format("woff2"),url(fonts/fontawesome-webfont.woff?fee66e712a8a08eef5805a46892932ad) format("woff"),url(fonts/fontawesome-webfont.ttf?b06871f281fee6b241d60582ae9369b9) format("truetype"),url(fonts/fontawesome-webfont.svg?912ec66d7572ff821749319396470bde#fontawesomeregular) format("svg");font-weight:400;font-style:normal}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{display:inline-block;font:normal normal normal 14px/1 
FontAwesome;font-size:inherit;text-rendering:auto;-webkit-font-smoothing:antialiased;-moz-osx-font-smoothing:grayscale}.fa-lg{font-size:1.33333em;line-height:.75em;vertical-align:-15%}.fa-2x{font-size:2em}.fa-3x{font-size:3em}.fa-4x{font-size:4em}.fa-5x{font-size:5em}.fa-fw{width:1.28571em;text-align:center}.fa-ul{padding-left:0;margin-left:2.14286em;list-style-type:none}.fa-ul>li{position:relative}.fa-li{position:absolute;left:-2.14286em;width:2.14286em;top:.14286em;text-align:center}.fa-li.fa-lg{left:-1.85714em}.fa-border{padding:.2em .25em .15em;border:.08em solid #eee;border-radius:.1em}.fa-pull-left{float:left}.fa-pull-right{float:right}.fa-pull-left.icon,.fa.fa-pull-left,.rst-content .code-block-caption .fa-pull-left.headerlink,.rst-content .eqno .fa-pull-left.headerlink,.rst-content .fa-pull-left.admonition-title,.rst-content code.download span.fa-pull-left:first-child,.rst-content dl dt .fa-pull-left.headerlink,.rst-content h1 .fa-pull-left.headerlink,.rst-content h2 .fa-pull-left.headerlink,.rst-content h3 .fa-pull-left.headerlink,.rst-content h4 .fa-pull-left.headerlink,.rst-content h5 .fa-pull-left.headerlink,.rst-content h6 .fa-pull-left.headerlink,.rst-content p .fa-pull-left.headerlink,.rst-content table>caption .fa-pull-left.headerlink,.rst-content tt.download span.fa-pull-left:first-child,.wy-menu-vertical li.current>a button.fa-pull-left.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-left.toctree-expand,.wy-menu-vertical li button.fa-pull-left.toctree-expand{margin-right:.3em}.fa-pull-right.icon,.fa.fa-pull-right,.rst-content .code-block-caption .fa-pull-right.headerlink,.rst-content .eqno .fa-pull-right.headerlink,.rst-content .fa-pull-right.admonition-title,.rst-content code.download span.fa-pull-right:first-child,.rst-content dl dt .fa-pull-right.headerlink,.rst-content h1 .fa-pull-right.headerlink,.rst-content h2 .fa-pull-right.headerlink,.rst-content h3 .fa-pull-right.headerlink,.rst-content h4 .fa-pull-right.headerlink,.rst-content 
h5 .fa-pull-right.headerlink,.rst-content h6 .fa-pull-right.headerlink,.rst-content p .fa-pull-right.headerlink,.rst-content table>caption .fa-pull-right.headerlink,.rst-content tt.download span.fa-pull-right:first-child,.wy-menu-vertical li.current>a button.fa-pull-right.toctree-expand,.wy-menu-vertical li.on a button.fa-pull-right.toctree-expand,.wy-menu-vertical li button.fa-pull-right.toctree-expand{margin-left:.3em}.pull-right{float:right}.pull-left{float:left}.fa.pull-left,.pull-left.icon,.rst-content .code-block-caption .pull-left.headerlink,.rst-content .eqno .pull-left.headerlink,.rst-content .pull-left.admonition-title,.rst-content code.download span.pull-left:first-child,.rst-content dl dt .pull-left.headerlink,.rst-content h1 .pull-left.headerlink,.rst-content h2 .pull-left.headerlink,.rst-content h3 .pull-left.headerlink,.rst-content h4 .pull-left.headerlink,.rst-content h5 .pull-left.headerlink,.rst-content h6 .pull-left.headerlink,.rst-content p .pull-left.headerlink,.rst-content table>caption .pull-left.headerlink,.rst-content tt.download span.pull-left:first-child,.wy-menu-vertical li.current>a button.pull-left.toctree-expand,.wy-menu-vertical li.on a button.pull-left.toctree-expand,.wy-menu-vertical li button.pull-left.toctree-expand{margin-right:.3em}.fa.pull-right,.pull-right.icon,.rst-content .code-block-caption .pull-right.headerlink,.rst-content .eqno .pull-right.headerlink,.rst-content .pull-right.admonition-title,.rst-content code.download span.pull-right:first-child,.rst-content dl dt .pull-right.headerlink,.rst-content h1 .pull-right.headerlink,.rst-content h2 .pull-right.headerlink,.rst-content h3 .pull-right.headerlink,.rst-content h4 .pull-right.headerlink,.rst-content h5 .pull-right.headerlink,.rst-content h6 .pull-right.headerlink,.rst-content p .pull-right.headerlink,.rst-content table>caption .pull-right.headerlink,.rst-content tt.download span.pull-right:first-child,.wy-menu-vertical li.current>a 
button.pull-right.toctree-expand,.wy-menu-vertical li.on a button.pull-right.toctree-expand,.wy-menu-vertical li button.pull-right.toctree-expand{margin-left:.3em}.fa-spin{-webkit-animation:fa-spin 2s linear infinite;animation:fa-spin 2s linear infinite}.fa-pulse{-webkit-animation:fa-spin 1s steps(8) infinite;animation:fa-spin 1s steps(8) infinite}@-webkit-keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}@keyframes fa-spin{0%{-webkit-transform:rotate(0deg);transform:rotate(0deg)}to{-webkit-transform:rotate(359deg);transform:rotate(359deg)}}.fa-rotate-90{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=1)";-webkit-transform:rotate(90deg);-ms-transform:rotate(90deg);transform:rotate(90deg)}.fa-rotate-180{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2)";-webkit-transform:rotate(180deg);-ms-transform:rotate(180deg);transform:rotate(180deg)}.fa-rotate-270{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=3)";-webkit-transform:rotate(270deg);-ms-transform:rotate(270deg);transform:rotate(270deg)}.fa-flip-horizontal{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=0, mirror=1)";-webkit-transform:scaleX(-1);-ms-transform:scaleX(-1);transform:scaleX(-1)}.fa-flip-vertical{-ms-filter:"progid:DXImageTransform.Microsoft.BasicImage(rotation=2, mirror=1)";-webkit-transform:scaleY(-1);-ms-transform:scaleY(-1);transform:scaleY(-1)}:root .fa-flip-horizontal,:root .fa-flip-vertical,:root .fa-rotate-90,:root .fa-rotate-180,:root 
.fa-rotate-270{filter:none}.fa-stack{position:relative;display:inline-block;width:2em;height:2em;line-height:2em;vertical-align:middle}.fa-stack-1x,.fa-stack-2x{position:absolute;left:0;width:100%;text-align:center}.fa-stack-1x{line-height:inherit}.fa-stack-2x{font-size:2em}.fa-inverse{color:#fff}.fa-glass:before{content:""}.fa-music:before{content:""}.fa-search:before,.icon-search:before{content:""}.fa-envelope-o:before{content:""}.fa-heart:before{content:""}.fa-star:before{content:""}.fa-star-o:before{content:""}.fa-user:before{content:""}.fa-film:before{content:""}.fa-th-large:before{content:""}.fa-th:before{content:""}.fa-th-list:before{content:""}.fa-check:before{content:""}.fa-close:before,.fa-remove:before,.fa-times:before{content:""}.fa-search-plus:before{content:""}.fa-search-minus:before{content:""}.fa-power-off:before{content:""}.fa-signal:before{content:""}.fa-cog:before,.fa-gear:before{content:""}.fa-trash-o:before{content:""}.fa-home:before,.icon-home:before{content:""}.fa-file-o:before{content:""}.fa-clock-o:before{content:""}.fa-road:before{content:""}.fa-download:before,.rst-content code.download span:first-child:before,.rst-content tt.download 
span:first-child:before{content:""}.fa-arrow-circle-o-down:before{content:""}.fa-arrow-circle-o-up:before{content:""}.fa-inbox:before{content:""}.fa-play-circle-o:before{content:""}.fa-repeat:before,.fa-rotate-right:before{content:""}.fa-refresh:before{content:""}.fa-list-alt:before{content:""}.fa-lock:before{content:""}.fa-flag:before{content:""}.fa-headphones:before{content:""}.fa-volume-off:before{content:""}.fa-volume-down:before{content:""}.fa-volume-up:before{content:""}.fa-qrcode:before{content:""}.fa-barcode:before{content:""}.fa-tag:before{content:""}.fa-tags:before{content:""}.fa-book:before,.icon-book:before{content:""}.fa-bookmark:before{content:""}.fa-print:before{content:""}.fa-camera:before{content:""}.fa-font:before{content:""}.fa-bold:before{content:""}.fa-italic:before{content:""}.fa-text-height:before{content:""}.fa-text-width:before{content:""}.fa-align-left:before{content:""}.fa-align-center:before{content:""}.fa-align-right:before{content:""}.fa-align-justify:before{content:""}.fa-list:before{content:""}.fa-dedent:before,.fa-outdent:before{content:""}.fa-indent:before{content:""}.fa-video-camera:before{content:""}.fa-image:before,.fa-photo:before,.fa-picture-o:before{content:""}.fa-pencil:before{content:""}.fa-map-marker:before{content:""}.fa-adjust:before{content:""}.fa-tint:before{content:""}.fa-edit:before,.fa-pencil-square-o:before{content:""}.fa-share-square-o:before{content:""}.fa-check-square-o:before{content:""}.fa-arrows:before{content:""}.fa-step-backward:before{content:""}.fa-fast-backward:before{content:""}.fa-backward:before{content:""}.fa-play:before{content:""}.fa-pause:before{content:""}.fa-stop:before{content:""}.fa-forward:before{content:""}.fa-fast-forward:before{content:""}.fa-step-forward:before{content:""}.fa-eject:before{content:""}.fa-chevron-left:before{content:""}.fa-chevron-right:before{content:""}.fa-plus-circle:before{content:""}.fa-minus-circle:before{content
:""}.fa-times-circle:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before{content:""}.fa-check-circle:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before{content:""}.fa-question-circle:before{content:""}.fa-info-circle:before{content:""}.fa-crosshairs:before{content:""}.fa-times-circle-o:before{content:""}.fa-check-circle-o:before{content:""}.fa-ban:before{content:""}.fa-arrow-left:before{content:""}.fa-arrow-right:before{content:""}.fa-arrow-up:before{content:""}.fa-arrow-down:before{content:""}.fa-mail-forward:before,.fa-share:before{content:""}.fa-expand:before{content:""}.fa-compress:before{content:""}.fa-plus:before{content:""}.fa-minus:before{content:""}.fa-asterisk:before{content:""}.fa-exclamation-circle:before,.rst-content .admonition-title:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before{content:""}.fa-gift:before{content:""}.fa-leaf:before{content:""}.fa-fire:before,.icon-fire:before{content:""}.fa-eye:before{content:""}.fa-eye-slash:before{content:""}.fa-exclamation-triangle:before,.fa-warning:before{content:""}.fa-plane:before{content:""}.fa-calendar:before{content:""}.fa-random:before{content:""}.fa-comment:before{content:""}.fa-magnet:before{content:""}.fa-chevron-up:before{content:""}.fa-chevron-down:before{content:""}.fa-retweet:before{content:""}.fa-shopping-cart:before{content:""}.fa-folder:before{content:""}.fa-folder-open:before{content:""}.fa-arrows-v:before{content:""}.fa-arrows-h:before{content:""}.fa-bar-chart-o:before,.fa-bar-chart:before{content:""}.fa-twitter-square:before{content:""}.fa-facebook-square:before{content:""}.fa-camera-retro:before{content:""}.fa-key:before{content:""}.fa-cogs:before,.fa-gears:before{content:""}.fa-comments:before{content:""}.fa-thumbs-o-up:before{content:""}.fa-thumbs-o-down:before{content:""}.fa-star-half:before
{content:""}.fa-heart-o:before{content:""}.fa-sign-out:before{content:""}.fa-linkedin-square:before{content:""}.fa-thumb-tack:before{content:""}.fa-external-link:before{content:""}.fa-sign-in:before{content:""}.fa-trophy:before{content:""}.fa-github-square:before{content:""}.fa-upload:before{content:""}.fa-lemon-o:before{content:""}.fa-phone:before{content:""}.fa-square-o:before{content:""}.fa-bookmark-o:before{content:""}.fa-phone-square:before{content:""}.fa-twitter:before{content:""}.fa-facebook-f:before,.fa-facebook:before{content:""}.fa-github:before,.icon-github:before{content:""}.fa-unlock:before{content:""}.fa-credit-card:before{content:""}.fa-feed:before,.fa-rss:before{content:""}.fa-hdd-o:before{content:""}.fa-bullhorn:before{content:""}.fa-bell:before{content:""}.fa-certificate:before{content:""}.fa-hand-o-right:before{content:""}.fa-hand-o-left:before{content:""}.fa-hand-o-up:before{content:""}.fa-hand-o-down:before{content:""}.fa-arrow-circle-left:before,.icon-circle-arrow-left:before{content:""}.fa-arrow-circle-right:before,.icon-circle-arrow-right:before{content:""}.fa-arrow-circle-up:before{content:""}.fa-arrow-circle-down:before{content:""}.fa-globe:before{content:""}.fa-wrench:before{content:""}.fa-tasks:before{content:""}.fa-filter:before{content:""}.fa-briefcase:before{content:""}.fa-arrows-alt:before{content:""}.fa-group:before,.fa-users:before{content:""}.fa-chain:before,.fa-link:before,.icon-link:before{content:""}.fa-cloud:before{content:""}.fa-flask:before{content:""}.fa-cut:before,.fa-scissors:before{content:""}.fa-copy:before,.fa-files-o:before{content:""}.fa-paperclip:before{content:""}.fa-floppy-o:before,.fa-save:before{content:""}.fa-square:before{content:""}.fa-bars:before,.fa-navicon:before,.fa-reorder:before{content:""}.fa-list-ul:before{content:""}.fa-list-ol:before{content:""}.fa-strikethrough:before{content:""}.fa-underline:before{content:""}.fa-table:before{content:""}.fa-magi
c:before{content:""}.fa-truck:before{content:""}.fa-pinterest:before{content:""}.fa-pinterest-square:before{content:""}.fa-google-plus-square:before{content:""}.fa-google-plus:before{content:""}.fa-money:before{content:""}.fa-caret-down:before,.icon-caret-down:before,.wy-dropdown .caret:before{content:""}.fa-caret-up:before{content:""}.fa-caret-left:before{content:""}.fa-caret-right:before{content:""}.fa-columns:before{content:""}.fa-sort:before,.fa-unsorted:before{content:""}.fa-sort-desc:before,.fa-sort-down:before{content:""}.fa-sort-asc:before,.fa-sort-up:before{content:""}.fa-envelope:before{content:""}.fa-linkedin:before{content:""}.fa-rotate-left:before,.fa-undo:before{content:""}.fa-gavel:before,.fa-legal:before{content:""}.fa-dashboard:before,.fa-tachometer:before{content:""}.fa-comment-o:before{content:""}.fa-comments-o:before{content:""}.fa-bolt:before,.fa-flash:before{content:""}.fa-sitemap:before{content:""}.fa-umbrella:before{content:""}.fa-clipboard:before,.fa-paste:before{content:""}.fa-lightbulb-o:before{content:""}.fa-exchange:before{content:""}.fa-cloud-download:before{content:""}.fa-cloud-upload:before{content:""}.fa-user-md:before{content:""}.fa-stethoscope:before{content:""}.fa-suitcase:before{content:""}.fa-bell-o:before{content:""}.fa-coffee:before{content:""}.fa-cutlery:before{content:""}.fa-file-text-o:before{content:""}.fa-building-o:before{content:""}.fa-hospital-o:before{content:""}.fa-ambulance:before{content:""}.fa-medkit:before{content:""}.fa-fighter-jet:before{content:""}.fa-beer:before{content:""}.fa-h-square:before{content:""}.fa-plus-square:before{content:""}.fa-angle-double-left:before{content:""}.fa-angle-double-right:before{content:""}.fa-angle-double-up:before{content:""}.fa-angle-double-down:before{content:""}.fa-angle-left:before{content:""}.fa-angle-right:before{content:""}.fa-angle-up:before{content:""}.fa-angle-down:before{content:""}.fa-desktop:before{content:""}.fa-l
aptop:before{content:""}.fa-tablet:before{content:""}.fa-mobile-phone:before,.fa-mobile:before{content:""}.fa-circle-o:before{content:""}.fa-quote-left:before{content:""}.fa-quote-right:before{content:""}.fa-spinner:before{content:""}.fa-circle:before{content:""}.fa-mail-reply:before,.fa-reply:before{content:""}.fa-github-alt:before{content:""}.fa-folder-o:before{content:""}.fa-folder-open-o:before{content:""}.fa-smile-o:before{content:""}.fa-frown-o:before{content:""}.fa-meh-o:before{content:""}.fa-gamepad:before{content:""}.fa-keyboard-o:before{content:""}.fa-flag-o:before{content:""}.fa-flag-checkered:before{content:""}.fa-terminal:before{content:""}.fa-code:before{content:""}.fa-mail-reply-all:before,.fa-reply-all:before{content:""}.fa-star-half-empty:before,.fa-star-half-full:before,.fa-star-half-o:before{content:""}.fa-location-arrow:before{content:""}.fa-crop:before{content:""}.fa-code-fork:before{content:""}.fa-chain-broken:before,.fa-unlink:before{content:""}.fa-question:before{content:""}.fa-info:before{content:""}.fa-exclamation:before{content:""}.fa-superscript:before{content:""}.fa-subscript:before{content:""}.fa-eraser:before{content:""}.fa-puzzle-piece:before{content:""}.fa-microphone:before{content:""}.fa-microphone-slash:before{content:""}.fa-shield:before{content:""}.fa-calendar-o:before{content:""}.fa-fire-extinguisher:before{content:""}.fa-rocket:before{content:""}.fa-maxcdn:before{content:""}.fa-chevron-circle-left:before{content:""}.fa-chevron-circle-right:before{content:""}.fa-chevron-circle-up:before{content:""}.fa-chevron-circle-down:before{content:""}.fa-html5:before{content:""}.fa-css3:before{content:""}.fa-anchor:before{content:""}.fa-unlock-alt:before{content:""}.fa-bullseye:before{content:""}.fa-ellipsis-h:before{content:""}.fa-ellipsis-v:before{content:""}.fa-rss-square:before{content:""}.fa-play-circle:before{content:""}.fa-ticket:before{content:""}.fa-minus-square:before{content:
""}.fa-minus-square-o:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before{content:""}.fa-level-up:before{content:""}.fa-level-down:before{content:""}.fa-check-square:before{content:""}.fa-pencil-square:before{content:""}.fa-external-link-square:before{content:""}.fa-share-square:before{content:""}.fa-compass:before{content:""}.fa-caret-square-o-down:before,.fa-toggle-down:before{content:""}.fa-caret-square-o-up:before,.fa-toggle-up:before{content:""}.fa-caret-square-o-right:before,.fa-toggle-right:before{content:""}.fa-eur:before,.fa-euro:before{content:""}.fa-gbp:before{content:""}.fa-dollar:before,.fa-usd:before{content:""}.fa-inr:before,.fa-rupee:before{content:""}.fa-cny:before,.fa-jpy:before,.fa-rmb:before,.fa-yen:before{content:""}.fa-rouble:before,.fa-rub:before,.fa-ruble:before{content:""}.fa-krw:before,.fa-won:before{content:""}.fa-bitcoin:before,.fa-btc:before{content:""}.fa-file:before{content:""}.fa-file-text:before{content:""}.fa-sort-alpha-asc:before{content:""}.fa-sort-alpha-desc:before{content:""}.fa-sort-amount-asc:before{content:""}.fa-sort-amount-desc:before{content:""}.fa-sort-numeric-asc:before{content:""}.fa-sort-numeric-desc:before{content:""}.fa-thumbs-up:before{content:""}.fa-thumbs-down:before{content:""}.fa-youtube-square:before{content:""}.fa-youtube:before{content:""}.fa-xing:before{content:""}.fa-xing-square:before{content:""}.fa-youtube-play:before{content:""}.fa-dropbox:before{content:""}.fa-stack-overflow:before{content:""}.fa-instagram:before{content:""}.fa-flickr:before{content:""}.fa-adn:before{content:""}.fa-bitbucket:before,.icon-bitbucket:before{content:""}.fa-bitbucket-square:before{content:""}.fa-tumblr:before{content:""}.fa-tumblr-square:before{content:""}.fa-long-arrow-down:before{content:""}.fa-long-arrow-up:before{content:""}.fa-long-arrow-left:before{content:""}.fa-long-arrow-right:before{content:""}.fa-a
pple:before{content:""}.fa-windows:before{content:""}.fa-android:before{content:""}.fa-linux:before{content:""}.fa-dribbble:before{content:""}.fa-skype:before{content:""}.fa-foursquare:before{content:""}.fa-trello:before{content:""}.fa-female:before{content:""}.fa-male:before{content:""}.fa-gittip:before,.fa-gratipay:before{content:""}.fa-sun-o:before{content:""}.fa-moon-o:before{content:""}.fa-archive:before{content:""}.fa-bug:before{content:""}.fa-vk:before{content:""}.fa-weibo:before{content:""}.fa-renren:before{content:""}.fa-pagelines:before{content:""}.fa-stack-exchange:before{content:""}.fa-arrow-circle-o-right:before{content:""}.fa-arrow-circle-o-left:before{content:""}.fa-caret-square-o-left:before,.fa-toggle-left:before{content:""}.fa-dot-circle-o:before{content:""}.fa-wheelchair:before{content:""}.fa-vimeo-square:before{content:""}.fa-try:before,.fa-turkish-lira:before{content:""}.fa-plus-square-o:before,.wy-menu-vertical li button.toctree-expand:before{content:""}.fa-space-shuttle:before{content:""}.fa-slack:before{content:""}.fa-envelope-square:before{content:""}.fa-wordpress:before{content:""}.fa-openid:before{content:""}.fa-bank:before,.fa-institution:before,.fa-university:before{content:""}.fa-graduation-cap:before,.fa-mortar-board:before{content:""}.fa-yahoo:before{content:""}.fa-google:before{content:""}.fa-reddit:before{content:""}.fa-reddit-square:before{content:""}.fa-stumbleupon-circle:before{content:""}.fa-stumbleupon:before{content:""}.fa-delicious:before{content:""}.fa-digg:before{content:""}.fa-pied-piper-pp:before{content:""}.fa-pied-piper-alt:before{content:""}.fa-drupal:before{content:""}.fa-joomla:before{content:""}.fa-language:before{content:""}.fa-fax:before{content:""}.fa-building:before{content:""}.fa-child:before{content:""}.fa-paw:before{content:""}.fa-spoon:before{content:""}.fa-cube:before{content:""}.fa-cubes:before{content:""}.fa-behance:before{content:""}.fa-behance-squa
re:before{content:""}.fa-steam:before{content:""}.fa-steam-square:before{content:""}.fa-recycle:before{content:""}.fa-automobile:before,.fa-car:before{content:""}.fa-cab:before,.fa-taxi:before{content:""}.fa-tree:before{content:""}.fa-spotify:before{content:""}.fa-deviantart:before{content:""}.fa-soundcloud:before{content:""}.fa-database:before{content:""}.fa-file-pdf-o:before{content:""}.fa-file-word-o:before{content:""}.fa-file-excel-o:before{content:""}.fa-file-powerpoint-o:before{content:""}.fa-file-image-o:before,.fa-file-photo-o:before,.fa-file-picture-o:before{content:""}.fa-file-archive-o:before,.fa-file-zip-o:before{content:""}.fa-file-audio-o:before,.fa-file-sound-o:before{content:""}.fa-file-movie-o:before,.fa-file-video-o:before{content:""}.fa-file-code-o:before{content:""}.fa-vine:before{content:""}.fa-codepen:before{content:""}.fa-jsfiddle:before{content:""}.fa-life-bouy:before,.fa-life-buoy:before,.fa-life-ring:before,.fa-life-saver:before,.fa-support:before{content:""}.fa-circle-o-notch:before{content:""}.fa-ra:before,.fa-rebel:before,.fa-resistance:before{content:""}.fa-empire:before,.fa-ge:before{content:""}.fa-git-square:before{content:""}.fa-git:before{content:""}.fa-hacker-news:before,.fa-y-combinator-square:before,.fa-yc-square:before{content:""}.fa-tencent-weibo:before{content:""}.fa-qq:before{content:""}.fa-wechat:before,.fa-weixin:before{content:""}.fa-paper-plane:before,.fa-send:before{content:""}.fa-paper-plane-o:before,.fa-send-o:before{content:""}.fa-history:before{content:""}.fa-circle-thin:before{content:""}.fa-header:before{content:""}.fa-paragraph:before{content:""}.fa-sliders:before{content:""}.fa-share-alt:before{content:""}.fa-share-alt-square:before{content:""}.fa-bomb:before{content:""}.fa-futbol-o:before,.fa-soccer-ball-o:before{content:""}.fa-tty:before{content:""}.fa-binoculars:before{content:""}.fa-plug:before{content:""}.fa-slideshare:before{content:""}.fa-twitch:before{conten
t:""}.fa-yelp:before{content:""}.fa-newspaper-o:before{content:""}.fa-wifi:before{content:""}.fa-calculator:before{content:""}.fa-paypal:before{content:""}.fa-google-wallet:before{content:""}.fa-cc-visa:before{content:""}.fa-cc-mastercard:before{content:""}.fa-cc-discover:before{content:""}.fa-cc-amex:before{content:""}.fa-cc-paypal:before{content:""}.fa-cc-stripe:before{content:""}.fa-bell-slash:before{content:""}.fa-bell-slash-o:before{content:""}.fa-trash:before{content:""}.fa-copyright:before{content:""}.fa-at:before{content:""}.fa-eyedropper:before{content:""}.fa-paint-brush:before{content:""}.fa-birthday-cake:before{content:""}.fa-area-chart:before{content:""}.fa-pie-chart:before{content:""}.fa-line-chart:before{content:""}.fa-lastfm:before{content:""}.fa-lastfm-square:before{content:""}.fa-toggle-off:before{content:""}.fa-toggle-on:before{content:""}.fa-bicycle:before{content:""}.fa-bus:before{content:""}.fa-ioxhost:before{content:""}.fa-angellist:before{content:""}.fa-cc:before{content:""}.fa-ils:before,.fa-shekel:before,.fa-sheqel:before{content:""}.fa-meanpath:before{content:""}.fa-buysellads:before{content:""}.fa-connectdevelop:before{content:""}.fa-dashcube:before{content:""}.fa-forumbee:before{content:""}.fa-leanpub:before{content:""}.fa-sellsy:before{content:""}.fa-shirtsinbulk:before{content:""}.fa-simplybuilt:before{content:""}.fa-skyatlas:before{content:""}.fa-cart-plus:before{content:""}.fa-cart-arrow-down:before{content:""}.fa-diamond:before{content:""}.fa-ship:before{content:""}.fa-user-secret:before{content:""}.fa-motorcycle:before{content:""}.fa-street-view:before{content:""}.fa-heartbeat:before{content:""}.fa-venus:before{content:""}.fa-mars:before{content:""}.fa-mercury:before{content:""}.fa-intersex:before,.fa-transgender:before{content:""}.fa-transgender-alt:before{content:""}.fa-venus-double:before{content:""}.fa-mars-double:before{content:""}.fa-venus-mars:before{content:""}.fa-m
ars-stroke:before{content:""}.fa-mars-stroke-v:before{content:""}.fa-mars-stroke-h:before{content:""}.fa-neuter:before{content:""}.fa-genderless:before{content:""}.fa-facebook-official:before{content:""}.fa-pinterest-p:before{content:""}.fa-whatsapp:before{content:""}.fa-server:before{content:""}.fa-user-plus:before{content:""}.fa-user-times:before{content:""}.fa-bed:before,.fa-hotel:before{content:""}.fa-viacoin:before{content:""}.fa-train:before{content:""}.fa-subway:before{content:""}.fa-medium:before{content:""}.fa-y-combinator:before,.fa-yc:before{content:""}.fa-optin-monster:before{content:""}.fa-opencart:before{content:""}.fa-expeditedssl:before{content:""}.fa-battery-4:before,.fa-battery-full:before,.fa-battery:before{content:""}.fa-battery-3:before,.fa-battery-three-quarters:before{content:""}.fa-battery-2:before,.fa-battery-half:before{content:""}.fa-battery-1:before,.fa-battery-quarter:before{content:""}.fa-battery-0:before,.fa-battery-empty:before{content:""}.fa-mouse-pointer:before{content:""}.fa-i-cursor:before{content:""}.fa-object-group:before{content:""}.fa-object-ungroup:before{content:""}.fa-sticky-note:before{content:""}.fa-sticky-note-o:before{content:""}.fa-cc-jcb:before{content:""}.fa-cc-diners-club:before{content:""}.fa-clone:before{content:""}.fa-balance-scale:before{content:""}.fa-hourglass-o:before{content:""}.fa-hourglass-1:before,.fa-hourglass-start:before{content:""}.fa-hourglass-2:before,.fa-hourglass-half:before{content:""}.fa-hourglass-3:before,.fa-hourglass-end:before{content:""}.fa-hourglass:before{content:""}.fa-hand-grab-o:before,.fa-hand-rock-o:before{content:""}.fa-hand-paper-o:before,.fa-hand-stop-o:before{content:""}.fa-hand-scissors-o:before{content:""}.fa-hand-lizard-o:before{content:""}.fa-hand-spock-o:before{content:""}.fa-hand-pointer-o:before{content:""}.fa-hand-peace-o:before{content:""}.fa-trademark:before{content:""}.fa-registered:before{content:""}.fa-creative-commons
:before{content:""}.fa-gg:before{content:""}.fa-gg-circle:before{content:""}.fa-tripadvisor:before{content:""}.fa-odnoklassniki:before{content:""}.fa-odnoklassniki-square:before{content:""}.fa-get-pocket:before{content:""}.fa-wikipedia-w:before{content:""}.fa-safari:before{content:""}.fa-chrome:before{content:""}.fa-firefox:before{content:""}.fa-opera:before{content:""}.fa-internet-explorer:before{content:""}.fa-television:before,.fa-tv:before{content:""}.fa-contao:before{content:""}.fa-500px:before{content:""}.fa-amazon:before{content:""}.fa-calendar-plus-o:before{content:""}.fa-calendar-minus-o:before{content:""}.fa-calendar-times-o:before{content:""}.fa-calendar-check-o:before{content:""}.fa-industry:before{content:""}.fa-map-pin:before{content:""}.fa-map-signs:before{content:""}.fa-map-o:before{content:""}.fa-map:before{content:""}.fa-commenting:before{content:""}.fa-commenting-o:before{content:""}.fa-houzz:before{content:""}.fa-vimeo:before{content:""}.fa-black-tie:before{content:""}.fa-fonticons:before{content:""}.fa-reddit-alien:before{content:""}.fa-edge:before{content:""}.fa-credit-card-alt:before{content:""}.fa-codiepie:before{content:""}.fa-modx:before{content:""}.fa-fort-awesome:before{content:""}.fa-usb:before{content:""}.fa-product-hunt:before{content:""}.fa-mixcloud:before{content:""}.fa-scribd:before{content:""}.fa-pause-circle:before{content:""}.fa-pause-circle-o:before{content:""}.fa-stop-circle:before{content:""}.fa-stop-circle-o:before{content:""}.fa-shopping-bag:before{content:""}.fa-shopping-basket:before{content:""}.fa-hashtag:before{content:""}.fa-bluetooth:before{content:""}.fa-bluetooth-b:before{content:""}.fa-percent:before{content:""}.fa-gitlab:before,.icon-gitlab:before{content:""}.fa-wpbeginner:before{content:""}.fa-wpforms:before{content:""}.fa-envira:before{content:""}.fa-universal-access:before{content:""}.fa-wheelchair-alt:before{content:""}.fa-question-circle-o:before{conten
t:""}.fa-blind:before{content:""}.fa-audio-description:before{content:""}.fa-volume-control-phone:before{content:""}.fa-braille:before{content:""}.fa-assistive-listening-systems:before{content:""}.fa-american-sign-language-interpreting:before,.fa-asl-interpreting:before{content:""}.fa-deaf:before,.fa-deafness:before,.fa-hard-of-hearing:before{content:""}.fa-glide:before{content:""}.fa-glide-g:before{content:""}.fa-sign-language:before,.fa-signing:before{content:""}.fa-low-vision:before{content:""}.fa-viadeo:before{content:""}.fa-viadeo-square:before{content:""}.fa-snapchat:before{content:""}.fa-snapchat-ghost:before{content:""}.fa-snapchat-square:before{content:""}.fa-pied-piper:before{content:""}.fa-first-order:before{content:""}.fa-yoast:before{content:""}.fa-themeisle:before{content:""}.fa-google-plus-circle:before,.fa-google-plus-official:before{content:""}.fa-fa:before,.fa-font-awesome:before{content:""}.fa-handshake-o:before{content:""}.fa-envelope-open:before{content:""}.fa-envelope-open-o:before{content:""}.fa-linode:before{content:""}.fa-address-book:before{content:""}.fa-address-book-o:before{content:""}.fa-address-card:before,.fa-vcard:before{content:""}.fa-address-card-o:before,.fa-vcard-o:before{content:""}.fa-user-circle:before{content:""}.fa-user-circle-o:before{content:""}.fa-user-o:before{content:""}.fa-id-badge:before{content:""}.fa-drivers-license:before,.fa-id-card:before{content:""}.fa-drivers-license-o:before,.fa-id-card-o:before{content:""}.fa-quora:before{content:""}.fa-free-code-camp:before{content:""}.fa-telegram:before{content:""}.fa-thermometer-4:before,.fa-thermometer-full:before,.fa-thermometer:before{content:""}.fa-thermometer-3:before,.fa-thermometer-three-quarters:before{content:""}.fa-thermometer-2:before,.fa-thermometer-half:before{content:""}.fa-thermometer-1:before,.fa-thermometer-quarter:before{content:""}.fa-thermometer-0:before,.fa-thermometer-empty:before{content:""}.fa-shower:befo
re{content:""}.fa-bath:before,.fa-bathtub:before,.fa-s15:before{content:""}.fa-podcast:before{content:""}.fa-window-maximize:before{content:""}.fa-window-minimize:before{content:""}.fa-window-restore:before{content:""}.fa-times-rectangle:before,.fa-window-close:before{content:""}.fa-times-rectangle-o:before,.fa-window-close-o:before{content:""}.fa-bandcamp:before{content:""}.fa-grav:before{content:""}.fa-etsy:before{content:""}.fa-imdb:before{content:""}.fa-ravelry:before{content:""}.fa-eercast:before{content:""}.fa-microchip:before{content:""}.fa-snowflake-o:before{content:""}.fa-superpowers:before{content:""}.fa-wpexplorer:before{content:""}.fa-meetup:before{content:""}.sr-only{position:absolute;width:1px;height:1px;padding:0;margin:-1px;overflow:hidden;clip:rect(0,0,0,0);border:0}.sr-only-focusable:active,.sr-only-focusable:focus{position:static;width:auto;height:auto;margin:0;overflow:visible;clip:auto}.fa,.icon,.rst-content .admonition-title,.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content code.download span:first-child,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 .headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink,.rst-content tt.download span:first-child,.wy-dropdown .caret,.wy-inline-validate.wy-inline-validate-danger .wy-input-context,.wy-inline-validate.wy-inline-validate-info .wy-input-context,.wy-inline-validate.wy-inline-validate-success .wy-input-context,.wy-inline-validate.wy-inline-validate-warning .wy-input-context,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li button.toctree-expand{font-family:inherit}.fa:before,.icon:before,.rst-content .admonition-title:before,.rst-content .code-block-caption 
.headerlink:before,.rst-content .eqno .headerlink:before,.rst-content code.download span:first-child:before,.rst-content dl dt .headerlink:before,.rst-content h1 .headerlink:before,.rst-content h2 .headerlink:before,.rst-content h3 .headerlink:before,.rst-content h4 .headerlink:before,.rst-content h5 .headerlink:before,.rst-content h6 .headerlink:before,.rst-content p.caption .headerlink:before,.rst-content p .headerlink:before,.rst-content table>caption .headerlink:before,.rst-content tt.download span:first-child:before,.wy-dropdown .caret:before,.wy-inline-validate.wy-inline-validate-danger .wy-input-context:before,.wy-inline-validate.wy-inline-validate-info .wy-input-context:before,.wy-inline-validate.wy-inline-validate-success .wy-input-context:before,.wy-inline-validate.wy-inline-validate-warning .wy-input-context:before,.wy-menu-vertical li.current>a button.toctree-expand:before,.wy-menu-vertical li.on a button.toctree-expand:before,.wy-menu-vertical li button.toctree-expand:before{font-family:FontAwesome;display:inline-block;font-style:normal;font-weight:400;line-height:1;text-decoration:inherit}.rst-content .code-block-caption a .headerlink,.rst-content .eqno a .headerlink,.rst-content a .admonition-title,.rst-content code.download a span:first-child,.rst-content dl dt a .headerlink,.rst-content h1 a .headerlink,.rst-content h2 a .headerlink,.rst-content h3 a .headerlink,.rst-content h4 a .headerlink,.rst-content h5 a .headerlink,.rst-content h6 a .headerlink,.rst-content p.caption a .headerlink,.rst-content p a .headerlink,.rst-content table>caption a .headerlink,.rst-content tt.download a span:first-child,.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a button.toctree-expand,.wy-menu-vertical li a button.toctree-expand,a .fa,a .icon,a .rst-content .admonition-title,a .rst-content .code-block-caption .headerlink,a .rst-content .eqno .headerlink,a .rst-content code.download span:first-child,a .rst-content dl dt .headerlink,a 
.rst-content h1 .headerlink,a .rst-content h2 .headerlink,a .rst-content h3 .headerlink,a .rst-content h4 .headerlink,a .rst-content h5 .headerlink,a .rst-content h6 .headerlink,a .rst-content p.caption .headerlink,a .rst-content p .headerlink,a .rst-content table>caption .headerlink,a .rst-content tt.download span:first-child,a .wy-menu-vertical li button.toctree-expand{display:inline-block;text-decoration:inherit}.btn .fa,.btn .icon,.btn .rst-content .admonition-title,.btn .rst-content .code-block-caption .headerlink,.btn .rst-content .eqno .headerlink,.btn .rst-content code.download span:first-child,.btn .rst-content dl dt .headerlink,.btn .rst-content h1 .headerlink,.btn .rst-content h2 .headerlink,.btn .rst-content h3 .headerlink,.btn .rst-content h4 .headerlink,.btn .rst-content h5 .headerlink,.btn .rst-content h6 .headerlink,.btn .rst-content p .headerlink,.btn .rst-content table>caption .headerlink,.btn .rst-content tt.download span:first-child,.btn .wy-menu-vertical li.current>a button.toctree-expand,.btn .wy-menu-vertical li.on a button.toctree-expand,.btn .wy-menu-vertical li button.toctree-expand,.nav .fa,.nav .icon,.nav .rst-content .admonition-title,.nav .rst-content .code-block-caption .headerlink,.nav .rst-content .eqno .headerlink,.nav .rst-content code.download span:first-child,.nav .rst-content dl dt .headerlink,.nav .rst-content h1 .headerlink,.nav .rst-content h2 .headerlink,.nav .rst-content h3 .headerlink,.nav .rst-content h4 .headerlink,.nav .rst-content h5 .headerlink,.nav .rst-content h6 .headerlink,.nav .rst-content p .headerlink,.nav .rst-content table>caption .headerlink,.nav .rst-content tt.download span:first-child,.nav .wy-menu-vertical li.current>a button.toctree-expand,.nav .wy-menu-vertical li.on a button.toctree-expand,.nav .wy-menu-vertical li button.toctree-expand,.rst-content .btn .admonition-title,.rst-content .code-block-caption .btn .headerlink,.rst-content .code-block-caption .nav .headerlink,.rst-content .eqno .btn 
.headerlink,.rst-content .eqno .nav .headerlink,.rst-content .nav .admonition-title,.rst-content code.download .btn span:first-child,.rst-content code.download .nav span:first-child,.rst-content dl dt .btn .headerlink,.rst-content dl dt .nav .headerlink,.rst-content h1 .btn .headerlink,.rst-content h1 .nav .headerlink,.rst-content h2 .btn .headerlink,.rst-content h2 .nav .headerlink,.rst-content h3 .btn .headerlink,.rst-content h3 .nav .headerlink,.rst-content h4 .btn .headerlink,.rst-content h4 .nav .headerlink,.rst-content h5 .btn .headerlink,.rst-content h5 .nav .headerlink,.rst-content h6 .btn .headerlink,.rst-content h6 .nav .headerlink,.rst-content p .btn .headerlink,.rst-content p .nav .headerlink,.rst-content table>caption .btn .headerlink,.rst-content table>caption .nav .headerlink,.rst-content tt.download .btn span:first-child,.rst-content tt.download .nav span:first-child,.wy-menu-vertical li .btn button.toctree-expand,.wy-menu-vertical li.current>a .btn button.toctree-expand,.wy-menu-vertical li.current>a .nav button.toctree-expand,.wy-menu-vertical li .nav button.toctree-expand,.wy-menu-vertical li.on a .btn button.toctree-expand,.wy-menu-vertical li.on a .nav button.toctree-expand{display:inline}.btn .fa-large.icon,.btn .fa.fa-large,.btn .rst-content .code-block-caption .fa-large.headerlink,.btn .rst-content .eqno .fa-large.headerlink,.btn .rst-content .fa-large.admonition-title,.btn .rst-content code.download span.fa-large:first-child,.btn .rst-content dl dt .fa-large.headerlink,.btn .rst-content h1 .fa-large.headerlink,.btn .rst-content h2 .fa-large.headerlink,.btn .rst-content h3 .fa-large.headerlink,.btn .rst-content h4 .fa-large.headerlink,.btn .rst-content h5 .fa-large.headerlink,.btn .rst-content h6 .fa-large.headerlink,.btn .rst-content p .fa-large.headerlink,.btn .rst-content table>caption .fa-large.headerlink,.btn .rst-content tt.download span.fa-large:first-child,.btn .wy-menu-vertical li button.fa-large.toctree-expand,.nav 
.fa-large.icon,.nav .fa.fa-large,.nav .rst-content .code-block-caption .fa-large.headerlink,.nav .rst-content .eqno .fa-large.headerlink,.nav .rst-content .fa-large.admonition-title,.nav .rst-content code.download span.fa-large:first-child,.nav .rst-content dl dt .fa-large.headerlink,.nav .rst-content h1 .fa-large.headerlink,.nav .rst-content h2 .fa-large.headerlink,.nav .rst-content h3 .fa-large.headerlink,.nav .rst-content h4 .fa-large.headerlink,.nav .rst-content h5 .fa-large.headerlink,.nav .rst-content h6 .fa-large.headerlink,.nav .rst-content p .fa-large.headerlink,.nav .rst-content table>caption .fa-large.headerlink,.nav .rst-content tt.download span.fa-large:first-child,.nav .wy-menu-vertical li button.fa-large.toctree-expand,.rst-content .btn .fa-large.admonition-title,.rst-content .code-block-caption .btn .fa-large.headerlink,.rst-content .code-block-caption .nav .fa-large.headerlink,.rst-content .eqno .btn .fa-large.headerlink,.rst-content .eqno .nav .fa-large.headerlink,.rst-content .nav .fa-large.admonition-title,.rst-content code.download .btn span.fa-large:first-child,.rst-content code.download .nav span.fa-large:first-child,.rst-content dl dt .btn .fa-large.headerlink,.rst-content dl dt .nav .fa-large.headerlink,.rst-content h1 .btn .fa-large.headerlink,.rst-content h1 .nav .fa-large.headerlink,.rst-content h2 .btn .fa-large.headerlink,.rst-content h2 .nav .fa-large.headerlink,.rst-content h3 .btn .fa-large.headerlink,.rst-content h3 .nav .fa-large.headerlink,.rst-content h4 .btn .fa-large.headerlink,.rst-content h4 .nav .fa-large.headerlink,.rst-content h5 .btn .fa-large.headerlink,.rst-content h5 .nav .fa-large.headerlink,.rst-content h6 .btn .fa-large.headerlink,.rst-content h6 .nav .fa-large.headerlink,.rst-content p .btn .fa-large.headerlink,.rst-content p .nav .fa-large.headerlink,.rst-content table>caption .btn .fa-large.headerlink,.rst-content table>caption .nav .fa-large.headerlink,.rst-content tt.download .btn 
span.fa-large:first-child,.rst-content tt.download .nav span.fa-large:first-child,.wy-menu-vertical li .btn button.fa-large.toctree-expand,.wy-menu-vertical li .nav button.fa-large.toctree-expand{line-height:.9em}.btn .fa-spin.icon,.btn .fa.fa-spin,.btn .rst-content .code-block-caption .fa-spin.headerlink,.btn .rst-content .eqno .fa-spin.headerlink,.btn .rst-content .fa-spin.admonition-title,.btn .rst-content code.download span.fa-spin:first-child,.btn .rst-content dl dt .fa-spin.headerlink,.btn .rst-content h1 .fa-spin.headerlink,.btn .rst-content h2 .fa-spin.headerlink,.btn .rst-content h3 .fa-spin.headerlink,.btn .rst-content h4 .fa-spin.headerlink,.btn .rst-content h5 .fa-spin.headerlink,.btn .rst-content h6 .fa-spin.headerlink,.btn .rst-content p .fa-spin.headerlink,.btn .rst-content table>caption .fa-spin.headerlink,.btn .rst-content tt.download span.fa-spin:first-child,.btn .wy-menu-vertical li button.fa-spin.toctree-expand,.nav .fa-spin.icon,.nav .fa.fa-spin,.nav .rst-content .code-block-caption .fa-spin.headerlink,.nav .rst-content .eqno .fa-spin.headerlink,.nav .rst-content .fa-spin.admonition-title,.nav .rst-content code.download span.fa-spin:first-child,.nav .rst-content dl dt .fa-spin.headerlink,.nav .rst-content h1 .fa-spin.headerlink,.nav .rst-content h2 .fa-spin.headerlink,.nav .rst-content h3 .fa-spin.headerlink,.nav .rst-content h4 .fa-spin.headerlink,.nav .rst-content h5 .fa-spin.headerlink,.nav .rst-content h6 .fa-spin.headerlink,.nav .rst-content p .fa-spin.headerlink,.nav .rst-content table>caption .fa-spin.headerlink,.nav .rst-content tt.download span.fa-spin:first-child,.nav .wy-menu-vertical li button.fa-spin.toctree-expand,.rst-content .btn .fa-spin.admonition-title,.rst-content .code-block-caption .btn .fa-spin.headerlink,.rst-content .code-block-caption .nav .fa-spin.headerlink,.rst-content .eqno .btn .fa-spin.headerlink,.rst-content .eqno .nav .fa-spin.headerlink,.rst-content .nav .fa-spin.admonition-title,.rst-content code.download 
.btn span.fa-spin:first-child,.rst-content code.download .nav span.fa-spin:first-child,.rst-content dl dt .btn .fa-spin.headerlink,.rst-content dl dt .nav .fa-spin.headerlink,.rst-content h1 .btn .fa-spin.headerlink,.rst-content h1 .nav .fa-spin.headerlink,.rst-content h2 .btn .fa-spin.headerlink,.rst-content h2 .nav .fa-spin.headerlink,.rst-content h3 .btn .fa-spin.headerlink,.rst-content h3 .nav .fa-spin.headerlink,.rst-content h4 .btn .fa-spin.headerlink,.rst-content h4 .nav .fa-spin.headerlink,.rst-content h5 .btn .fa-spin.headerlink,.rst-content h5 .nav .fa-spin.headerlink,.rst-content h6 .btn .fa-spin.headerlink,.rst-content h6 .nav .fa-spin.headerlink,.rst-content p .btn .fa-spin.headerlink,.rst-content p .nav .fa-spin.headerlink,.rst-content table>caption .btn .fa-spin.headerlink,.rst-content table>caption .nav .fa-spin.headerlink,.rst-content tt.download .btn span.fa-spin:first-child,.rst-content tt.download .nav span.fa-spin:first-child,.wy-menu-vertical li .btn button.fa-spin.toctree-expand,.wy-menu-vertical li .nav button.fa-spin.toctree-expand{display:inline-block}.btn.fa:before,.btn.icon:before,.rst-content .btn.admonition-title:before,.rst-content .code-block-caption .btn.headerlink:before,.rst-content .eqno .btn.headerlink:before,.rst-content code.download span.btn:first-child:before,.rst-content dl dt .btn.headerlink:before,.rst-content h1 .btn.headerlink:before,.rst-content h2 .btn.headerlink:before,.rst-content h3 .btn.headerlink:before,.rst-content h4 .btn.headerlink:before,.rst-content h5 .btn.headerlink:before,.rst-content h6 .btn.headerlink:before,.rst-content p .btn.headerlink:before,.rst-content table>caption .btn.headerlink:before,.rst-content tt.download span.btn:first-child:before,.wy-menu-vertical li button.btn.toctree-expand:before{opacity:.5;-webkit-transition:opacity .05s ease-in;-moz-transition:opacity .05s ease-in;transition:opacity .05s ease-in}.btn.fa:hover:before,.btn.icon:hover:before,.rst-content 
.btn.admonition-title:hover:before,.rst-content .code-block-caption .btn.headerlink:hover:before,.rst-content .eqno .btn.headerlink:hover:before,.rst-content code.download span.btn:first-child:hover:before,.rst-content dl dt .btn.headerlink:hover:before,.rst-content h1 .btn.headerlink:hover:before,.rst-content h2 .btn.headerlink:hover:before,.rst-content h3 .btn.headerlink:hover:before,.rst-content h4 .btn.headerlink:hover:before,.rst-content h5 .btn.headerlink:hover:before,.rst-content h6 .btn.headerlink:hover:before,.rst-content p .btn.headerlink:hover:before,.rst-content table>caption .btn.headerlink:hover:before,.rst-content tt.download span.btn:first-child:hover:before,.wy-menu-vertical li button.btn.toctree-expand:hover:before{opacity:1}.btn-mini .fa:before,.btn-mini .icon:before,.btn-mini .rst-content .admonition-title:before,.btn-mini .rst-content .code-block-caption .headerlink:before,.btn-mini .rst-content .eqno .headerlink:before,.btn-mini .rst-content code.download span:first-child:before,.btn-mini .rst-content dl dt .headerlink:before,.btn-mini .rst-content h1 .headerlink:before,.btn-mini .rst-content h2 .headerlink:before,.btn-mini .rst-content h3 .headerlink:before,.btn-mini .rst-content h4 .headerlink:before,.btn-mini .rst-content h5 .headerlink:before,.btn-mini .rst-content h6 .headerlink:before,.btn-mini .rst-content p .headerlink:before,.btn-mini .rst-content table>caption .headerlink:before,.btn-mini .rst-content tt.download span:first-child:before,.btn-mini .wy-menu-vertical li button.toctree-expand:before,.rst-content .btn-mini .admonition-title:before,.rst-content .code-block-caption .btn-mini .headerlink:before,.rst-content .eqno .btn-mini .headerlink:before,.rst-content code.download .btn-mini span:first-child:before,.rst-content dl dt .btn-mini .headerlink:before,.rst-content h1 .btn-mini .headerlink:before,.rst-content h2 .btn-mini .headerlink:before,.rst-content h3 .btn-mini .headerlink:before,.rst-content h4 .btn-mini 
.headerlink:before,.rst-content h5 .btn-mini .headerlink:before,.rst-content h6 .btn-mini .headerlink:before,.rst-content p .btn-mini .headerlink:before,.rst-content table>caption .btn-mini .headerlink:before,.rst-content tt.download .btn-mini span:first-child:before,.wy-menu-vertical li .btn-mini button.toctree-expand:before{font-size:14px;vertical-align:-15%}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning,.wy-alert{padding:12px;line-height:24px;margin-bottom:24px;background:#e7f2fa}.rst-content .admonition-title,.wy-alert-title{font-weight:700;display:block;color:#fff;background:#6ab0de;padding:6px 12px;margin:-12px -12px 12px}.rst-content .danger,.rst-content .error,.rst-content .wy-alert-danger.admonition,.rst-content .wy-alert-danger.admonition-todo,.rst-content .wy-alert-danger.attention,.rst-content .wy-alert-danger.caution,.rst-content .wy-alert-danger.hint,.rst-content .wy-alert-danger.important,.rst-content .wy-alert-danger.note,.rst-content .wy-alert-danger.seealso,.rst-content .wy-alert-danger.tip,.rst-content .wy-alert-danger.warning,.wy-alert.wy-alert-danger{background:#fdf3f2}.rst-content .danger .admonition-title,.rst-content .danger .wy-alert-title,.rst-content .error .admonition-title,.rst-content .error .wy-alert-title,.rst-content .wy-alert-danger.admonition-todo .admonition-title,.rst-content .wy-alert-danger.admonition-todo .wy-alert-title,.rst-content .wy-alert-danger.admonition .admonition-title,.rst-content .wy-alert-danger.admonition .wy-alert-title,.rst-content .wy-alert-danger.attention .admonition-title,.rst-content .wy-alert-danger.attention .wy-alert-title,.rst-content .wy-alert-danger.caution .admonition-title,.rst-content .wy-alert-danger.caution .wy-alert-title,.rst-content .wy-alert-danger.hint 
.admonition-title,.rst-content .wy-alert-danger.hint .wy-alert-title,.rst-content .wy-alert-danger.important .admonition-title,.rst-content .wy-alert-danger.important .wy-alert-title,.rst-content .wy-alert-danger.note .admonition-title,.rst-content .wy-alert-danger.note .wy-alert-title,.rst-content .wy-alert-danger.seealso .admonition-title,.rst-content .wy-alert-danger.seealso .wy-alert-title,.rst-content .wy-alert-danger.tip .admonition-title,.rst-content .wy-alert-danger.tip .wy-alert-title,.rst-content .wy-alert-danger.warning .admonition-title,.rst-content .wy-alert-danger.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-danger .admonition-title,.wy-alert.wy-alert-danger .rst-content .admonition-title,.wy-alert.wy-alert-danger .wy-alert-title{background:#f29f97}.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .warning,.rst-content .wy-alert-warning.admonition,.rst-content .wy-alert-warning.danger,.rst-content .wy-alert-warning.error,.rst-content .wy-alert-warning.hint,.rst-content .wy-alert-warning.important,.rst-content .wy-alert-warning.note,.rst-content .wy-alert-warning.seealso,.rst-content .wy-alert-warning.tip,.wy-alert.wy-alert-warning{background:#ffedcc}.rst-content .admonition-todo .admonition-title,.rst-content .admonition-todo .wy-alert-title,.rst-content .attention .admonition-title,.rst-content .attention .wy-alert-title,.rst-content .caution .admonition-title,.rst-content .caution .wy-alert-title,.rst-content .warning .admonition-title,.rst-content .warning .wy-alert-title,.rst-content .wy-alert-warning.admonition .admonition-title,.rst-content .wy-alert-warning.admonition .wy-alert-title,.rst-content .wy-alert-warning.danger .admonition-title,.rst-content .wy-alert-warning.danger .wy-alert-title,.rst-content .wy-alert-warning.error .admonition-title,.rst-content .wy-alert-warning.error .wy-alert-title,.rst-content .wy-alert-warning.hint .admonition-title,.rst-content .wy-alert-warning.hint 
.wy-alert-title,.rst-content .wy-alert-warning.important .admonition-title,.rst-content .wy-alert-warning.important .wy-alert-title,.rst-content .wy-alert-warning.note .admonition-title,.rst-content .wy-alert-warning.note .wy-alert-title,.rst-content .wy-alert-warning.seealso .admonition-title,.rst-content .wy-alert-warning.seealso .wy-alert-title,.rst-content .wy-alert-warning.tip .admonition-title,.rst-content .wy-alert-warning.tip .wy-alert-title,.rst-content .wy-alert.wy-alert-warning .admonition-title,.wy-alert.wy-alert-warning .rst-content .admonition-title,.wy-alert.wy-alert-warning .wy-alert-title{background:#f0b37e}.rst-content .note,.rst-content .seealso,.rst-content .wy-alert-info.admonition,.rst-content .wy-alert-info.admonition-todo,.rst-content .wy-alert-info.attention,.rst-content .wy-alert-info.caution,.rst-content .wy-alert-info.danger,.rst-content .wy-alert-info.error,.rst-content .wy-alert-info.hint,.rst-content .wy-alert-info.important,.rst-content .wy-alert-info.tip,.rst-content .wy-alert-info.warning,.wy-alert.wy-alert-info{background:#e7f2fa}.rst-content .note .admonition-title,.rst-content .note .wy-alert-title,.rst-content .seealso .admonition-title,.rst-content .seealso .wy-alert-title,.rst-content .wy-alert-info.admonition-todo .admonition-title,.rst-content .wy-alert-info.admonition-todo .wy-alert-title,.rst-content .wy-alert-info.admonition .admonition-title,.rst-content .wy-alert-info.admonition .wy-alert-title,.rst-content .wy-alert-info.attention .admonition-title,.rst-content .wy-alert-info.attention .wy-alert-title,.rst-content .wy-alert-info.caution .admonition-title,.rst-content .wy-alert-info.caution .wy-alert-title,.rst-content .wy-alert-info.danger .admonition-title,.rst-content .wy-alert-info.danger .wy-alert-title,.rst-content .wy-alert-info.error .admonition-title,.rst-content .wy-alert-info.error .wy-alert-title,.rst-content .wy-alert-info.hint .admonition-title,.rst-content .wy-alert-info.hint .wy-alert-title,.rst-content 
.wy-alert-info.important .admonition-title,.rst-content .wy-alert-info.important .wy-alert-title,.rst-content .wy-alert-info.tip .admonition-title,.rst-content .wy-alert-info.tip .wy-alert-title,.rst-content .wy-alert-info.warning .admonition-title,.rst-content .wy-alert-info.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-info .admonition-title,.wy-alert.wy-alert-info .rst-content .admonition-title,.wy-alert.wy-alert-info .wy-alert-title{background:#6ab0de}.rst-content .hint,.rst-content .important,.rst-content .tip,.rst-content .wy-alert-success.admonition,.rst-content .wy-alert-success.admonition-todo,.rst-content .wy-alert-success.attention,.rst-content .wy-alert-success.caution,.rst-content .wy-alert-success.danger,.rst-content .wy-alert-success.error,.rst-content .wy-alert-success.note,.rst-content .wy-alert-success.seealso,.rst-content .wy-alert-success.warning,.wy-alert.wy-alert-success{background:#dbfaf4}.rst-content .hint .admonition-title,.rst-content .hint .wy-alert-title,.rst-content .important .admonition-title,.rst-content .important .wy-alert-title,.rst-content .tip .admonition-title,.rst-content .tip .wy-alert-title,.rst-content .wy-alert-success.admonition-todo .admonition-title,.rst-content .wy-alert-success.admonition-todo .wy-alert-title,.rst-content .wy-alert-success.admonition .admonition-title,.rst-content .wy-alert-success.admonition .wy-alert-title,.rst-content .wy-alert-success.attention .admonition-title,.rst-content .wy-alert-success.attention .wy-alert-title,.rst-content .wy-alert-success.caution .admonition-title,.rst-content .wy-alert-success.caution .wy-alert-title,.rst-content .wy-alert-success.danger .admonition-title,.rst-content .wy-alert-success.danger .wy-alert-title,.rst-content .wy-alert-success.error .admonition-title,.rst-content .wy-alert-success.error .wy-alert-title,.rst-content .wy-alert-success.note .admonition-title,.rst-content .wy-alert-success.note .wy-alert-title,.rst-content .wy-alert-success.seealso 
.admonition-title,.rst-content .wy-alert-success.seealso .wy-alert-title,.rst-content .wy-alert-success.warning .admonition-title,.rst-content .wy-alert-success.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-success .admonition-title,.wy-alert.wy-alert-success .rst-content .admonition-title,.wy-alert.wy-alert-success .wy-alert-title{background:#1abc9c}.rst-content .wy-alert-neutral.admonition,.rst-content .wy-alert-neutral.admonition-todo,.rst-content .wy-alert-neutral.attention,.rst-content .wy-alert-neutral.caution,.rst-content .wy-alert-neutral.danger,.rst-content .wy-alert-neutral.error,.rst-content .wy-alert-neutral.hint,.rst-content .wy-alert-neutral.important,.rst-content .wy-alert-neutral.note,.rst-content .wy-alert-neutral.seealso,.rst-content .wy-alert-neutral.tip,.rst-content .wy-alert-neutral.warning,.wy-alert.wy-alert-neutral{background:#f3f6f6}.rst-content .wy-alert-neutral.admonition-todo .admonition-title,.rst-content .wy-alert-neutral.admonition-todo .wy-alert-title,.rst-content .wy-alert-neutral.admonition .admonition-title,.rst-content .wy-alert-neutral.admonition .wy-alert-title,.rst-content .wy-alert-neutral.attention .admonition-title,.rst-content .wy-alert-neutral.attention .wy-alert-title,.rst-content .wy-alert-neutral.caution .admonition-title,.rst-content .wy-alert-neutral.caution .wy-alert-title,.rst-content .wy-alert-neutral.danger .admonition-title,.rst-content .wy-alert-neutral.danger .wy-alert-title,.rst-content .wy-alert-neutral.error .admonition-title,.rst-content .wy-alert-neutral.error .wy-alert-title,.rst-content .wy-alert-neutral.hint .admonition-title,.rst-content .wy-alert-neutral.hint .wy-alert-title,.rst-content .wy-alert-neutral.important .admonition-title,.rst-content .wy-alert-neutral.important .wy-alert-title,.rst-content .wy-alert-neutral.note .admonition-title,.rst-content .wy-alert-neutral.note .wy-alert-title,.rst-content .wy-alert-neutral.seealso .admonition-title,.rst-content .wy-alert-neutral.seealso 
.wy-alert-title,.rst-content .wy-alert-neutral.tip .admonition-title,.rst-content .wy-alert-neutral.tip .wy-alert-title,.rst-content .wy-alert-neutral.warning .admonition-title,.rst-content .wy-alert-neutral.warning .wy-alert-title,.rst-content .wy-alert.wy-alert-neutral .admonition-title,.wy-alert.wy-alert-neutral .rst-content .admonition-title,.wy-alert.wy-alert-neutral .wy-alert-title{color:#404040;background:#e1e4e5}.rst-content .wy-alert-neutral.admonition-todo a,.rst-content .wy-alert-neutral.admonition a,.rst-content .wy-alert-neutral.attention a,.rst-content .wy-alert-neutral.caution a,.rst-content .wy-alert-neutral.danger a,.rst-content .wy-alert-neutral.error a,.rst-content .wy-alert-neutral.hint a,.rst-content .wy-alert-neutral.important a,.rst-content .wy-alert-neutral.note a,.rst-content .wy-alert-neutral.seealso a,.rst-content .wy-alert-neutral.tip a,.rst-content .wy-alert-neutral.warning a,.wy-alert.wy-alert-neutral a{color:#2980b9}.rst-content .admonition-todo p:last-child,.rst-content .admonition p:last-child,.rst-content .attention p:last-child,.rst-content .caution p:last-child,.rst-content .danger p:last-child,.rst-content .error p:last-child,.rst-content .hint p:last-child,.rst-content .important p:last-child,.rst-content .note p:last-child,.rst-content .seealso p:last-child,.rst-content .tip p:last-child,.rst-content .warning p:last-child,.wy-alert p:last-child{margin-bottom:0}.wy-tray-container{position:fixed;bottom:0;left:0;z-index:600}.wy-tray-container li{display:block;width:300px;background:transparent;color:#fff;text-align:center;box-shadow:0 5px 5px 0 rgba(0,0,0,.1);padding:0 24px;min-width:20%;opacity:0;height:0;line-height:56px;overflow:hidden;-webkit-transition:all .3s ease-in;-moz-transition:all .3s ease-in;transition:all .3s ease-in}.wy-tray-container li.wy-tray-item-success{background:#27ae60}.wy-tray-container li.wy-tray-item-info{background:#2980b9}.wy-tray-container li.wy-tray-item-warning{background:#e67e22}.wy-tray-container 
li.wy-tray-item-danger{background:#e74c3c}.wy-tray-container li.on{opacity:1;height:56px}@media screen and (max-width:768px){.wy-tray-container{bottom:auto;top:0;width:100%}.wy-tray-container li{width:100%}}button{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle;cursor:pointer;line-height:normal;-webkit-appearance:button;*overflow:visible}button::-moz-focus-inner,input::-moz-focus-inner{border:0;padding:0}button[disabled]{cursor:default}.btn{display:inline-block;border-radius:2px;line-height:normal;white-space:nowrap;text-align:center;cursor:pointer;font-size:100%;padding:6px 12px 8px;color:#fff;border:1px solid rgba(0,0,0,.1);background-color:#27ae60;text-decoration:none;font-weight:400;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 2px -1px hsla(0,0%,100%,.5),inset 0 -2px 0 0 rgba(0,0,0,.1);outline-none:false;vertical-align:middle;*display:inline;zoom:1;-webkit-user-drag:none;-webkit-user-select:none;-moz-user-select:none;-ms-user-select:none;user-select:none;-webkit-transition:all .1s linear;-moz-transition:all .1s linear;transition:all .1s linear}.btn-hover{background:#2e8ece;color:#fff}.btn:hover{background:#2cc36b;color:#fff}.btn:focus{background:#2cc36b;outline:0}.btn:active{box-shadow:inset 0 -1px 0 0 rgba(0,0,0,.05),inset 0 2px 0 0 rgba(0,0,0,.1);padding:8px 12px 6px}.btn:visited{color:#fff}.btn-disabled,.btn-disabled:active,.btn-disabled:focus,.btn-disabled:hover,.btn:disabled{background-image:none;filter:progid:DXImageTransform.Microsoft.gradient(enabled = 
false);filter:alpha(opacity=40);opacity:.4;cursor:not-allowed;box-shadow:none}.btn::-moz-focus-inner{padding:0;border:0}.btn-small{font-size:80%}.btn-info{background-color:#2980b9!important}.btn-info:hover{background-color:#2e8ece!important}.btn-neutral{background-color:#f3f6f6!important;color:#404040!important}.btn-neutral:hover{background-color:#e5ebeb!important;color:#404040}.btn-neutral:visited{color:#404040!important}.btn-success{background-color:#27ae60!important}.btn-success:hover{background-color:#295!important}.btn-danger{background-color:#e74c3c!important}.btn-danger:hover{background-color:#ea6153!important}.btn-warning{background-color:#e67e22!important}.btn-warning:hover{background-color:#e98b39!important}.btn-invert{background-color:#222}.btn-invert:hover{background-color:#2f2f2f!important}.btn-link{background-color:transparent!important;color:#2980b9;box-shadow:none;border-color:transparent!important}.btn-link:active,.btn-link:hover{background-color:transparent!important;color:#409ad5!important;box-shadow:none}.btn-link:visited{color:#9b59b6}.wy-btn-group .btn,.wy-control .btn{vertical-align:middle}.wy-btn-group{margin-bottom:24px;*zoom:1}.wy-btn-group:after,.wy-btn-group:before{display:table;content:""}.wy-btn-group:after{clear:both}.wy-dropdown{position:relative;display:inline-block}.wy-dropdown-active .wy-dropdown-menu{display:block}.wy-dropdown-menu{position:absolute;left:0;display:none;float:left;top:100%;min-width:100%;background:#fcfcfc;z-index:100;border:1px solid #cfd7dd;box-shadow:0 2px 2px 0 rgba(0,0,0,.1);padding:12px}.wy-dropdown-menu>dd>a{display:block;clear:both;color:#404040;white-space:nowrap;font-size:90%;padding:0 12px;cursor:pointer}.wy-dropdown-menu>dd>a:hover{background:#2980b9;color:#fff}.wy-dropdown-menu>dd.divider{border-top:1px solid #cfd7dd;margin:6px 0}.wy-dropdown-menu>dd.search{padding-bottom:12px}.wy-dropdown-menu>dd.search 
input[type=search]{width:100%}.wy-dropdown-menu>dd.call-to-action{background:#e3e3e3;text-transform:uppercase;font-weight:500;font-size:80%}.wy-dropdown-menu>dd.call-to-action:hover{background:#e3e3e3}.wy-dropdown-menu>dd.call-to-action .btn{color:#fff}.wy-dropdown.wy-dropdown-up .wy-dropdown-menu{bottom:100%;top:auto;left:auto;right:0}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu{background:#fcfcfc;margin-top:2px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a{padding:6px 12px}.wy-dropdown.wy-dropdown-bubble .wy-dropdown-menu a:hover{background:#2980b9;color:#fff}.wy-dropdown.wy-dropdown-left .wy-dropdown-menu{right:0;left:auto;text-align:right}.wy-dropdown-arrow:before{content:" ";border-bottom:5px solid #f5f5f5;border-left:5px solid transparent;border-right:5px solid transparent;position:absolute;display:block;top:-4px;left:50%;margin-left:-3px}.wy-dropdown-arrow.wy-dropdown-arrow-left:before{left:11px}.wy-form-stacked select{display:block}.wy-form-aligned .wy-help-inline,.wy-form-aligned input,.wy-form-aligned label,.wy-form-aligned select,.wy-form-aligned textarea{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-form-aligned .wy-control-group>label{display:inline-block;vertical-align:middle;width:10em;margin:6px 12px 0 0;float:left}.wy-form-aligned .wy-control{float:left}.wy-form-aligned .wy-control label{display:block}.wy-form-aligned .wy-control select{margin-top:6px}fieldset{margin:0}fieldset,legend{border:0;padding:0}legend{width:100%;white-space:normal;margin-bottom:24px;font-size:150%;*margin-left:-7px}label,legend{display:block}label{margin:0 0 
.3125em;color:#333;font-size:90%}input,select,textarea{font-size:100%;margin:0;vertical-align:baseline;*vertical-align:middle}.wy-control-group{margin-bottom:24px;max-width:1200px;margin-left:auto;margin-right:auto;*zoom:1}.wy-control-group:after,.wy-control-group:before{display:table;content:""}.wy-control-group:after{clear:both}.wy-control-group.wy-control-group-required>label:after{content:" *";color:#e74c3c}.wy-control-group .wy-form-full,.wy-control-group .wy-form-halves,.wy-control-group .wy-form-thirds{padding-bottom:12px}.wy-control-group .wy-form-full input[type=color],.wy-control-group .wy-form-full input[type=date],.wy-control-group .wy-form-full input[type=datetime-local],.wy-control-group .wy-form-full input[type=datetime],.wy-control-group .wy-form-full input[type=email],.wy-control-group .wy-form-full input[type=month],.wy-control-group .wy-form-full input[type=number],.wy-control-group .wy-form-full input[type=password],.wy-control-group .wy-form-full input[type=search],.wy-control-group .wy-form-full input[type=tel],.wy-control-group .wy-form-full input[type=text],.wy-control-group .wy-form-full input[type=time],.wy-control-group .wy-form-full input[type=url],.wy-control-group .wy-form-full input[type=week],.wy-control-group .wy-form-full select,.wy-control-group .wy-form-halves input[type=color],.wy-control-group .wy-form-halves input[type=date],.wy-control-group .wy-form-halves input[type=datetime-local],.wy-control-group .wy-form-halves input[type=datetime],.wy-control-group .wy-form-halves input[type=email],.wy-control-group .wy-form-halves input[type=month],.wy-control-group .wy-form-halves input[type=number],.wy-control-group .wy-form-halves input[type=password],.wy-control-group .wy-form-halves input[type=search],.wy-control-group .wy-form-halves input[type=tel],.wy-control-group .wy-form-halves input[type=text],.wy-control-group .wy-form-halves input[type=time],.wy-control-group .wy-form-halves input[type=url],.wy-control-group 
.wy-form-halves input[type=week],.wy-control-group .wy-form-halves select,.wy-control-group .wy-form-thirds input[type=color],.wy-control-group .wy-form-thirds input[type=date],.wy-control-group .wy-form-thirds input[type=datetime-local],.wy-control-group .wy-form-thirds input[type=datetime],.wy-control-group .wy-form-thirds input[type=email],.wy-control-group .wy-form-thirds input[type=month],.wy-control-group .wy-form-thirds input[type=number],.wy-control-group .wy-form-thirds input[type=password],.wy-control-group .wy-form-thirds input[type=search],.wy-control-group .wy-form-thirds input[type=tel],.wy-control-group .wy-form-thirds input[type=text],.wy-control-group .wy-form-thirds input[type=time],.wy-control-group .wy-form-thirds input[type=url],.wy-control-group .wy-form-thirds input[type=week],.wy-control-group .wy-form-thirds select{width:100%}.wy-control-group .wy-form-full{float:left;display:block;width:100%;margin-right:0}.wy-control-group .wy-form-full:last-child{margin-right:0}.wy-control-group .wy-form-halves{float:left;display:block;margin-right:2.35765%;width:48.82117%}.wy-control-group .wy-form-halves:last-child,.wy-control-group .wy-form-halves:nth-of-type(2n){margin-right:0}.wy-control-group .wy-form-halves:nth-of-type(odd){clear:left}.wy-control-group .wy-form-thirds{float:left;display:block;margin-right:2.35765%;width:31.76157%}.wy-control-group .wy-form-thirds:last-child,.wy-control-group .wy-form-thirds:nth-of-type(3n){margin-right:0}.wy-control-group .wy-form-thirds:nth-of-type(3n+1){clear:left}.wy-control-group.wy-control-group-no-input .wy-control,.wy-control-no-input{margin:6px 0 0;font-size:90%}.wy-control-no-input{display:inline-block}.wy-control-group.fluid-input input[type=color],.wy-control-group.fluid-input input[type=date],.wy-control-group.fluid-input input[type=datetime-local],.wy-control-group.fluid-input input[type=datetime],.wy-control-group.fluid-input input[type=email],.wy-control-group.fluid-input 
input[type=month],.wy-control-group.fluid-input input[type=number],.wy-control-group.fluid-input input[type=password],.wy-control-group.fluid-input input[type=search],.wy-control-group.fluid-input input[type=tel],.wy-control-group.fluid-input input[type=text],.wy-control-group.fluid-input input[type=time],.wy-control-group.fluid-input input[type=url],.wy-control-group.fluid-input input[type=week]{width:100%}.wy-form-message-inline{padding-left:.3em;color:#666;font-size:90%}.wy-form-message{display:block;color:#999;font-size:70%;margin-top:.3125em;font-style:italic}.wy-form-message p{font-size:inherit;font-style:italic;margin-bottom:6px}.wy-form-message p:last-child{margin-bottom:0}input{line-height:normal}input[type=button],input[type=reset],input[type=submit]{-webkit-appearance:button;cursor:pointer;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;*overflow:visible}input[type=color],input[type=date],input[type=datetime-local],input[type=datetime],input[type=email],input[type=month],input[type=number],input[type=password],input[type=search],input[type=tel],input[type=text],input[type=time],input[type=url],input[type=week]{-webkit-appearance:none;padding:6px;display:inline-block;border:1px solid #ccc;font-size:80%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;box-shadow:inset 0 1px 3px #ddd;border-radius:0;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}input[type=datetime-local]{padding:.34375em 
.625em}input[disabled]{cursor:default}input[type=checkbox],input[type=radio]{padding:0;margin-right:.3125em;*height:13px;*width:13px}input[type=checkbox],input[type=radio],input[type=search]{-webkit-box-sizing:border-box;-moz-box-sizing:border-box;box-sizing:border-box}input[type=search]::-webkit-search-cancel-button,input[type=search]::-webkit-search-decoration{-webkit-appearance:none}input[type=color]:focus,input[type=date]:focus,input[type=datetime-local]:focus,input[type=datetime]:focus,input[type=email]:focus,input[type=month]:focus,input[type=number]:focus,input[type=password]:focus,input[type=search]:focus,input[type=tel]:focus,input[type=text]:focus,input[type=time]:focus,input[type=url]:focus,input[type=week]:focus{outline:0;outline:thin dotted\9;border-color:#333}input.no-focus:focus{border-color:#ccc!important}input[type=checkbox]:focus,input[type=file]:focus,input[type=radio]:focus{outline:thin dotted #333;outline:1px auto #129fea}input[type=color][disabled],input[type=date][disabled],input[type=datetime-local][disabled],input[type=datetime][disabled],input[type=email][disabled],input[type=month][disabled],input[type=number][disabled],input[type=password][disabled],input[type=search][disabled],input[type=tel][disabled],input[type=text][disabled],input[type=time][disabled],input[type=url][disabled],input[type=week][disabled]{cursor:not-allowed;background-color:#fafafa}input:focus:invalid,select:focus:invalid,textarea:focus:invalid{color:#e74c3c;border:1px solid #e74c3c}input:focus:invalid:focus,select:focus:invalid:focus,textarea:focus:invalid:focus{border-color:#e74c3c}input[type=checkbox]:focus:invalid:focus,input[type=file]:focus:invalid:focus,input[type=radio]:focus:invalid:focus{outline-color:#e74c3c}input.wy-input-large{padding:12px;font-size:100%}textarea{overflow:auto;vertical-align:top;width:100%;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif}select,textarea{padding:.5em .625em;display:inline-block;border:1px solid 
#ccc;font-size:80%;box-shadow:inset 0 1px 3px #ddd;-webkit-transition:border .3s linear;-moz-transition:border .3s linear;transition:border .3s linear}select{border:1px solid #ccc;background-color:#fff}select[multiple]{height:auto}select:focus,textarea:focus{outline:0}input[readonly],select[disabled],select[readonly],textarea[disabled],textarea[readonly]{cursor:not-allowed;background-color:#fafafa}input[type=checkbox][disabled],input[type=radio][disabled]{cursor:not-allowed}.wy-checkbox,.wy-radio{margin:6px 0;color:#404040;display:block}.wy-checkbox input,.wy-radio input{vertical-align:baseline}.wy-form-message-inline{display:inline-block;*display:inline;*zoom:1;vertical-align:middle}.wy-input-prefix,.wy-input-suffix{white-space:nowrap;padding:6px}.wy-input-prefix .wy-input-context,.wy-input-suffix .wy-input-context{line-height:27px;padding:0 8px;display:inline-block;font-size:80%;background-color:#f3f6f6;border:1px solid #ccc;color:#999}.wy-input-suffix .wy-input-context{border-left:0}.wy-input-prefix .wy-input-context{border-right:0}.wy-switch{position:relative;display:block;height:24px;margin-top:12px;cursor:pointer}.wy-switch:before{left:0;top:0;width:36px;height:12px;background:#ccc}.wy-switch:after,.wy-switch:before{position:absolute;content:"";display:block;border-radius:4px;-webkit-transition:all .2s ease-in-out;-moz-transition:all .2s ease-in-out;transition:all .2s ease-in-out}.wy-switch:after{width:18px;height:18px;background:#999;left:-3px;top:-3px}.wy-switch span{position:absolute;left:48px;display:block;font-size:12px;color:#ccc;line-height:1}.wy-switch.active:before{background:#1e8449}.wy-switch.active:after{left:24px;background:#27ae60}.wy-switch.disabled{cursor:not-allowed;opacity:.8}.wy-control-group.wy-control-group-error .wy-form-message,.wy-control-group.wy-control-group-error>label{color:#e74c3c}.wy-control-group.wy-control-group-error input[type=color],.wy-control-group.wy-control-group-error 
input[type=date],.wy-control-group.wy-control-group-error input[type=datetime-local],.wy-control-group.wy-control-group-error input[type=datetime],.wy-control-group.wy-control-group-error input[type=email],.wy-control-group.wy-control-group-error input[type=month],.wy-control-group.wy-control-group-error input[type=number],.wy-control-group.wy-control-group-error input[type=password],.wy-control-group.wy-control-group-error input[type=search],.wy-control-group.wy-control-group-error input[type=tel],.wy-control-group.wy-control-group-error input[type=text],.wy-control-group.wy-control-group-error input[type=time],.wy-control-group.wy-control-group-error input[type=url],.wy-control-group.wy-control-group-error input[type=week],.wy-control-group.wy-control-group-error textarea{border:1px solid #e74c3c}.wy-inline-validate{white-space:nowrap}.wy-inline-validate .wy-input-context{padding:.5em .625em;display:inline-block;font-size:80%}.wy-inline-validate.wy-inline-validate-success .wy-input-context{color:#27ae60}.wy-inline-validate.wy-inline-validate-danger .wy-input-context{color:#e74c3c}.wy-inline-validate.wy-inline-validate-warning .wy-input-context{color:#e67e22}.wy-inline-validate.wy-inline-validate-info .wy-input-context{color:#2980b9}.rotate-90{-webkit-transform:rotate(90deg);-moz-transform:rotate(90deg);-ms-transform:rotate(90deg);-o-transform:rotate(90deg);transform:rotate(90deg)}.rotate-180{-webkit-transform:rotate(180deg);-moz-transform:rotate(180deg);-ms-transform:rotate(180deg);-o-transform:rotate(180deg);transform:rotate(180deg)}.rotate-270{-webkit-transform:rotate(270deg);-moz-transform:rotate(270deg);-ms-transform:rotate(270deg);-o-transform:rotate(270deg);transform:rotate(270deg)}.mirror{-webkit-transform:scaleX(-1);-moz-transform:scaleX(-1);-ms-transform:scaleX(-1);-o-transform:scaleX(-1);transform:scaleX(-1)}.mirror.rotate-90{-webkit-transform:scaleX(-1) rotate(90deg);-moz-transform:scaleX(-1) rotate(90deg);-ms-transform:scaleX(-1) 
rotate(90deg);-o-transform:scaleX(-1) rotate(90deg);transform:scaleX(-1) rotate(90deg)}.mirror.rotate-180{-webkit-transform:scaleX(-1) rotate(180deg);-moz-transform:scaleX(-1) rotate(180deg);-ms-transform:scaleX(-1) rotate(180deg);-o-transform:scaleX(-1) rotate(180deg);transform:scaleX(-1) rotate(180deg)}.mirror.rotate-270{-webkit-transform:scaleX(-1) rotate(270deg);-moz-transform:scaleX(-1) rotate(270deg);-ms-transform:scaleX(-1) rotate(270deg);-o-transform:scaleX(-1) rotate(270deg);transform:scaleX(-1) rotate(270deg)}@media only screen and (max-width:480px){.wy-form button[type=submit]{margin:.7em 0 0}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=text],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week],.wy-form label{margin-bottom:.3em;display:block}.wy-form input[type=color],.wy-form input[type=date],.wy-form input[type=datetime-local],.wy-form input[type=datetime],.wy-form input[type=email],.wy-form input[type=month],.wy-form input[type=number],.wy-form input[type=password],.wy-form input[type=search],.wy-form input[type=tel],.wy-form input[type=time],.wy-form input[type=url],.wy-form input[type=week]{margin-bottom:0}.wy-form-aligned .wy-control-group label{margin-bottom:.3em;text-align:left;display:block;width:100%}.wy-form-aligned .wy-control{margin:1.5em 0 0}.wy-form-message,.wy-form-message-inline,.wy-form .wy-help-inline{display:block;font-size:80%;padding:6px 0}}@media screen and (max-width:768px){.tablet-hide{display:none}}@media screen and (max-width:480px){.mobile-hide{display:none}}.float-left{float:left}.float-right{float:right}.full-width{width:100%}.rst-content table.docutils,.rst-content 
table.field-list,.wy-table{border-collapse:collapse;border-spacing:0;empty-cells:show;margin-bottom:24px}.rst-content table.docutils caption,.rst-content table.field-list caption,.wy-table caption{color:#000;font:italic 85%/1 arial,sans-serif;padding:1em 0;text-align:center}.rst-content table.docutils td,.rst-content table.docutils th,.rst-content table.field-list td,.rst-content table.field-list th,.wy-table td,.wy-table th{font-size:90%;margin:0;overflow:visible;padding:8px 16px}.rst-content table.docutils td:first-child,.rst-content table.docutils th:first-child,.rst-content table.field-list td:first-child,.rst-content table.field-list th:first-child,.wy-table td:first-child,.wy-table th:first-child{border-left-width:0}.rst-content table.docutils thead,.rst-content table.field-list thead,.wy-table thead{color:#000;text-align:left;vertical-align:bottom;white-space:nowrap}.rst-content table.docutils thead th,.rst-content table.field-list thead th,.wy-table thead th{font-weight:700;border-bottom:2px solid #e1e4e5}.rst-content table.docutils td,.rst-content table.field-list td,.wy-table td{background-color:transparent;vertical-align:middle}.rst-content table.docutils td p,.rst-content table.field-list td p,.wy-table td p{line-height:18px}.rst-content table.docutils td p:last-child,.rst-content table.field-list td p:last-child,.wy-table td p:last-child{margin-bottom:0}.rst-content table.docutils .wy-table-cell-min,.rst-content table.field-list .wy-table-cell-min,.wy-table .wy-table-cell-min{width:1%;padding-right:0}.rst-content table.docutils .wy-table-cell-min input[type=checkbox],.rst-content table.field-list .wy-table-cell-min input[type=checkbox],.wy-table .wy-table-cell-min input[type=checkbox]{margin:0}.wy-table-secondary{color:grey;font-size:90%}.wy-table-tertiary{color:grey;font-size:80%}.rst-content table.docutils:not(.field-list) tr:nth-child(2n-1) td,.wy-table-backed,.wy-table-odd td,.wy-table-striped tr:nth-child(2n-1) 
td{background-color:#f3f6f6}.rst-content table.docutils,.wy-table-bordered-all{border:1px solid #e1e4e5}.rst-content table.docutils td,.wy-table-bordered-all td{border-bottom:1px solid #e1e4e5;border-left:1px solid #e1e4e5}.rst-content table.docutils tbody>tr:last-child td,.wy-table-bordered-all tbody>tr:last-child td{border-bottom-width:0}.wy-table-bordered{border:1px solid #e1e4e5}.wy-table-bordered-rows td{border-bottom:1px solid #e1e4e5}.wy-table-bordered-rows tbody>tr:last-child td{border-bottom-width:0}.wy-table-horizontal td,.wy-table-horizontal th{border-width:0 0 1px;border-bottom:1px solid #e1e4e5}.wy-table-horizontal tbody>tr:last-child td{border-bottom-width:0}.wy-table-responsive{margin-bottom:24px;max-width:100%;overflow:auto}.wy-table-responsive table{margin-bottom:0!important}.wy-table-responsive table td,.wy-table-responsive table th{white-space:nowrap}a{color:#2980b9;text-decoration:none;cursor:pointer}a:hover{color:#3091d1}a:visited{color:#9b59b6}html{height:100%}body,html{overflow-x:hidden}body{font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;font-weight:400;color:#404040;min-height:100%;background:#edf0f2}.wy-text-left{text-align:left}.wy-text-center{text-align:center}.wy-text-right{text-align:right}.wy-text-large{font-size:120%}.wy-text-normal{font-size:100%}.wy-text-small,small{font-size:80%}.wy-text-strike{text-decoration:line-through}.wy-text-warning{color:#e67e22!important}a.wy-text-warning:hover{color:#eb9950!important}.wy-text-info{color:#2980b9!important}a.wy-text-info:hover{color:#409ad5!important}.wy-text-success{color:#27ae60!important}a.wy-text-success:hover{color:#36d278!important}.wy-text-danger{color:#e74c3c!important}a.wy-text-danger:hover{color:#ed7669!important}.wy-text-neutral{color:#404040!important}a.wy-text-neutral:hover{color:#595959!important}.rst-content .toctree-wrapper>p.caption,h1,h2,h3,h4,h5,h6,legend{margin-top:0;font-weight:700;font-family:Roboto 
Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif}p{line-height:24px;font-size:16px;margin:0 0 24px}h1{font-size:175%}.rst-content .toctree-wrapper>p.caption,h2{font-size:150%}h3{font-size:125%}h4{font-size:115%}h5{font-size:110%}h6{font-size:100%}hr{display:block;height:1px;border:0;border-top:1px solid #e1e4e5;margin:24px 0;padding:0}.rst-content code,.rst-content tt,code{white-space:nowrap;max-width:100%;background:#fff;border:1px solid #e1e4e5;font-size:75%;padding:0 5px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#e74c3c;overflow-x:auto}.rst-content tt.code-large,code.code-large{font-size:90%}.rst-content .section ul,.rst-content .toctree-wrapper ul,.rst-content section ul,.wy-plain-list-disc,article ul{list-style:disc;line-height:24px;margin-bottom:24px}.rst-content .section ul li,.rst-content .toctree-wrapper ul li,.rst-content section ul li,.wy-plain-list-disc li,article ul li{list-style:disc;margin-left:24px}.rst-content .section ul li p:last-child,.rst-content .section ul li ul,.rst-content .toctree-wrapper ul li p:last-child,.rst-content .toctree-wrapper ul li ul,.rst-content section ul li p:last-child,.rst-content section ul li ul,.wy-plain-list-disc li p:last-child,.wy-plain-list-disc li ul,article ul li p:last-child,article ul li ul{margin-bottom:0}.rst-content .section ul li li,.rst-content .toctree-wrapper ul li li,.rst-content section ul li li,.wy-plain-list-disc li li,article ul li li{list-style:circle}.rst-content .section ul li li li,.rst-content .toctree-wrapper ul li li li,.rst-content section ul li li li,.wy-plain-list-disc li li li,article ul li li li{list-style:square}.rst-content .section ul li ol li,.rst-content .toctree-wrapper ul li ol li,.rst-content section ul li ol li,.wy-plain-list-disc li ol li,article ul li ol li{list-style:decimal}.rst-content .section ol,.rst-content .section ol.arabic,.rst-content .toctree-wrapper ol,.rst-content .toctree-wrapper ol.arabic,.rst-content 
section ol,.rst-content section ol.arabic,.wy-plain-list-decimal,article ol{list-style:decimal;line-height:24px;margin-bottom:24px}.rst-content .section ol.arabic li,.rst-content .section ol li,.rst-content .toctree-wrapper ol.arabic li,.rst-content .toctree-wrapper ol li,.rst-content section ol.arabic li,.rst-content section ol li,.wy-plain-list-decimal li,article ol li{list-style:decimal;margin-left:24px}.rst-content .section ol.arabic li ul,.rst-content .section ol li p:last-child,.rst-content .section ol li ul,.rst-content .toctree-wrapper ol.arabic li ul,.rst-content .toctree-wrapper ol li p:last-child,.rst-content .toctree-wrapper ol li ul,.rst-content section ol.arabic li ul,.rst-content section ol li p:last-child,.rst-content section ol li ul,.wy-plain-list-decimal li p:last-child,.wy-plain-list-decimal li ul,article ol li p:last-child,article ol li ul{margin-bottom:0}.rst-content .section ol.arabic li ul li,.rst-content .section ol li ul li,.rst-content .toctree-wrapper ol.arabic li ul li,.rst-content .toctree-wrapper ol li ul li,.rst-content section ol.arabic li ul li,.rst-content section ol li ul li,.wy-plain-list-decimal li ul li,article ol li ul li{list-style:disc}.wy-breadcrumbs{*zoom:1}.wy-breadcrumbs:after,.wy-breadcrumbs:before{display:table;content:""}.wy-breadcrumbs:after{clear:both}.wy-breadcrumbs>li{display:inline-block;padding-top:5px}.wy-breadcrumbs>li.wy-breadcrumbs-aside{float:right}.rst-content .wy-breadcrumbs>li code,.rst-content .wy-breadcrumbs>li tt,.wy-breadcrumbs>li .rst-content tt,.wy-breadcrumbs>li code{all:inherit;color:inherit}.breadcrumb-item:before{content:"/";color:#bbb;font-size:13px;padding:0 6px 0 3px}.wy-breadcrumbs-extra{margin-bottom:0;color:#b3b3b3;font-size:80%;display:inline-block}@media screen and (max-width:480px){.wy-breadcrumbs-extra,.wy-breadcrumbs li.wy-breadcrumbs-aside{display:none}}@media print{.wy-breadcrumbs 
li.wy-breadcrumbs-aside{display:none}}html{font-size:16px}.wy-affix{position:fixed;top:1.618em}.wy-menu a:hover{text-decoration:none}.wy-menu-horiz{*zoom:1}.wy-menu-horiz:after,.wy-menu-horiz:before{display:table;content:""}.wy-menu-horiz:after{clear:both}.wy-menu-horiz li,.wy-menu-horiz ul{display:inline-block}.wy-menu-horiz li:hover{background:hsla(0,0%,100%,.1)}.wy-menu-horiz li.divide-left{border-left:1px solid #404040}.wy-menu-horiz li.divide-right{border-right:1px solid #404040}.wy-menu-horiz a{height:32px;display:inline-block;line-height:32px;padding:0 16px}.wy-menu-vertical{width:300px}.wy-menu-vertical header,.wy-menu-vertical p.caption{color:#55a5d9;height:32px;line-height:32px;padding:0 1.618em;margin:12px 0 0;display:block;font-weight:700;text-transform:uppercase;font-size:85%;white-space:nowrap}.wy-menu-vertical ul{margin-bottom:0}.wy-menu-vertical li.divide-top{border-top:1px solid #404040}.wy-menu-vertical li.divide-bottom{border-bottom:1px solid #404040}.wy-menu-vertical li.current{background:#e3e3e3}.wy-menu-vertical li.current a{color:grey;border-right:1px solid #c9c9c9;padding:.4045em 2.427em}.wy-menu-vertical li.current a:hover{background:#d6d6d6}.rst-content .wy-menu-vertical li tt,.wy-menu-vertical li .rst-content tt,.wy-menu-vertical li code{border:none;background:inherit;color:inherit;padding-left:0;padding-right:0}.wy-menu-vertical li button.toctree-expand{display:block;float:left;margin-left:-1.2em;line-height:18px;color:#4d4d4d;border:none;background:none;padding:0}.wy-menu-vertical li.current>a,.wy-menu-vertical li.on a{color:#404040;font-weight:700;position:relative;background:#fcfcfc;border:none;padding:.4045em 1.618em}.wy-menu-vertical li.current>a:hover,.wy-menu-vertical li.on a:hover{background:#fcfcfc}.wy-menu-vertical li.current>a:hover button.toctree-expand,.wy-menu-vertical li.on a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.current>a button.toctree-expand,.wy-menu-vertical li.on a 
button.toctree-expand{display:block;line-height:18px;color:#333}.wy-menu-vertical li.toctree-l1.current>a{border-bottom:1px solid #c9c9c9;border-top:1px solid #c9c9c9}.wy-menu-vertical .toctree-l1.current .toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .toctree-l11>ul{display:none}.wy-menu-vertical .toctree-l1.current .current.toctree-l2>ul,.wy-menu-vertical .toctree-l2.current .current.toctree-l3>ul,.wy-menu-vertical .toctree-l3.current .current.toctree-l4>ul,.wy-menu-vertical .toctree-l4.current .current.toctree-l5>ul,.wy-menu-vertical .toctree-l5.current .current.toctree-l6>ul,.wy-menu-vertical .toctree-l6.current .current.toctree-l7>ul,.wy-menu-vertical .toctree-l7.current .current.toctree-l8>ul,.wy-menu-vertical .toctree-l8.current .current.toctree-l9>ul,.wy-menu-vertical .toctree-l9.current .current.toctree-l10>ul,.wy-menu-vertical .toctree-l10.current .current.toctree-l11>ul{display:block}.wy-menu-vertical li.toctree-l3,.wy-menu-vertical li.toctree-l4{font-size:.9em}.wy-menu-vertical li.toctree-l2 a,.wy-menu-vertical li.toctree-l3 a,.wy-menu-vertical li.toctree-l4 a,.wy-menu-vertical li.toctree-l5 a,.wy-menu-vertical li.toctree-l6 a,.wy-menu-vertical li.toctree-l7 a,.wy-menu-vertical li.toctree-l8 a,.wy-menu-vertical li.toctree-l9 a,.wy-menu-vertical li.toctree-l10 a{color:#404040}.wy-menu-vertical li.toctree-l2 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l3 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l4 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l5 a:hover 
button.toctree-expand,.wy-menu-vertical li.toctree-l6 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l7 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l8 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l9 a:hover button.toctree-expand,.wy-menu-vertical li.toctree-l10 a:hover button.toctree-expand{color:grey}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a,.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a,.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a,.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a,.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a,.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a,.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a,.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{display:block}.wy-menu-vertical li.toctree-l2.current>a{padding:.4045em 2.427em}.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{padding:.4045em 1.618em .4045em 4.045em}.wy-menu-vertical li.toctree-l3.current>a{padding:.4045em 4.045em}.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{padding:.4045em 1.618em .4045em 5.663em}.wy-menu-vertical li.toctree-l4.current>a{padding:.4045em 5.663em}.wy-menu-vertical li.toctree-l4.current li.toctree-l5>a{padding:.4045em 1.618em .4045em 7.281em}.wy-menu-vertical li.toctree-l5.current>a{padding:.4045em 7.281em}.wy-menu-vertical li.toctree-l5.current li.toctree-l6>a{padding:.4045em 1.618em .4045em 8.899em}.wy-menu-vertical li.toctree-l6.current>a{padding:.4045em 8.899em}.wy-menu-vertical li.toctree-l6.current li.toctree-l7>a{padding:.4045em 1.618em .4045em 10.517em}.wy-menu-vertical li.toctree-l7.current>a{padding:.4045em 10.517em}.wy-menu-vertical li.toctree-l7.current li.toctree-l8>a{padding:.4045em 1.618em .4045em 12.135em}.wy-menu-vertical li.toctree-l8.current>a{padding:.4045em 12.135em}.wy-menu-vertical li.toctree-l8.current li.toctree-l9>a{padding:.4045em 1.618em .4045em 
13.753em}.wy-menu-vertical li.toctree-l9.current>a{padding:.4045em 13.753em}.wy-menu-vertical li.toctree-l9.current li.toctree-l10>a{padding:.4045em 1.618em .4045em 15.371em}.wy-menu-vertical li.toctree-l10.current>a{padding:.4045em 15.371em}.wy-menu-vertical li.toctree-l10.current li.toctree-l11>a{padding:.4045em 1.618em .4045em 16.989em}.wy-menu-vertical li.toctree-l2.current>a,.wy-menu-vertical li.toctree-l2.current li.toctree-l3>a{background:#c9c9c9}.wy-menu-vertical li.toctree-l2 button.toctree-expand{color:#a3a3a3}.wy-menu-vertical li.toctree-l3.current>a,.wy-menu-vertical li.toctree-l3.current li.toctree-l4>a{background:#bdbdbd}.wy-menu-vertical li.toctree-l3 button.toctree-expand{color:#969696}.wy-menu-vertical li.current ul{display:block}.wy-menu-vertical li ul{margin-bottom:0;display:none}.wy-menu-vertical li ul li a{margin-bottom:0;color:#d9d9d9;font-weight:400}.wy-menu-vertical a{line-height:18px;padding:.4045em 1.618em;display:block;position:relative;font-size:90%;color:#d9d9d9}.wy-menu-vertical a:hover{background-color:#4e4a4a;cursor:pointer}.wy-menu-vertical a:hover button.toctree-expand{color:#d9d9d9}.wy-menu-vertical a:active{background-color:#2980b9;cursor:pointer;color:#fff}.wy-menu-vertical a:active button.toctree-expand{color:#fff}.wy-side-nav-search{display:block;width:300px;padding:.809em;margin-bottom:.809em;z-index:200;background-color:#2980b9;text-align:center;color:#fcfcfc}.wy-side-nav-search input[type=text]{width:100%;border-radius:50px;padding:6px 12px;border-color:#2472a4}.wy-side-nav-search img{display:block;margin:auto auto .809em;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-side-nav-search .wy-dropdown>a,.wy-side-nav-search>a{color:#fcfcfc;font-size:100%;font-weight:700;display:inline-block;padding:4px 6px;margin-bottom:.809em;max-width:100%}.wy-side-nav-search .wy-dropdown>a:hover,.wy-side-nav-search>a:hover{background:hsla(0,0%,100%,.1)}.wy-side-nav-search .wy-dropdown>a 
img.logo,.wy-side-nav-search>a img.logo{display:block;margin:0 auto;height:auto;width:auto;border-radius:0;max-width:100%;background:transparent}.wy-side-nav-search .wy-dropdown>a.icon img.logo,.wy-side-nav-search>a.icon img.logo{margin-top:.85em}.wy-side-nav-search>div.version{margin-top:-.4045em;margin-bottom:.809em;font-weight:400;color:hsla(0,0%,100%,.3)}.wy-nav .wy-menu-vertical header{color:#2980b9}.wy-nav .wy-menu-vertical a{color:#b3b3b3}.wy-nav .wy-menu-vertical a:hover{background-color:#2980b9;color:#fff}[data-menu-wrap]{-webkit-transition:all .2s ease-in;-moz-transition:all .2s ease-in;transition:all .2s ease-in;position:absolute;opacity:1;width:100%;opacity:0}[data-menu-wrap].move-center{left:0;right:auto;opacity:1}[data-menu-wrap].move-left{right:auto;left:-100%;opacity:0}[data-menu-wrap].move-right{right:-100%;left:auto;opacity:0}.wy-body-for-nav{background:#fcfcfc}.wy-grid-for-nav{position:absolute;width:100%;height:100%}.wy-nav-side{position:fixed;top:0;bottom:0;left:0;padding-bottom:2em;width:300px;overflow-x:hidden;overflow-y:hidden;min-height:100%;color:#9b9b9b;background:#343131;z-index:200}.wy-side-scroll{width:320px;position:relative;overflow-x:hidden;overflow-y:scroll;height:100%}.wy-nav-top{display:none;background:#2980b9;color:#fff;padding:.4045em .809em;position:relative;line-height:50px;text-align:center;font-size:100%;*zoom:1}.wy-nav-top:after,.wy-nav-top:before{display:table;content:""}.wy-nav-top:after{clear:both}.wy-nav-top a{color:#fff;font-weight:700}.wy-nav-top img{margin-right:12px;height:45px;width:45px;background-color:#2980b9;padding:5px;border-radius:100%}.wy-nav-top i{font-size:30px;float:left;cursor:pointer;padding-top:inherit}.wy-nav-content-wrap{margin-left:300px;background:#fcfcfc;min-height:100%}.wy-nav-content{padding:1.618em 
3.236em;height:100%;max-width:800px;margin:auto}.wy-body-mask{position:fixed;width:100%;height:100%;background:rgba(0,0,0,.2);display:none;z-index:499}.wy-body-mask.on{display:block}footer{color:grey}footer p{margin-bottom:12px}.rst-content footer span.commit tt,footer span.commit .rst-content tt,footer span.commit code{padding:0;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:1em;background:none;border:none;color:grey}.rst-footer-buttons{*zoom:1}.rst-footer-buttons:after,.rst-footer-buttons:before{width:100%;display:table;content:""}.rst-footer-buttons:after{clear:both}.rst-breadcrumbs-buttons{margin-top:12px;*zoom:1}.rst-breadcrumbs-buttons:after,.rst-breadcrumbs-buttons:before{display:table;content:""}.rst-breadcrumbs-buttons:after{clear:both}#search-results .search li{margin-bottom:24px;border-bottom:1px solid #e1e4e5;padding-bottom:24px}#search-results .search li:first-child{border-top:1px solid #e1e4e5;padding-top:24px}#search-results .search li a{font-size:120%;margin-bottom:12px;display:inline-block}#search-results .context{color:grey;font-size:90%}.genindextable li>ul{margin-left:24px}@media screen and (max-width:768px){.wy-body-for-nav{background:#fcfcfc}.wy-nav-top{display:block}.wy-nav-side{left:-300px}.wy-nav-side.shift{width:85%;left:0}.wy-menu.wy-menu-vertical,.wy-side-nav-search,.wy-side-scroll{width:auto}.wy-nav-content-wrap{margin-left:0}.wy-nav-content-wrap .wy-nav-content{padding:1.618em}.wy-nav-content-wrap.shift{position:fixed;min-width:100%;left:85%;top:0;height:100%;overflow:hidden}}@media screen and (min-width:1100px){.wy-nav-content-wrap{background:rgba(0,0,0,.05)}.wy-nav-content{margin:0;background:#fcfcfc}}@media print{.rst-versions,.wy-nav-side,footer{display:none}.wy-nav-content-wrap{margin-left:0}}.rst-versions{position:fixed;bottom:0;left:0;width:300px;color:#fcfcfc;background:#1f1d1d;font-family:Lato,proxima-nova,Helvetica Neue,Arial,sans-serif;z-index:400}.rst-versions 
a{color:#2980b9;text-decoration:none}.rst-versions .rst-badge-small{display:none}.rst-versions .rst-current-version{padding:12px;background-color:#272525;display:block;text-align:right;font-size:90%;cursor:pointer;color:#27ae60;*zoom:1}.rst-versions .rst-current-version:after,.rst-versions .rst-current-version:before{display:table;content:""}.rst-versions .rst-current-version:after{clear:both}.rst-content .code-block-caption .rst-versions .rst-current-version .headerlink,.rst-content .eqno .rst-versions .rst-current-version .headerlink,.rst-content .rst-versions .rst-current-version .admonition-title,.rst-content code.download .rst-versions .rst-current-version span:first-child,.rst-content dl dt .rst-versions .rst-current-version .headerlink,.rst-content h1 .rst-versions .rst-current-version .headerlink,.rst-content h2 .rst-versions .rst-current-version .headerlink,.rst-content h3 .rst-versions .rst-current-version .headerlink,.rst-content h4 .rst-versions .rst-current-version .headerlink,.rst-content h5 .rst-versions .rst-current-version .headerlink,.rst-content h6 .rst-versions .rst-current-version .headerlink,.rst-content p .rst-versions .rst-current-version .headerlink,.rst-content table>caption .rst-versions .rst-current-version .headerlink,.rst-content tt.download .rst-versions .rst-current-version span:first-child,.rst-versions .rst-current-version .fa,.rst-versions .rst-current-version .icon,.rst-versions .rst-current-version .rst-content .admonition-title,.rst-versions .rst-current-version .rst-content .code-block-caption .headerlink,.rst-versions .rst-current-version .rst-content .eqno .headerlink,.rst-versions .rst-current-version .rst-content code.download span:first-child,.rst-versions .rst-current-version .rst-content dl dt .headerlink,.rst-versions .rst-current-version .rst-content h1 .headerlink,.rst-versions .rst-current-version .rst-content h2 .headerlink,.rst-versions .rst-current-version .rst-content h3 .headerlink,.rst-versions 
.rst-current-version .rst-content h4 .headerlink,.rst-versions .rst-current-version .rst-content h5 .headerlink,.rst-versions .rst-current-version .rst-content h6 .headerlink,.rst-versions .rst-current-version .rst-content p .headerlink,.rst-versions .rst-current-version .rst-content table>caption .headerlink,.rst-versions .rst-current-version .rst-content tt.download span:first-child,.rst-versions .rst-current-version .wy-menu-vertical li button.toctree-expand,.wy-menu-vertical li .rst-versions .rst-current-version button.toctree-expand{color:#fcfcfc}.rst-versions .rst-current-version .fa-book,.rst-versions .rst-current-version .icon-book{float:left}.rst-versions .rst-current-version.rst-out-of-date{background-color:#e74c3c;color:#fff}.rst-versions .rst-current-version.rst-active-old-version{background-color:#f1c40f;color:#000}.rst-versions.shift-up{height:auto;max-height:100%;overflow-y:scroll}.rst-versions.shift-up .rst-other-versions{display:block}.rst-versions .rst-other-versions{font-size:90%;padding:12px;color:grey;display:none}.rst-versions .rst-other-versions hr{display:block;height:1px;border:0;margin:20px 0;padding:0;border-top:1px solid #413d3d}.rst-versions .rst-other-versions dd{display:inline-block;margin:0}.rst-versions .rst-other-versions dd a{display:inline-block;padding:6px;color:#fcfcfc}.rst-versions.rst-badge{width:auto;bottom:20px;right:20px;left:auto;border:none;max-width:300px;max-height:90%}.rst-versions.rst-badge .fa-book,.rst-versions.rst-badge .icon-book{float:none;line-height:30px}.rst-versions.rst-badge.shift-up .rst-current-version{text-align:right}.rst-versions.rst-badge.shift-up .rst-current-version .fa-book,.rst-versions.rst-badge.shift-up .rst-current-version .icon-book{float:left}.rst-versions.rst-badge>.rst-current-version{width:auto;height:30px;line-height:30px;padding:0 6px;display:block;text-align:center}@media screen and (max-width:768px){.rst-versions{width:85%;display:none}.rst-versions.shift{display:block}}.rst-content 
.toctree-wrapper>p.caption,.rst-content h1,.rst-content h2,.rst-content h3,.rst-content h4,.rst-content h5,.rst-content h6{margin-bottom:24px}.rst-content img{max-width:100%;height:auto}.rst-content div.figure,.rst-content figure{margin-bottom:24px}.rst-content div.figure .caption-text,.rst-content figure .caption-text{font-style:italic}.rst-content div.figure p:last-child.caption,.rst-content figure p:last-child.caption{margin-bottom:0}.rst-content div.figure.align-center,.rst-content figure.align-center{text-align:center}.rst-content .section>a>img,.rst-content .section>img,.rst-content section>a>img,.rst-content section>img{margin-bottom:24px}.rst-content abbr[title]{text-decoration:none}.rst-content.style-external-links a.reference.external:after{font-family:FontAwesome;content:"\f08e";color:#b3b3b3;vertical-align:super;font-size:60%;margin:0 .2em}.rst-content blockquote{margin-left:24px;line-height:24px;margin-bottom:24px}.rst-content pre.literal-block{white-space:pre;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;display:block;overflow:auto}.rst-content div[class^=highlight],.rst-content pre.literal-block{border:1px solid #e1e4e5;overflow-x:auto;margin:1px 0 24px}.rst-content div[class^=highlight] div[class^=highlight],.rst-content pre.literal-block div[class^=highlight]{padding:0;border:none;margin:0}.rst-content div[class^=highlight] td.code{width:100%}.rst-content .linenodiv pre{border-right:1px solid #e6e9ea;margin:0;padding:12px;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;user-select:none;pointer-events:none}.rst-content div[class^=highlight] pre{white-space:pre;margin:0;padding:12px;display:block;overflow:auto}.rst-content div[class^=highlight] pre .hll{display:block;margin:0 -12px;padding:0 12px}.rst-content .linenodiv pre,.rst-content div[class^=highlight] pre,.rst-content 
pre.literal-block{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;font-size:12px;line-height:1.4}.rst-content div.highlight .gp,.rst-content div.highlight span.linenos{user-select:none;pointer-events:none}.rst-content div.highlight span.linenos{display:inline-block;padding-left:0;padding-right:12px;margin-right:12px;border-right:1px solid #e6e9ea}.rst-content .code-block-caption{font-style:italic;font-size:85%;line-height:1;padding:1em 0;text-align:center}@media print{.rst-content .codeblock,.rst-content div[class^=highlight],.rst-content div[class^=highlight] pre{white-space:pre-wrap}}.rst-content .admonition,.rst-content .admonition-todo,.rst-content .attention,.rst-content .caution,.rst-content .danger,.rst-content .error,.rst-content .hint,.rst-content .important,.rst-content .note,.rst-content .seealso,.rst-content .tip,.rst-content .warning{clear:both}.rst-content .admonition-todo .last,.rst-content .admonition-todo>:last-child,.rst-content .admonition .last,.rst-content .admonition>:last-child,.rst-content .attention .last,.rst-content .attention>:last-child,.rst-content .caution .last,.rst-content .caution>:last-child,.rst-content .danger .last,.rst-content .danger>:last-child,.rst-content .error .last,.rst-content .error>:last-child,.rst-content .hint .last,.rst-content .hint>:last-child,.rst-content .important .last,.rst-content .important>:last-child,.rst-content .note .last,.rst-content .note>:last-child,.rst-content .seealso .last,.rst-content .seealso>:last-child,.rst-content .tip .last,.rst-content .tip>:last-child,.rst-content .warning .last,.rst-content .warning>:last-child{margin-bottom:0}.rst-content .admonition-title:before{margin-right:4px}.rst-content .admonition table{border-color:rgba(0,0,0,.1)}.rst-content .admonition table td,.rst-content .admonition table th{background:transparent!important;border-color:rgba(0,0,0,.1)!important}.rst-content .section ol.loweralpha,.rst-content .section 
ol.loweralpha>li,.rst-content .toctree-wrapper ol.loweralpha,.rst-content .toctree-wrapper ol.loweralpha>li,.rst-content section ol.loweralpha,.rst-content section ol.loweralpha>li{list-style:lower-alpha}.rst-content .section ol.upperalpha,.rst-content .section ol.upperalpha>li,.rst-content .toctree-wrapper ol.upperalpha,.rst-content .toctree-wrapper ol.upperalpha>li,.rst-content section ol.upperalpha,.rst-content section ol.upperalpha>li{list-style:upper-alpha}.rst-content .section ol li>*,.rst-content .section ul li>*,.rst-content .toctree-wrapper ol li>*,.rst-content .toctree-wrapper ul li>*,.rst-content section ol li>*,.rst-content section ul li>*{margin-top:12px;margin-bottom:12px}.rst-content .section ol li>:first-child,.rst-content .section ul li>:first-child,.rst-content .toctree-wrapper ol li>:first-child,.rst-content .toctree-wrapper ul li>:first-child,.rst-content section ol li>:first-child,.rst-content section ul li>:first-child{margin-top:0}.rst-content .section ol li>p,.rst-content .section ol li>p:last-child,.rst-content .section ul li>p,.rst-content .section ul li>p:last-child,.rst-content .toctree-wrapper ol li>p,.rst-content .toctree-wrapper ol li>p:last-child,.rst-content .toctree-wrapper ul li>p,.rst-content .toctree-wrapper ul li>p:last-child,.rst-content section ol li>p,.rst-content section ol li>p:last-child,.rst-content section ul li>p,.rst-content section ul li>p:last-child{margin-bottom:12px}.rst-content .section ol li>p:only-child,.rst-content .section ol li>p:only-child:last-child,.rst-content .section ul li>p:only-child,.rst-content .section ul li>p:only-child:last-child,.rst-content .toctree-wrapper ol li>p:only-child,.rst-content .toctree-wrapper ol li>p:only-child:last-child,.rst-content .toctree-wrapper ul li>p:only-child,.rst-content .toctree-wrapper ul li>p:only-child:last-child,.rst-content section ol li>p:only-child,.rst-content section ol li>p:only-child:last-child,.rst-content section ul li>p:only-child,.rst-content section ul 
li>p:only-child:last-child{margin-bottom:0}.rst-content .section ol li>ol,.rst-content .section ol li>ul,.rst-content .section ul li>ol,.rst-content .section ul li>ul,.rst-content .toctree-wrapper ol li>ol,.rst-content .toctree-wrapper ol li>ul,.rst-content .toctree-wrapper ul li>ol,.rst-content .toctree-wrapper ul li>ul,.rst-content section ol li>ol,.rst-content section ol li>ul,.rst-content section ul li>ol,.rst-content section ul li>ul{margin-bottom:12px}.rst-content .section ol.simple li>*,.rst-content .section ol.simple li ol,.rst-content .section ol.simple li ul,.rst-content .section ul.simple li>*,.rst-content .section ul.simple li ol,.rst-content .section ul.simple li ul,.rst-content .toctree-wrapper ol.simple li>*,.rst-content .toctree-wrapper ol.simple li ol,.rst-content .toctree-wrapper ol.simple li ul,.rst-content .toctree-wrapper ul.simple li>*,.rst-content .toctree-wrapper ul.simple li ol,.rst-content .toctree-wrapper ul.simple li ul,.rst-content section ol.simple li>*,.rst-content section ol.simple li ol,.rst-content section ol.simple li ul,.rst-content section ul.simple li>*,.rst-content section ul.simple li ol,.rst-content section ul.simple li ul{margin-top:0;margin-bottom:0}.rst-content .line-block{margin-left:0;margin-bottom:24px;line-height:24px}.rst-content .line-block .line-block{margin-left:24px;margin-bottom:0}.rst-content .topic-title{font-weight:700;margin-bottom:12px}.rst-content .toc-backref{color:#404040}.rst-content .align-right{float:right;margin:0 0 24px 24px}.rst-content .align-left{float:left;margin:0 24px 24px 0}.rst-content .align-center{margin:auto}.rst-content .align-center:not(table){display:block}.rst-content .code-block-caption .headerlink,.rst-content .eqno .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink,.rst-content dl dt .headerlink,.rst-content h1 .headerlink,.rst-content h2 .headerlink,.rst-content h3 .headerlink,.rst-content h4 .headerlink,.rst-content h5 .headerlink,.rst-content h6 
.headerlink,.rst-content p.caption .headerlink,.rst-content p .headerlink,.rst-content table>caption .headerlink{opacity:0;font-size:14px;font-family:FontAwesome;margin-left:.5em}.rst-content .code-block-caption .headerlink:focus,.rst-content .code-block-caption:hover .headerlink,.rst-content .eqno .headerlink:focus,.rst-content .eqno:hover .headerlink,.rst-content .toctree-wrapper>p.caption .headerlink:focus,.rst-content .toctree-wrapper>p.caption:hover .headerlink,.rst-content dl dt .headerlink:focus,.rst-content dl dt:hover .headerlink,.rst-content h1 .headerlink:focus,.rst-content h1:hover .headerlink,.rst-content h2 .headerlink:focus,.rst-content h2:hover .headerlink,.rst-content h3 .headerlink:focus,.rst-content h3:hover .headerlink,.rst-content h4 .headerlink:focus,.rst-content h4:hover .headerlink,.rst-content h5 .headerlink:focus,.rst-content h5:hover .headerlink,.rst-content h6 .headerlink:focus,.rst-content h6:hover .headerlink,.rst-content p.caption .headerlink:focus,.rst-content p.caption:hover .headerlink,.rst-content p .headerlink:focus,.rst-content p:hover .headerlink,.rst-content table>caption .headerlink:focus,.rst-content table>caption:hover .headerlink{opacity:1}.rst-content p a{overflow-wrap:anywhere}.rst-content .wy-table td p,.rst-content .wy-table td ul,.rst-content .wy-table th p,.rst-content .wy-table th ul,.rst-content table.docutils td p,.rst-content table.docutils td ul,.rst-content table.docutils th p,.rst-content table.docutils th ul,.rst-content table.field-list td p,.rst-content table.field-list td ul,.rst-content table.field-list th p,.rst-content table.field-list th ul{font-size:inherit}.rst-content .btn:focus{outline:2px solid}.rst-content table>caption .headerlink:after{font-size:12px}.rst-content .centered{text-align:center}.rst-content .sidebar{float:right;width:40%;display:block;margin:0 0 24px 24px;padding:24px;background:#f3f6f6;border:1px solid #e1e4e5}.rst-content .sidebar dl,.rst-content .sidebar p,.rst-content .sidebar 
ul{font-size:90%}.rst-content .sidebar .last,.rst-content .sidebar>:last-child{margin-bottom:0}.rst-content .sidebar .sidebar-title{display:block;font-family:Roboto Slab,ff-tisa-web-pro,Georgia,Arial,sans-serif;font-weight:700;background:#e1e4e5;padding:6px 12px;margin:-24px -24px 24px;font-size:100%}.rst-content .highlighted{background:#f1c40f;box-shadow:0 0 0 2px #f1c40f;display:inline;font-weight:700}.rst-content .citation-reference,.rst-content .footnote-reference{vertical-align:baseline;position:relative;top:-.4em;line-height:0;font-size:90%}.rst-content .citation-reference>span.fn-bracket,.rst-content .footnote-reference>span.fn-bracket{display:none}.rst-content .hlist{width:100%}.rst-content dl dt span.classifier:before{content:" : "}.rst-content dl dt span.classifier-delimiter{display:none!important}html.writer-html4 .rst-content table.docutils.citation,html.writer-html4 .rst-content table.docutils.footnote{background:none;border:none}html.writer-html4 .rst-content table.docutils.citation td,html.writer-html4 .rst-content table.docutils.citation tr,html.writer-html4 .rst-content table.docutils.footnote td,html.writer-html4 .rst-content table.docutils.footnote tr{border:none;background-color:transparent!important;white-space:normal}html.writer-html4 .rst-content table.docutils.citation td.label,html.writer-html4 .rst-content table.docutils.footnote td.label{padding-left:0;padding-right:0;vertical-align:top}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{display:grid;grid-template-columns:auto minmax(80%,95%)}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{display:inline-grid;grid-template-columns:max-content auto}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{display:grid;grid-template-columns:auto 
auto minmax(.65rem,auto) minmax(40%,95%)}html.writer-html5 .rst-content aside.citation>span.label,html.writer-html5 .rst-content aside.footnote>span.label,html.writer-html5 .rst-content div.citation>span.label{grid-column-start:1;grid-column-end:2}html.writer-html5 .rst-content aside.citation>span.backrefs,html.writer-html5 .rst-content aside.footnote>span.backrefs,html.writer-html5 .rst-content div.citation>span.backrefs{grid-column-start:2;grid-column-end:3;grid-row-start:1;grid-row-end:3}html.writer-html5 .rst-content aside.citation>p,html.writer-html5 .rst-content aside.footnote>p,html.writer-html5 .rst-content div.citation>p{grid-column-start:4;grid-column-end:5}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.field-list,html.writer-html5 .rst-content dl.footnote{margin-bottom:24px}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dt{padding-left:1rem}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.field-list>dd,html.writer-html5 .rst-content dl.field-list>dt,html.writer-html5 .rst-content dl.footnote>dd,html.writer-html5 .rst-content dl.footnote>dt{margin-bottom:0}html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{font-size:.9rem}html.writer-html5 .rst-content dl.citation>dt,html.writer-html5 .rst-content dl.footnote>dt{margin:0 .5rem .5rem 0;line-height:1.2rem;word-break:break-all;font-weight:400}html.writer-html5 .rst-content dl.citation>dt>span.brackets:before,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:before{content:"["}html.writer-html5 .rst-content dl.citation>dt>span.brackets:after,html.writer-html5 .rst-content dl.footnote>dt>span.brackets:after{content:"]"}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref,html.writer-html5 .rst-content 
dl.footnote>dt>span.fn-backref{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a{word-break:keep-all}html.writer-html5 .rst-content dl.citation>dt>span.fn-backref>a:not(:first-child):before,html.writer-html5 .rst-content dl.footnote>dt>span.fn-backref>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content dl.citation>dd,html.writer-html5 .rst-content dl.footnote>dd{margin:0 0 .5rem;line-height:1.2rem}html.writer-html5 .rst-content dl.citation>dd p,html.writer-html5 .rst-content dl.footnote>dd p{font-size:.9rem}html.writer-html5 .rst-content aside.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content div.citation{padding-left:1rem;padding-right:1rem;font-size:.9rem;line-height:1.2rem}html.writer-html5 .rst-content aside.citation p,html.writer-html5 .rst-content aside.footnote p,html.writer-html5 .rst-content div.citation p{font-size:.9rem;line-height:1.2rem;margin-bottom:12px}html.writer-html5 .rst-content aside.citation span.backrefs,html.writer-html5 .rst-content aside.footnote span.backrefs,html.writer-html5 .rst-content div.citation span.backrefs{text-align:left;font-style:italic;margin-left:.65rem;word-break:break-word;word-spacing:-.1rem;max-width:5rem}html.writer-html5 .rst-content aside.citation span.backrefs>a,html.writer-html5 .rst-content aside.footnote span.backrefs>a,html.writer-html5 .rst-content div.citation span.backrefs>a{word-break:keep-all}html.writer-html5 .rst-content aside.citation span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content aside.footnote span.backrefs>a:not(:first-child):before,html.writer-html5 .rst-content div.citation span.backrefs>a:not(:first-child):before{content:" "}html.writer-html5 .rst-content aside.citation span.label,html.writer-html5 .rst-content aside.footnote 
span.label,html.writer-html5 .rst-content div.citation span.label{line-height:1.2rem}html.writer-html5 .rst-content aside.citation-list,html.writer-html5 .rst-content aside.footnote-list,html.writer-html5 .rst-content div.citation-list{margin-bottom:24px}html.writer-html5 .rst-content dl.option-list kbd{font-size:.9rem}.rst-content table.docutils.footnote,html.writer-html4 .rst-content table.docutils.citation,html.writer-html5 .rst-content aside.footnote,html.writer-html5 .rst-content aside.footnote-list aside.footnote,html.writer-html5 .rst-content div.citation-list>div.citation,html.writer-html5 .rst-content dl.citation,html.writer-html5 .rst-content dl.footnote{color:grey}.rst-content table.docutils.footnote code,.rst-content table.docutils.footnote tt,html.writer-html4 .rst-content table.docutils.citation code,html.writer-html4 .rst-content table.docutils.citation tt,html.writer-html5 .rst-content aside.footnote-list aside.footnote code,html.writer-html5 .rst-content aside.footnote-list aside.footnote tt,html.writer-html5 .rst-content aside.footnote code,html.writer-html5 .rst-content aside.footnote tt,html.writer-html5 .rst-content div.citation-list>div.citation code,html.writer-html5 .rst-content div.citation-list>div.citation tt,html.writer-html5 .rst-content dl.citation code,html.writer-html5 .rst-content dl.citation tt,html.writer-html5 .rst-content dl.footnote code,html.writer-html5 .rst-content dl.footnote tt{color:#555}.rst-content .wy-table-responsive.citation,.rst-content .wy-table-responsive.footnote{margin-bottom:0}.rst-content .wy-table-responsive.citation+:not(.citation),.rst-content .wy-table-responsive.footnote+:not(.footnote){margin-top:24px}.rst-content .wy-table-responsive.citation:last-child,.rst-content .wy-table-responsive.footnote:last-child{margin-bottom:24px}.rst-content table.docutils th{border-color:#e1e4e5}html.writer-html5 .rst-content table.docutils th{border:1px solid #e1e4e5}html.writer-html5 .rst-content table.docutils 
td>p,html.writer-html5 .rst-content table.docutils th>p{line-height:1rem;margin-bottom:0;font-size:.9rem}.rst-content table.docutils td .last,.rst-content table.docutils td .last>:last-child{margin-bottom:0}.rst-content table.field-list,.rst-content table.field-list td{border:none}.rst-content table.field-list td p{line-height:inherit}.rst-content table.field-list td>strong{display:inline-block}.rst-content table.field-list .field-name{padding-right:10px;text-align:left;white-space:nowrap}.rst-content table.field-list .field-body{text-align:left}.rst-content code,.rst-content tt{color:#000;font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;padding:2px 5px}.rst-content code big,.rst-content code em,.rst-content tt big,.rst-content tt em{font-size:100%!important;line-height:normal}.rst-content code.literal,.rst-content tt.literal{color:#e74c3c;white-space:normal}.rst-content code.xref,.rst-content tt.xref,a .rst-content code,a .rst-content tt{font-weight:700;color:#404040;overflow-wrap:normal}.rst-content kbd,.rst-content pre,.rst-content samp{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace}.rst-content a code,.rst-content a tt{color:#2980b9}.rst-content dl{margin-bottom:24px}.rst-content dl dt{font-weight:700;margin-bottom:12px}.rst-content dl ol,.rst-content dl p,.rst-content dl table,.rst-content dl ul{margin-bottom:12px}.rst-content dl dd{margin:0 0 12px 24px;line-height:24px}.rst-content dl dd>ol:last-child,.rst-content dl dd>p:last-child,.rst-content dl dd>table:last-child,.rst-content dl dd>ul:last-child{margin-bottom:0}html.writer-html4 .rst-content dl:not(.docutils),html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple){margin-bottom:24px}html.writer-html4 .rst-content dl:not(.docutils)>dt,html.writer-html5 .rst-content 
dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{display:table;margin:6px 0;font-size:90%;line-height:normal;background:#e7f2fa;color:#2980b9;border-top:3px solid #6ab0de;padding:6px;position:relative}html.writer-html4 .rst-content dl:not(.docutils)>dt:before,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:before{color:#6ab0de}html.writer-html4 .rst-content dl:not(.docutils)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt{margin-bottom:6px;border:none;border-left:3px solid #ccc;background:#f0f0f0;color:#555}html.writer-html4 .rst-content dl:not(.docutils) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) dl:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt .headerlink{color:#404040;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils)>dt:first-child,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple)>dt:first-child{margin-top:0}html.writer-html4 .rst-content dl:not(.docutils) code.descclassname,html.writer-html4 .rst-content dl:not(.docutils) 
code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descclassname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{background-color:transparent;border:none;padding:0;font-size:100%!important}html.writer-html4 .rst-content dl:not(.docutils) code.descname,html.writer-html4 .rst-content dl:not(.docutils) tt.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) code.descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) tt.descname{font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .optional,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .optional{display:inline-block;padding:0 4px;color:#000;font-weight:700}html.writer-html4 .rst-content dl:not(.docutils) .property,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .property{display:inline-block;padding-right:8px;max-width:100%}html.writer-html4 .rst-content dl:not(.docutils) .k,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .k{font-style:italic}html.writer-html4 
.rst-content dl:not(.docutils) .descclassname,html.writer-html4 .rst-content dl:not(.docutils) .descname,html.writer-html4 .rst-content dl:not(.docutils) .sig-name,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descclassname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .descname,html.writer-html5 .rst-content dl[class]:not(.option-list):not(.field-list):not(.footnote):not(.citation):not(.glossary):not(.simple) .sig-name{font-family:SFMono-Regular,Menlo,Monaco,Consolas,Liberation Mono,Courier New,Courier,monospace;color:#000}.rst-content .viewcode-back,.rst-content .viewcode-link{display:inline-block;color:#27ae60;font-size:80%;padding-left:24px}.rst-content .viewcode-back{display:block;float:right}.rst-content p.rubric{margin-bottom:12px;font-weight:700}.rst-content code.download,.rst-content tt.download{background:inherit;padding:inherit;font-weight:400;font-family:inherit;font-size:inherit;color:inherit;border:inherit;white-space:inherit}.rst-content code.download span:first-child,.rst-content tt.download span:first-child{-webkit-font-smoothing:subpixel-antialiased}.rst-content code.download span:first-child:before,.rst-content tt.download span:first-child:before{margin-right:4px}.rst-content .guilabel,.rst-content .menuselection{font-size:80%;font-weight:700;border-radius:4px;padding:2.4px 6px;margin:auto 2px}.rst-content .guilabel,.rst-content .menuselection{border:1px solid #7fbbe3;background:#e7f2fa}.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>.kbd,.rst-content :not(dl.option-list)>:not(dt):not(kbd):not(.kbd)>kbd{color:inherit;font-size:80%;background-color:#fff;border:1px solid #a6a6a6;border-radius:4px;box-shadow:0 2px grey;padding:2.4px 6px;margin:auto 0}.rst-content .versionmodified{font-style:italic}@media screen and (max-width:480px){.rst-content 
.sidebar{width:100%}}span[id*=MathJax-Span]{color:#404040}.math{text-align:center}@font-face{font-family:Lato;src:url(fonts/lato-normal.woff2?bd03a2cc277bbbc338d464e679fe9942) format("woff2"),url(fonts/lato-normal.woff?27bd77b9162d388cb8d4c4217c7c5e2a) format("woff");font-weight:400;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold.woff2?cccb897485813c7c256901dbca54ecf2) format("woff2"),url(fonts/lato-bold.woff?d878b6c29b10beca227e9eef4246111b) format("woff");font-weight:700;font-style:normal;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-bold-italic.woff2?0b6bb6725576b072c5d0b02ecdd1900d) format("woff2"),url(fonts/lato-bold-italic.woff?9c7e4e9eb485b4a121c760e61bc3707c) format("woff");font-weight:700;font-style:italic;font-display:block}@font-face{font-family:Lato;src:url(fonts/lato-normal-italic.woff2?4eb103b4d12be57cb1d040ed5e162e9d) format("woff2"),url(fonts/lato-normal-italic.woff?f28f2d6482446544ef1ea1ccc6dd5892) format("woff");font-weight:400;font-style:italic;font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:400;src:url(fonts/Roboto-Slab-Regular.woff2?7abf5b8d04d26a2cafea937019bca958) format("woff2"),url(fonts/Roboto-Slab-Regular.woff?c1be9284088d487c5e3ff0a10a92e58c) format("woff");font-display:block}@font-face{font-family:Roboto Slab;font-style:normal;font-weight:700;src:url(fonts/Roboto-Slab-Bold.woff2?9984f4a9bda09be08e83f2506954adbe) format("woff2"),url(fonts/Roboto-Slab-Bold.woff?bed5564a116b05148e3b3bea6fb1162a) format("woff");font-display:block} \ No newline at end of file diff --git a/_static/custom.css b/_static/custom.css new file mode 100644 index 00000000..85fec2de --- /dev/null +++ b/_static/custom.css @@ -0,0 +1,14 @@ +.wy-nav-content { + max-width: 60% !important; +} + +.wy-side-nav-search { + display: block; + width: 300px; + padding: 0.809em; + margin-bottom: 0.809em; + z-index: 200; + background-color: #fcfcfc; + text-align: center; + color: 
#fcfcfc; +} diff --git a/_static/doctools.js b/_static/doctools.js new file mode 100644 index 00000000..4d67807d --- /dev/null +++ b/_static/doctools.js @@ -0,0 +1,156 @@ +/* + * doctools.js + * ~~~~~~~~~~~ + * + * Base JavaScript utilities for all Sphinx HTML documentation. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +const BLACKLISTED_KEY_CONTROL_ELEMENTS = new Set([ + "TEXTAREA", + "INPUT", + "SELECT", + "BUTTON", +]); + +const _ready = (callback) => { + if (document.readyState !== "loading") { + callback(); + } else { + document.addEventListener("DOMContentLoaded", callback); + } +}; + +/** + * Small JavaScript module for the documentation. + */ +const Documentation = { + init: () => { + Documentation.initDomainIndexTable(); + Documentation.initOnKeyListeners(); + }, + + /** + * i18n support + */ + TRANSLATIONS: {}, + PLURAL_EXPR: (n) => (n === 1 ? 0 : 1), + LOCALE: "unknown", + + // gettext and ngettext don't access this so that the functions + // can safely bound to a different name (_ = Documentation.gettext) + gettext: (string) => { + const translated = Documentation.TRANSLATIONS[string]; + switch (typeof translated) { + case "undefined": + return string; // no translation + case "string": + return translated; // translation exists + default: + return translated[0]; // (singular, plural) translation tuple exists + } + }, + + ngettext: (singular, plural, n) => { + const translated = Documentation.TRANSLATIONS[singular]; + if (typeof translated !== "undefined") + return translated[Documentation.PLURAL_EXPR(n)]; + return n === 1 ? 
singular : plural; + }, + + addTranslations: (catalog) => { + Object.assign(Documentation.TRANSLATIONS, catalog.messages); + Documentation.PLURAL_EXPR = new Function( + "n", + `return (${catalog.plural_expr})` + ); + Documentation.LOCALE = catalog.locale; + }, + + /** + * helper function to focus on search bar + */ + focusSearchBar: () => { + document.querySelectorAll("input[name=q]")[0]?.focus(); + }, + + /** + * Initialise the domain index toggle buttons + */ + initDomainIndexTable: () => { + const toggler = (el) => { + const idNumber = el.id.substr(7); + const toggledRows = document.querySelectorAll(`tr.cg-${idNumber}`); + if (el.src.substr(-9) === "minus.png") { + el.src = `${el.src.substr(0, el.src.length - 9)}plus.png`; + toggledRows.forEach((el) => (el.style.display = "none")); + } else { + el.src = `${el.src.substr(0, el.src.length - 8)}minus.png`; + toggledRows.forEach((el) => (el.style.display = "")); + } + }; + + const togglerElements = document.querySelectorAll("img.toggler"); + togglerElements.forEach((el) => + el.addEventListener("click", (event) => toggler(event.currentTarget)) + ); + togglerElements.forEach((el) => (el.style.display = "")); + if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) togglerElements.forEach(toggler); + }, + + initOnKeyListeners: () => { + // only install a listener if it is really needed + if ( + !DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS && + !DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS + ) + return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.altKey || event.ctrlKey || event.metaKey) return; + + if (!event.shiftKey) { + switch (event.key) { + case "ArrowLeft": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const prevLink = document.querySelector('link[rel="prev"]'); + if (prevLink && prevLink.href) { + window.location.href = prevLink.href; + 
event.preventDefault(); + } + break; + case "ArrowRight": + if (!DOCUMENTATION_OPTIONS.NAVIGATION_WITH_KEYS) break; + + const nextLink = document.querySelector('link[rel="next"]'); + if (nextLink && nextLink.href) { + window.location.href = nextLink.href; + event.preventDefault(); + } + break; + } + } + + // some keyboard layouts may need Shift to get / + switch (event.key) { + case "/": + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) break; + Documentation.focusSearchBar(); + event.preventDefault(); + } + }); + }, +}; + +// quick alias for translations +const _ = Documentation.gettext; + +_ready(Documentation.init); diff --git a/_static/documentation_options.js b/_static/documentation_options.js new file mode 100644 index 00000000..095e0ada --- /dev/null +++ b/_static/documentation_options.js @@ -0,0 +1,13 @@ +const DOCUMENTATION_OPTIONS = { + VERSION: '0.0.4', + LANGUAGE: 'en', + COLLAPSE_INDEX: false, + BUILDER: 'html', + FILE_SUFFIX: '.html', + LINK_SUFFIX: '.html', + HAS_SOURCE: true, + SOURCELINK_SUFFIX: '.txt', + NAVIGATION_WITH_KEYS: false, + SHOW_SEARCH_SUMMARY: true, + ENABLE_SEARCH_SHORTCUTS: true, +}; \ No newline at end of file diff --git a/_static/file.png b/_static/file.png new file mode 100644 index 00000000..a858a410 Binary files /dev/null and b/_static/file.png differ diff --git a/_static/jquery.js b/_static/jquery.js new file mode 100644 index 00000000..c4c6022f --- /dev/null +++ b/_static/jquery.js @@ -0,0 +1,2 @@ +/*! 
jQuery v3.6.0 | (c) OpenJS Foundation and other contributors | jquery.org/license */ +!function(e,t){"use strict";"object"==typeof module&&"object"==typeof module.exports?module.exports=e.document?t(e,!0):function(e){if(!e.document)throw new Error("jQuery requires a window with a document");return t(e)}:t(e)}("undefined"!=typeof window?window:this,function(C,e){"use strict";var t=[],r=Object.getPrototypeOf,s=t.slice,g=t.flat?function(e){return t.flat.call(e)}:function(e){return t.concat.apply([],e)},u=t.push,i=t.indexOf,n={},o=n.toString,v=n.hasOwnProperty,a=v.toString,l=a.call(Object),y={},m=function(e){return"function"==typeof e&&"number"!=typeof e.nodeType&&"function"!=typeof e.item},x=function(e){return null!=e&&e===e.window},E=C.document,c={type:!0,src:!0,nonce:!0,noModule:!0};function b(e,t,n){var r,i,o=(n=n||E).createElement("script");if(o.text=e,t)for(r in c)(i=t[r]||t.getAttribute&&t.getAttribute(r))&&o.setAttribute(r,i);n.head.appendChild(o).parentNode.removeChild(o)}function w(e){return null==e?e+"":"object"==typeof e||"function"==typeof e?n[o.call(e)]||"object":typeof e}var f="3.6.0",S=function(e,t){return new S.fn.init(e,t)};function p(e){var t=!!e&&"length"in e&&e.length,n=w(e);return!m(e)&&!x(e)&&("array"===n||0===t||"number"==typeof t&&0+~]|"+M+")"+M+"*"),U=new RegExp(M+"|>"),X=new RegExp(F),V=new RegExp("^"+I+"$"),G={ID:new RegExp("^#("+I+")"),CLASS:new RegExp("^\\.("+I+")"),TAG:new RegExp("^("+I+"|[*])"),ATTR:new RegExp("^"+W),PSEUDO:new RegExp("^"+F),CHILD:new RegExp("^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\("+M+"*(even|odd|(([+-]|)(\\d*)n|)"+M+"*(?:([+-]|)"+M+"*(\\d+)|))"+M+"*\\)|)","i"),bool:new RegExp("^(?:"+R+")$","i"),needsContext:new RegExp("^"+M+"*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\("+M+"*((?:-\\d)?\\d*)"+M+"*\\)|)(?=[^-]|$)","i")},Y=/HTML$/i,Q=/^(?:input|select|textarea|button)$/i,J=/^h\d$/i,K=/^[^{]+\{\s*\[native \w/,Z=/^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/,ee=/[+~]/,te=new 
RegExp("\\\\[\\da-fA-F]{1,6}"+M+"?|\\\\([^\\r\\n\\f])","g"),ne=function(e,t){var n="0x"+e.slice(1)-65536;return t||(n<0?String.fromCharCode(n+65536):String.fromCharCode(n>>10|55296,1023&n|56320))},re=/([\0-\x1f\x7f]|^-?\d)|^-$|[^\0-\x1f\x7f-\uFFFF\w-]/g,ie=function(e,t){return t?"\0"===e?"\ufffd":e.slice(0,-1)+"\\"+e.charCodeAt(e.length-1).toString(16)+" ":"\\"+e},oe=function(){T()},ae=be(function(e){return!0===e.disabled&&"fieldset"===e.nodeName.toLowerCase()},{dir:"parentNode",next:"legend"});try{H.apply(t=O.call(p.childNodes),p.childNodes),t[p.childNodes.length].nodeType}catch(e){H={apply:t.length?function(e,t){L.apply(e,O.call(t))}:function(e,t){var n=e.length,r=0;while(e[n++]=t[r++]);e.length=n-1}}}function se(t,e,n,r){var i,o,a,s,u,l,c,f=e&&e.ownerDocument,p=e?e.nodeType:9;if(n=n||[],"string"!=typeof t||!t||1!==p&&9!==p&&11!==p)return n;if(!r&&(T(e),e=e||C,E)){if(11!==p&&(u=Z.exec(t)))if(i=u[1]){if(9===p){if(!(a=e.getElementById(i)))return n;if(a.id===i)return n.push(a),n}else if(f&&(a=f.getElementById(i))&&y(e,a)&&a.id===i)return n.push(a),n}else{if(u[2])return H.apply(n,e.getElementsByTagName(t)),n;if((i=u[3])&&d.getElementsByClassName&&e.getElementsByClassName)return H.apply(n,e.getElementsByClassName(i)),n}if(d.qsa&&!N[t+" "]&&(!v||!v.test(t))&&(1!==p||"object"!==e.nodeName.toLowerCase())){if(c=t,f=e,1===p&&(U.test(t)||z.test(t))){(f=ee.test(t)&&ye(e.parentNode)||e)===e&&d.scope||((s=e.getAttribute("id"))?s=s.replace(re,ie):e.setAttribute("id",s=S)),o=(l=h(t)).length;while(o--)l[o]=(s?"#"+s:":scope")+" "+xe(l[o]);c=l.join(",")}try{return H.apply(n,f.querySelectorAll(c)),n}catch(e){N(t,!0)}finally{s===S&&e.removeAttribute("id")}}}return g(t.replace($,"$1"),e,n,r)}function ue(){var r=[];return function e(t,n){return r.push(t+" ")>b.cacheLength&&delete e[r.shift()],e[t+" "]=n}}function le(e){return e[S]=!0,e}function ce(e){var t=C.createElement("fieldset");try{return!!e(t)}catch(e){return!1}finally{t.parentNode&&t.parentNode.removeChild(t),t=null}}function 
fe(e,t){var n=e.split("|"),r=n.length;while(r--)b.attrHandle[n[r]]=t}function pe(e,t){var n=t&&e,r=n&&1===e.nodeType&&1===t.nodeType&&e.sourceIndex-t.sourceIndex;if(r)return r;if(n)while(n=n.nextSibling)if(n===t)return-1;return e?1:-1}function de(t){return function(e){return"input"===e.nodeName.toLowerCase()&&e.type===t}}function he(n){return function(e){var t=e.nodeName.toLowerCase();return("input"===t||"button"===t)&&e.type===n}}function ge(t){return function(e){return"form"in e?e.parentNode&&!1===e.disabled?"label"in e?"label"in e.parentNode?e.parentNode.disabled===t:e.disabled===t:e.isDisabled===t||e.isDisabled!==!t&&ae(e)===t:e.disabled===t:"label"in e&&e.disabled===t}}function ve(a){return le(function(o){return o=+o,le(function(e,t){var n,r=a([],e.length,o),i=r.length;while(i--)e[n=r[i]]&&(e[n]=!(t[n]=e[n]))})})}function ye(e){return e&&"undefined"!=typeof e.getElementsByTagName&&e}for(e in d=se.support={},i=se.isXML=function(e){var t=e&&e.namespaceURI,n=e&&(e.ownerDocument||e).documentElement;return!Y.test(t||n&&n.nodeName||"HTML")},T=se.setDocument=function(e){var t,n,r=e?e.ownerDocument||e:p;return r!=C&&9===r.nodeType&&r.documentElement&&(a=(C=r).documentElement,E=!i(C),p!=C&&(n=C.defaultView)&&n.top!==n&&(n.addEventListener?n.addEventListener("unload",oe,!1):n.attachEvent&&n.attachEvent("onunload",oe)),d.scope=ce(function(e){return a.appendChild(e).appendChild(C.createElement("div")),"undefined"!=typeof e.querySelectorAll&&!e.querySelectorAll(":scope fieldset div").length}),d.attributes=ce(function(e){return e.className="i",!e.getAttribute("className")}),d.getElementsByTagName=ce(function(e){return e.appendChild(C.createComment("")),!e.getElementsByTagName("*").length}),d.getElementsByClassName=K.test(C.getElementsByClassName),d.getById=ce(function(e){return a.appendChild(e).id=S,!C.getElementsByName||!C.getElementsByName(S).length}),d.getById?(b.filter.ID=function(e){var t=e.replace(te,ne);return function(e){return 
e.getAttribute("id")===t}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n=t.getElementById(e);return n?[n]:[]}}):(b.filter.ID=function(e){var n=e.replace(te,ne);return function(e){var t="undefined"!=typeof e.getAttributeNode&&e.getAttributeNode("id");return t&&t.value===n}},b.find.ID=function(e,t){if("undefined"!=typeof t.getElementById&&E){var n,r,i,o=t.getElementById(e);if(o){if((n=o.getAttributeNode("id"))&&n.value===e)return[o];i=t.getElementsByName(e),r=0;while(o=i[r++])if((n=o.getAttributeNode("id"))&&n.value===e)return[o]}return[]}}),b.find.TAG=d.getElementsByTagName?function(e,t){return"undefined"!=typeof t.getElementsByTagName?t.getElementsByTagName(e):d.qsa?t.querySelectorAll(e):void 0}:function(e,t){var n,r=[],i=0,o=t.getElementsByTagName(e);if("*"===e){while(n=o[i++])1===n.nodeType&&r.push(n);return r}return o},b.find.CLASS=d.getElementsByClassName&&function(e,t){if("undefined"!=typeof t.getElementsByClassName&&E)return t.getElementsByClassName(e)},s=[],v=[],(d.qsa=K.test(C.querySelectorAll))&&(ce(function(e){var t;a.appendChild(e).innerHTML="",e.querySelectorAll("[msallowcapture^='']").length&&v.push("[*^$]="+M+"*(?:''|\"\")"),e.querySelectorAll("[selected]").length||v.push("\\["+M+"*(?:value|"+R+")"),e.querySelectorAll("[id~="+S+"-]").length||v.push("~="),(t=C.createElement("input")).setAttribute("name",""),e.appendChild(t),e.querySelectorAll("[name='']").length||v.push("\\["+M+"*name"+M+"*="+M+"*(?:''|\"\")"),e.querySelectorAll(":checked").length||v.push(":checked"),e.querySelectorAll("a#"+S+"+*").length||v.push(".#.+[+~]"),e.querySelectorAll("\\\f"),v.push("[\\r\\n\\f]")}),ce(function(e){e.innerHTML="";var 
t=C.createElement("input");t.setAttribute("type","hidden"),e.appendChild(t).setAttribute("name","D"),e.querySelectorAll("[name=d]").length&&v.push("name"+M+"*[*^$|!~]?="),2!==e.querySelectorAll(":enabled").length&&v.push(":enabled",":disabled"),a.appendChild(e).disabled=!0,2!==e.querySelectorAll(":disabled").length&&v.push(":enabled",":disabled"),e.querySelectorAll("*,:x"),v.push(",.*:")})),(d.matchesSelector=K.test(c=a.matches||a.webkitMatchesSelector||a.mozMatchesSelector||a.oMatchesSelector||a.msMatchesSelector))&&ce(function(e){d.disconnectedMatch=c.call(e,"*"),c.call(e,"[s!='']:x"),s.push("!=",F)}),v=v.length&&new RegExp(v.join("|")),s=s.length&&new RegExp(s.join("|")),t=K.test(a.compareDocumentPosition),y=t||K.test(a.contains)?function(e,t){var n=9===e.nodeType?e.documentElement:e,r=t&&t.parentNode;return e===r||!(!r||1!==r.nodeType||!(n.contains?n.contains(r):e.compareDocumentPosition&&16&e.compareDocumentPosition(r)))}:function(e,t){if(t)while(t=t.parentNode)if(t===e)return!0;return!1},j=t?function(e,t){if(e===t)return l=!0,0;var n=!e.compareDocumentPosition-!t.compareDocumentPosition;return n||(1&(n=(e.ownerDocument||e)==(t.ownerDocument||t)?e.compareDocumentPosition(t):1)||!d.sortDetached&&t.compareDocumentPosition(e)===n?e==C||e.ownerDocument==p&&y(p,e)?-1:t==C||t.ownerDocument==p&&y(p,t)?1:u?P(u,e)-P(u,t):0:4&n?-1:1)}:function(e,t){if(e===t)return l=!0,0;var n,r=0,i=e.parentNode,o=t.parentNode,a=[e],s=[t];if(!i||!o)return e==C?-1:t==C?1:i?-1:o?1:u?P(u,e)-P(u,t):0;if(i===o)return pe(e,t);n=e;while(n=n.parentNode)a.unshift(n);n=t;while(n=n.parentNode)s.unshift(n);while(a[r]===s[r])r++;return r?pe(a[r],s[r]):a[r]==p?-1:s[r]==p?1:0}),C},se.matches=function(e,t){return se(e,null,null,t)},se.matchesSelector=function(e,t){if(T(e),d.matchesSelector&&E&&!N[t+" "]&&(!s||!s.test(t))&&(!v||!v.test(t)))try{var n=c.call(e,t);if(n||d.disconnectedMatch||e.document&&11!==e.document.nodeType)return n}catch(e){N(t,!0)}return 0":{dir:"parentNode",first:!0}," 
":{dir:"parentNode"},"+":{dir:"previousSibling",first:!0},"~":{dir:"previousSibling"}},preFilter:{ATTR:function(e){return e[1]=e[1].replace(te,ne),e[3]=(e[3]||e[4]||e[5]||"").replace(te,ne),"~="===e[2]&&(e[3]=" "+e[3]+" "),e.slice(0,4)},CHILD:function(e){return e[1]=e[1].toLowerCase(),"nth"===e[1].slice(0,3)?(e[3]||se.error(e[0]),e[4]=+(e[4]?e[5]+(e[6]||1):2*("even"===e[3]||"odd"===e[3])),e[5]=+(e[7]+e[8]||"odd"===e[3])):e[3]&&se.error(e[0]),e},PSEUDO:function(e){var t,n=!e[6]&&e[2];return G.CHILD.test(e[0])?null:(e[3]?e[2]=e[4]||e[5]||"":n&&X.test(n)&&(t=h(n,!0))&&(t=n.indexOf(")",n.length-t)-n.length)&&(e[0]=e[0].slice(0,t),e[2]=n.slice(0,t)),e.slice(0,3))}},filter:{TAG:function(e){var t=e.replace(te,ne).toLowerCase();return"*"===e?function(){return!0}:function(e){return e.nodeName&&e.nodeName.toLowerCase()===t}},CLASS:function(e){var t=m[e+" "];return t||(t=new RegExp("(^|"+M+")"+e+"("+M+"|$)"))&&m(e,function(e){return t.test("string"==typeof e.className&&e.className||"undefined"!=typeof e.getAttribute&&e.getAttribute("class")||"")})},ATTR:function(n,r,i){return function(e){var t=se.attr(e,n);return null==t?"!="===r:!r||(t+="","="===r?t===i:"!="===r?t!==i:"^="===r?i&&0===t.indexOf(i):"*="===r?i&&-1:\x20\t\r\n\f]*)[\x20\t\r\n\f]*\/?>(?:<\/\1>|)$/i;function j(e,n,r){return m(n)?S.grep(e,function(e,t){return!!n.call(e,t,e)!==r}):n.nodeType?S.grep(e,function(e){return e===n!==r}):"string"!=typeof n?S.grep(e,function(e){return-1)[^>]*|#([\w-]+))$/;(S.fn.init=function(e,t,n){var r,i;if(!e)return this;if(n=n||D,"string"==typeof e){if(!(r="<"===e[0]&&">"===e[e.length-1]&&3<=e.length?[null,e,null]:q.exec(e))||!r[1]&&t)return!t||t.jquery?(t||n).find(e):this.constructor(t).find(e);if(r[1]){if(t=t instanceof S?t[0]:t,S.merge(this,S.parseHTML(r[1],t&&t.nodeType?t.ownerDocument||t:E,!0)),N.test(r[1])&&S.isPlainObject(t))for(r in t)m(this[r])?this[r](t[r]):this.attr(r,t[r]);return this}return(i=E.getElementById(r[2]))&&(this[0]=i,this.length=1),this}return 
e.nodeType?(this[0]=e,this.length=1,this):m(e)?void 0!==n.ready?n.ready(e):e(S):S.makeArray(e,this)}).prototype=S.fn,D=S(E);var L=/^(?:parents|prev(?:Until|All))/,H={children:!0,contents:!0,next:!0,prev:!0};function O(e,t){while((e=e[t])&&1!==e.nodeType);return e}S.fn.extend({has:function(e){var t=S(e,this),n=t.length;return this.filter(function(){for(var e=0;e\x20\t\r\n\f]*)/i,he=/^$|^module$|\/(?:java|ecma)script/i;ce=E.createDocumentFragment().appendChild(E.createElement("div")),(fe=E.createElement("input")).setAttribute("type","radio"),fe.setAttribute("checked","checked"),fe.setAttribute("name","t"),ce.appendChild(fe),y.checkClone=ce.cloneNode(!0).cloneNode(!0).lastChild.checked,ce.innerHTML="",y.noCloneChecked=!!ce.cloneNode(!0).lastChild.defaultValue,ce.innerHTML="",y.option=!!ce.lastChild;var ge={thead:[1,"","
"],col:[2,"","
"],tr:[2,"","
"],td:[3,"","
"],_default:[0,"",""]};function ve(e,t){var n;return n="undefined"!=typeof e.getElementsByTagName?e.getElementsByTagName(t||"*"):"undefined"!=typeof e.querySelectorAll?e.querySelectorAll(t||"*"):[],void 0===t||t&&A(e,t)?S.merge([e],n):n}function ye(e,t){for(var n=0,r=e.length;n",""]);var me=/<|&#?\w+;/;function xe(e,t,n,r,i){for(var o,a,s,u,l,c,f=t.createDocumentFragment(),p=[],d=0,h=e.length;d\s*$/g;function je(e,t){return A(e,"table")&&A(11!==t.nodeType?t:t.firstChild,"tr")&&S(e).children("tbody")[0]||e}function De(e){return e.type=(null!==e.getAttribute("type"))+"/"+e.type,e}function qe(e){return"true/"===(e.type||"").slice(0,5)?e.type=e.type.slice(5):e.removeAttribute("type"),e}function Le(e,t){var n,r,i,o,a,s;if(1===t.nodeType){if(Y.hasData(e)&&(s=Y.get(e).events))for(i in Y.remove(t,"handle events"),s)for(n=0,r=s[i].length;n").attr(n.scriptAttrs||{}).prop({charset:n.scriptCharset,src:n.url}).on("load error",i=function(e){r.remove(),i=null,e&&t("error"===e.type?404:200,e.type)}),E.head.appendChild(r[0])},abort:function(){i&&i()}}});var _t,zt=[],Ut=/(=)\?(?=&|$)|\?\?/;S.ajaxSetup({jsonp:"callback",jsonpCallback:function(){var e=zt.pop()||S.expando+"_"+wt.guid++;return this[e]=!0,e}}),S.ajaxPrefilter("json jsonp",function(e,t,n){var r,i,o,a=!1!==e.jsonp&&(Ut.test(e.url)?"url":"string"==typeof e.data&&0===(e.contentType||"").indexOf("application/x-www-form-urlencoded")&&Ut.test(e.data)&&"data");if(a||"jsonp"===e.dataTypes[0])return r=e.jsonpCallback=m(e.jsonpCallback)?e.jsonpCallback():e.jsonpCallback,a?e[a]=e[a].replace(Ut,"$1"+r):!1!==e.jsonp&&(e.url+=(Tt.test(e.url)?"&":"?")+e.jsonp+"="+r),e.converters["script json"]=function(){return o||S.error(r+" was not called"),o[0]},e.dataTypes[0]="json",i=C[r],C[r]=function(){o=arguments},n.always(function(){void 0===i?S(C).removeProp(r):C[r]=i,e[r]&&(e.jsonpCallback=t.jsonpCallback,zt.push(r)),o&&m(i)&&i(o[0]),o=i=void 0}),"script"}),y.createHTMLDocument=((_t=E.implementation.createHTMLDocument("").body).innerHTML="
",2===_t.childNodes.length),S.parseHTML=function(e,t,n){return"string"!=typeof e?[]:("boolean"==typeof t&&(n=t,t=!1),t||(y.createHTMLDocument?((r=(t=E.implementation.createHTMLDocument("")).createElement("base")).href=E.location.href,t.head.appendChild(r)):t=E),o=!n&&[],(i=N.exec(e))?[t.createElement(i[1])]:(i=xe([e],t,o),o&&o.length&&S(o).remove(),S.merge([],i.childNodes)));var r,i,o},S.fn.load=function(e,t,n){var r,i,o,a=this,s=e.indexOf(" ");return-1").append(S.parseHTML(e)).find(r):e)}).always(n&&function(e,t){a.each(function(){n.apply(this,o||[e.responseText,t,e])})}),this},S.expr.pseudos.animated=function(t){return S.grep(S.timers,function(e){return t===e.elem}).length},S.offset={setOffset:function(e,t,n){var r,i,o,a,s,u,l=S.css(e,"position"),c=S(e),f={};"static"===l&&(e.style.position="relative"),s=c.offset(),o=S.css(e,"top"),u=S.css(e,"left"),("absolute"===l||"fixed"===l)&&-1<(o+u).indexOf("auto")?(a=(r=c.position()).top,i=r.left):(a=parseFloat(o)||0,i=parseFloat(u)||0),m(t)&&(t=t.call(e,n,S.extend({},s))),null!=t.top&&(f.top=t.top-s.top+a),null!=t.left&&(f.left=t.left-s.left+i),"using"in t?t.using.call(e,f):c.css(f)}},S.fn.extend({offset:function(t){if(arguments.length)return void 0===t?this:this.each(function(e){S.offset.setOffset(this,t,e)});var e,n,r=this[0];return r?r.getClientRects().length?(e=r.getBoundingClientRect(),n=r.ownerDocument.defaultView,{top:e.top+n.pageYOffset,left:e.left+n.pageXOffset}):{top:0,left:0}:void 0},position:function(){if(this[0]){var 
e,t,n,r=this[0],i={top:0,left:0};if("fixed"===S.css(r,"position"))t=r.getBoundingClientRect();else{t=this.offset(),n=r.ownerDocument,e=r.offsetParent||n.documentElement;while(e&&(e===n.body||e===n.documentElement)&&"static"===S.css(e,"position"))e=e.parentNode;e&&e!==r&&1===e.nodeType&&((i=S(e).offset()).top+=S.css(e,"borderTopWidth",!0),i.left+=S.css(e,"borderLeftWidth",!0))}return{top:t.top-i.top-S.css(r,"marginTop",!0),left:t.left-i.left-S.css(r,"marginLeft",!0)}}},offsetParent:function(){return this.map(function(){var e=this.offsetParent;while(e&&"static"===S.css(e,"position"))e=e.offsetParent;return e||re})}}),S.each({scrollLeft:"pageXOffset",scrollTop:"pageYOffset"},function(t,i){var o="pageYOffset"===i;S.fn[t]=function(e){return $(this,function(e,t,n){var r;if(x(e)?r=e:9===e.nodeType&&(r=e.defaultView),void 0===n)return r?r[i]:e[t];r?r.scrollTo(o?r.pageXOffset:n,o?n:r.pageYOffset):e[t]=n},t,e,arguments.length)}}),S.each(["top","left"],function(e,n){S.cssHooks[n]=Fe(y.pixelPosition,function(e,t){if(t)return t=We(e,n),Pe.test(t)?S(e).position()[n]+"px":t})}),S.each({Height:"height",Width:"width"},function(a,s){S.each({padding:"inner"+a,content:s,"":"outer"+a},function(r,o){S.fn[o]=function(e,t){var n=arguments.length&&(r||"boolean"!=typeof e),i=r||(!0===e||!0===t?"margin":"border");return $(this,function(e,t,n){var r;return x(e)?0===o.indexOf("outer")?e["inner"+a]:e.document.documentElement["client"+a]:9===e.nodeType?(r=e.documentElement,Math.max(e.body["scroll"+a],r["scroll"+a],e.body["offset"+a],r["offset"+a],r["client"+a])):void 0===n?S.css(e,t,i):S.style(e,t,n,i)},s,n?e:void 0,n)}})}),S.each(["ajaxStart","ajaxStop","ajaxComplete","ajaxError","ajaxSuccess","ajaxSend"],function(e,t){S.fn[t]=function(e){return this.on(t,e)}}),S.fn.extend({bind:function(e,t,n){return this.on(e,null,t,n)},unbind:function(e,t){return this.off(e,null,t)},delegate:function(e,t,n,r){return this.on(t,e,n,r)},undelegate:function(e,t,n){return 
1===arguments.length?this.off(e,"**"):this.off(t,e||"**",n)},hover:function(e,t){return this.mouseenter(e).mouseleave(t||e)}}),S.each("blur focus focusin focusout resize scroll click dblclick mousedown mouseup mousemove mouseover mouseout mouseenter mouseleave change select submit keydown keypress keyup contextmenu".split(" "),function(e,n){S.fn[n]=function(e,t){return 0",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=y.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=y.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),y.elements=c+" "+a,j(b)}function f(a){var b=x[a[v]];return b||(b={},w++,a[v]=w,x[w]=b),b}function g(a,c,d){if(c||(c=b),q)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():u.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||t.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),q)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return y.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(y,b.frag)}function j(a){a||(a=b);var d=f(a);return!y.shivCSS||p||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),q||i(a,d),a}function k(a){for(var b,c=a.getElementsByTagName("*"),e=c.length,f=RegExp("^(?:"+d().join("|")+")$","i"),g=[];e--;)b=c[e],f.test(b.nodeName)&&g.push(b.applyElement(l(b)));return g}function l(a){for(var 
b,c=a.attributes,d=c.length,e=a.ownerDocument.createElement(A+":"+a.nodeName);d--;)b=c[d],b.specified&&e.setAttribute(b.nodeName,b.nodeValue);return e.style.cssText=a.style.cssText,e}function m(a){for(var b,c=a.split("{"),e=c.length,f=RegExp("(^|[\\s,>+~])("+d().join("|")+")(?=[[\\s,>+~#.:]|$)","gi"),g="$1"+A+"\\:$2";e--;)b=c[e]=c[e].split("}"),b[b.length-1]=b[b.length-1].replace(f,g),c[e]=b.join("}");return c.join("{")}function n(a){for(var b=a.length;b--;)a[b].removeNode()}function o(a){function b(){clearTimeout(g._removeSheetTimer),d&&d.removeNode(!0),d=null}var d,e,g=f(a),h=a.namespaces,i=a.parentWindow;return!B||a.printShived?a:("undefined"==typeof h[A]&&h.add(A),i.attachEvent("onbeforeprint",function(){b();for(var f,g,h,i=a.styleSheets,j=[],l=i.length,n=Array(l);l--;)n[l]=i[l];for(;h=n.pop();)if(!h.disabled&&z.test(h.media)){try{f=h.imports,g=f.length}catch(o){g=0}for(l=0;g>l;l++)n.push(f[l]);try{j.push(h.cssText)}catch(o){}}j=m(j.reverse().join("")),e=k(a),d=c(a,j)}),i.attachEvent("onafterprint",function(){n(e),clearTimeout(g._removeSheetTimer),g._removeSheetTimer=setTimeout(b,500)}),a.printShived=!0,a)}var p,q,r="3.7.3",s=a.html5||{},t=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,u=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,v="_html5shiv",w=0,x={};!function(){try{var a=b.createElement("a");a.innerHTML="",p="hidden"in a,q=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){p=!0,q=!0}}();var y={elements:s.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time 
video",version:r,shivCSS:s.shivCSS!==!1,supportsUnknownElements:q,shivMethods:s.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=y,j(b);var z=/^$|\b(?:all|print)\b/,A="html5shiv",B=!q&&function(){var c=b.documentElement;return!("undefined"==typeof b.namespaces||"undefined"==typeof b.parentWindow||"undefined"==typeof c.applyElement||"undefined"==typeof c.removeNode||"undefined"==typeof a.attachEvent)}();y.type+=" print",y.shivPrint=o,o(b),"object"==typeof module&&module.exports&&(module.exports=y)}("undefined"!=typeof window?window:this,document); \ No newline at end of file diff --git a/_static/js/html5shiv.min.js b/_static/js/html5shiv.min.js new file mode 100644 index 00000000..cd1c674f --- /dev/null +++ b/_static/js/html5shiv.min.js @@ -0,0 +1,4 @@ +/** +* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed +*/ +!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return 
t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document); \ No newline at end of file diff --git a/_static/js/theme.js b/_static/js/theme.js new file mode 100644 index 00000000..1fddb6ee --- /dev/null +++ b/_static/js/theme.js @@ -0,0 +1 @@ +!function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return 
n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var 
t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("
"),n("table.docutils.footnote").wrap("
"),n("table.docutils.citation").wrap("
"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n(''),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t0 + var meq1 = "^(" + C + ")?" + V + C + "(" + V + ")?$"; // [C]VC[V] is m=1 + var mgr1 = "^(" + C + ")?" 
+ V + C + V + C; // [C]VCVC... is m>1 + var s_v = "^(" + C + ")?" + v; // vowel in stem + + this.stemWord = function (w) { + var stem; + var suffix; + var firstch; + var origword = w; + + if (w.length < 3) + return w; + + var re; + var re2; + var re3; + var re4; + + firstch = w.substr(0,1); + if (firstch == "y") + w = firstch.toUpperCase() + w.substr(1); + + // Step 1a + re = /^(.+?)(ss|i)es$/; + re2 = /^(.+?)([^s])s$/; + + if (re.test(w)) + w = w.replace(re,"$1$2"); + else if (re2.test(w)) + w = w.replace(re2,"$1$2"); + + // Step 1b + re = /^(.+?)eed$/; + re2 = /^(.+?)(ed|ing)$/; + if (re.test(w)) { + var fp = re.exec(w); + re = new RegExp(mgr0); + if (re.test(fp[1])) { + re = /.$/; + w = w.replace(re,""); + } + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1]; + re2 = new RegExp(s_v); + if (re2.test(stem)) { + w = stem; + re2 = /(at|bl|iz)$/; + re3 = new RegExp("([^aeiouylsz])\\1$"); + re4 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re2.test(w)) + w = w + "e"; + else if (re3.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + else if (re4.test(w)) + w = w + "e"; + } + } + + // Step 1c + re = /^(.+?)y$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(s_v); + if (re.test(stem)) + w = stem + "i"; + } + + // Step 2 + re = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step2list[suffix]; + } + + // Step 3 + re = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + suffix = fp[2]; + re = new RegExp(mgr0); + if (re.test(stem)) + w = stem + step3list[suffix]; + } + + // Step 4 + re = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/; + re2 = /^(.+?)(s|t)(ion)$/; + if (re.test(w)) { + var fp = 
re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + if (re.test(stem)) + w = stem; + } + else if (re2.test(w)) { + var fp = re2.exec(w); + stem = fp[1] + fp[2]; + re2 = new RegExp(mgr1); + if (re2.test(stem)) + w = stem; + } + + // Step 5 + re = /^(.+?)e$/; + if (re.test(w)) { + var fp = re.exec(w); + stem = fp[1]; + re = new RegExp(mgr1); + re2 = new RegExp(meq1); + re3 = new RegExp("^" + C + v + "[^aeiouwxy]$"); + if (re.test(stem) || (re2.test(stem) && !(re3.test(stem)))) + w = stem; + } + re = /ll$/; + re2 = new RegExp(mgr1); + if (re.test(w) && re2.test(w)) { + re = /.$/; + w = w.replace(re,""); + } + + // and turn initial Y back to y + if (firstch == "y") + w = firstch.toLowerCase() + w.substr(1); + return w; + } +} + diff --git a/_static/minus.png b/_static/minus.png new file mode 100644 index 00000000..d96755fd Binary files /dev/null and b/_static/minus.png differ diff --git a/_static/plus.png b/_static/plus.png new file mode 100644 index 00000000..7107cec9 Binary files /dev/null and b/_static/plus.png differ diff --git a/_static/pygments.css b/_static/pygments.css new file mode 100644 index 00000000..0d49244e --- /dev/null +++ b/_static/pygments.css @@ -0,0 +1,75 @@ +pre { line-height: 125%; } +td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; } +td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; } +.highlight .hll { background-color: #ffffcc } +.highlight { background: #eeffcc; } +.highlight .c { color: #408090; font-style: italic } /* Comment */ +.highlight .err { border: 1px solid #FF0000 } /* Error */ +.highlight .k { color: #007020; font-weight: bold } /* Keyword */ +.highlight .o { color: #666666 } /* Operator */ +.highlight .ch { 
color: #408090; font-style: italic } /* Comment.Hashbang */ +.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */ +.highlight .cp { color: #007020 } /* Comment.Preproc */ +.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */ +.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */ +.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */ +.highlight .gd { color: #A00000 } /* Generic.Deleted */ +.highlight .ge { font-style: italic } /* Generic.Emph */ +.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */ +.highlight .gr { color: #FF0000 } /* Generic.Error */ +.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */ +.highlight .gi { color: #00A000 } /* Generic.Inserted */ +.highlight .go { color: #333333 } /* Generic.Output */ +.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */ +.highlight .gs { font-weight: bold } /* Generic.Strong */ +.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */ +.highlight .gt { color: #0044DD } /* Generic.Traceback */ +.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */ +.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */ +.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */ +.highlight .kp { color: #007020 } /* Keyword.Pseudo */ +.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */ +.highlight .kt { color: #902000 } /* Keyword.Type */ +.highlight .m { color: #208050 } /* Literal.Number */ +.highlight .s { color: #4070a0 } /* Literal.String */ +.highlight .na { color: #4070a0 } /* Name.Attribute */ +.highlight .nb { color: #007020 } /* Name.Builtin */ +.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */ +.highlight .no { color: #60add5 } /* Name.Constant */ +.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */ 
+.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */ +.highlight .ne { color: #007020 } /* Name.Exception */ +.highlight .nf { color: #06287e } /* Name.Function */ +.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */ +.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */ +.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */ +.highlight .nv { color: #bb60d5 } /* Name.Variable */ +.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */ +.highlight .w { color: #bbbbbb } /* Text.Whitespace */ +.highlight .mb { color: #208050 } /* Literal.Number.Bin */ +.highlight .mf { color: #208050 } /* Literal.Number.Float */ +.highlight .mh { color: #208050 } /* Literal.Number.Hex */ +.highlight .mi { color: #208050 } /* Literal.Number.Integer */ +.highlight .mo { color: #208050 } /* Literal.Number.Oct */ +.highlight .sa { color: #4070a0 } /* Literal.String.Affix */ +.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */ +.highlight .sc { color: #4070a0 } /* Literal.String.Char */ +.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */ +.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */ +.highlight .s2 { color: #4070a0 } /* Literal.String.Double */ +.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */ +.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */ +.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */ +.highlight .sx { color: #c65d09 } /* Literal.String.Other */ +.highlight .sr { color: #235388 } /* Literal.String.Regex */ +.highlight .s1 { color: #4070a0 } /* Literal.String.Single */ +.highlight .ss { color: #517918 } /* Literal.String.Symbol */ +.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */ +.highlight .fm { color: #06287e } /* Name.Function.Magic */ +.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */ +.highlight .vg { color: #bb60d5 } /* 
Name.Variable.Global */ +.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */ +.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */ +.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */ \ No newline at end of file diff --git a/_static/searchtools.js b/_static/searchtools.js new file mode 100644 index 00000000..92da3f8b --- /dev/null +++ b/_static/searchtools.js @@ -0,0 +1,619 @@ +/* + * searchtools.js + * ~~~~~~~~~~~~~~~~ + * + * Sphinx JavaScript utilities for the full-text search. + * + * :copyright: Copyright 2007-2024 by the Sphinx team, see AUTHORS. + * :license: BSD, see LICENSE for details. + * + */ +"use strict"; + +/** + * Simple result scoring code. + */ +if (typeof Scorer === "undefined") { + var Scorer = { + // Implement the following function to further tweak the score for each result + // The function takes a result array [docname, title, anchor, descr, score, filename] + // and returns the new score. + /* + score: result => { + const [docname, title, anchor, descr, score, filename] = result + return score + }, + */ + + // query matches the full name of an object + objNameMatch: 11, + // or matches in the last dotted part of the object name + objPartialMatch: 6, + // Additive scores depending on the priority of the object + objPrio: { + 0: 15, // used to be importantResults + 1: 5, // used to be objectResults + 2: -5, // used to be unimportantResults + }, + // Used when the priority is not in the mapping. 
+ objPrioDefault: 0, + + // query found in title + title: 15, + partialTitle: 7, + // query found in terms + term: 5, + partialTerm: 2, + }; +} + +const _removeChildren = (element) => { + while (element && element.lastChild) element.removeChild(element.lastChild); +}; + +/** + * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping + */ +const _escapeRegExp = (string) => + string.replace(/[.*+\-?^${}()|[\]\\]/g, "\\$&"); // $& means the whole matched string + +const _displayItem = (item, searchTerms, highlightTerms) => { + const docBuilder = DOCUMENTATION_OPTIONS.BUILDER; + const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX; + const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX; + const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY; + const contentRoot = document.documentElement.dataset.content_root; + + const [docName, title, anchor, descr, score, _filename] = item; + + let listItem = document.createElement("li"); + let requestUrl; + let linkUrl; + if (docBuilder === "dirhtml") { + // dirhtml builder + let dirname = docName + "/"; + if (dirname.match(/\/index\/$/)) + dirname = dirname.substring(0, dirname.length - 6); + else if (dirname === "index/") dirname = ""; + requestUrl = contentRoot + dirname; + linkUrl = requestUrl; + } else { + // normal html builders + requestUrl = contentRoot + docName + docFileSuffix; + linkUrl = docName + docLinkSuffix; + } + let linkEl = listItem.appendChild(document.createElement("a")); + linkEl.href = linkUrl + anchor; + linkEl.dataset.score = score; + linkEl.innerHTML = title; + if (descr) { + listItem.appendChild(document.createElement("span")).innerHTML = + " (" + descr + ")"; + // highlight search terms in the description + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + } + else if (showSearchSummary) + fetch(requestUrl) + .then((responseData) => 
responseData.text()) + .then((data) => { + if (data) + listItem.appendChild( + Search.makeSearchSummary(data, searchTerms, anchor) + ); + // highlight search terms in the summary + if (SPHINX_HIGHLIGHT_ENABLED) // set in sphinx_highlight.js + highlightTerms.forEach((term) => _highlightText(listItem, term, "highlighted")); + }); + Search.output.appendChild(listItem); +}; +const _finishSearch = (resultCount) => { + Search.stopPulse(); + Search.title.innerText = _("Search Results"); + if (!resultCount) + Search.status.innerText = Documentation.gettext( + "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories." + ); + else + Search.status.innerText = _( + "Search finished, found ${resultCount} page(s) matching the search query." + ).replace('${resultCount}', resultCount); +}; +const _displayNextItem = ( + results, + resultCount, + searchTerms, + highlightTerms, +) => { + // results left, load the summary and display it + // this is intended to be dynamic (don't sub resultsCount) + if (results.length) { + _displayItem(results.pop(), searchTerms, highlightTerms); + setTimeout( + () => _displayNextItem(results, resultCount, searchTerms, highlightTerms), + 5 + ); + } + // search finished, update title and status message + else _finishSearch(resultCount); +}; +// Helper function used by query() to order search results. +// Each input is an array of [docname, title, anchor, descr, score, filename]. +// Order the results by score (in opposite order of appearance, since the +// `_displayNextItem` function uses pop() to retrieve items) and then alphabetically. +const _orderResultsByScoreThenName = (a, b) => { + const leftScore = a[4]; + const rightScore = b[4]; + if (leftScore === rightScore) { + // same score: sort alphabetically + const leftTitle = a[1].toLowerCase(); + const rightTitle = b[1].toLowerCase(); + if (leftTitle === rightTitle) return 0; + return leftTitle > rightTitle ? 
-1 : 1; // inverted is intentional + } + return leftScore > rightScore ? 1 : -1; +}; + +/** + * Default splitQuery function. Can be overridden in ``sphinx.search`` with a + * custom function per language. + * + * The regular expression works by splitting the string on consecutive characters + * that are not Unicode letters, numbers, underscores, or emoji characters. + * This is the same as ``\W+`` in Python, preserving the surrogate pair area. + */ +if (typeof splitQuery === "undefined") { + var splitQuery = (query) => query + .split(/[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu) + .filter(term => term) // remove remaining empty strings +} + +/** + * Search Module + */ +const Search = { + _index: null, + _queued_query: null, + _pulse_status: -1, + + htmlToText: (htmlString, anchor) => { + const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html'); + for (const removalQuery of [".headerlinks", "script", "style"]) { + htmlElement.querySelectorAll(removalQuery).forEach((el) => { el.remove() }); + } + if (anchor) { + const anchorContent = htmlElement.querySelector(`[role="main"] ${anchor}`); + if (anchorContent) return anchorContent.textContent; + + console.warn( + `Anchored content block not found. Sphinx search tries to obtain it via DOM query '[role=main] ${anchor}'. Check your theme or template.` + ); + } + + // if anchor not specified or not found, fall back to main content + const docContent = htmlElement.querySelector('[role="main"]'); + if (docContent) return docContent.textContent; + + console.warn( + "Content block not found. Sphinx search tries to obtain it via DOM query '[role=main]'. Check your theme or template." 
+ ); + return ""; + }, + + init: () => { + const query = new URLSearchParams(window.location.search).get("q"); + document + .querySelectorAll('input[name="q"]') + .forEach((el) => (el.value = query)); + if (query) Search.performSearch(query); + }, + + loadIndex: (url) => + (document.body.appendChild(document.createElement("script")).src = url), + + setIndex: (index) => { + Search._index = index; + if (Search._queued_query !== null) { + const query = Search._queued_query; + Search._queued_query = null; + Search.query(query); + } + }, + + hasIndex: () => Search._index !== null, + + deferQuery: (query) => (Search._queued_query = query), + + stopPulse: () => (Search._pulse_status = -1), + + startPulse: () => { + if (Search._pulse_status >= 0) return; + + const pulse = () => { + Search._pulse_status = (Search._pulse_status + 1) % 4; + Search.dots.innerText = ".".repeat(Search._pulse_status); + if (Search._pulse_status >= 0) window.setTimeout(pulse, 500); + }; + pulse(); + }, + + /** + * perform a search for something (or wait until index is loaded) + */ + performSearch: (query) => { + // create the required interface elements + const searchText = document.createElement("h2"); + searchText.textContent = _("Searching"); + const searchSummary = document.createElement("p"); + searchSummary.classList.add("search-summary"); + searchSummary.innerText = ""; + const searchList = document.createElement("ul"); + searchList.classList.add("search"); + + const out = document.getElementById("search-results"); + Search.title = out.appendChild(searchText); + Search.dots = Search.title.appendChild(document.createElement("span")); + Search.status = out.appendChild(searchSummary); + Search.output = out.appendChild(searchList); + + const searchProgress = document.getElementById("search-progress"); + // Some themes don't use the search progress node + if (searchProgress) { + searchProgress.innerText = _("Preparing search..."); + } + Search.startPulse(); + + // index already loaded, the 
browser was quick! + if (Search.hasIndex()) Search.query(query); + else Search.deferQuery(query); + }, + + _parseQuery: (query) => { + // stem the search terms and add them to the correct list + const stemmer = new Stemmer(); + const searchTerms = new Set(); + const excludedTerms = new Set(); + const highlightTerms = new Set(); + const objectTerms = new Set(splitQuery(query.toLowerCase().trim())); + splitQuery(query.trim()).forEach((queryTerm) => { + const queryTermLower = queryTerm.toLowerCase(); + + // maybe skip this "word" + // stopwords array is from language_data.js + if ( + stopwords.indexOf(queryTermLower) !== -1 || + queryTerm.match(/^\d+$/) + ) + return; + + // stem the word + let word = stemmer.stemWord(queryTermLower); + // select the correct list + if (word[0] === "-") excludedTerms.add(word.substr(1)); + else { + searchTerms.add(word); + highlightTerms.add(queryTermLower); + } + }); + + if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js + localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" ")) + } + + // console.debug("SEARCH: searching for:"); + // console.info("required: ", [...searchTerms]); + // console.info("excluded: ", [...excludedTerms]); + + return [query, searchTerms, excludedTerms, highlightTerms, objectTerms]; + }, + + /** + * execute search (requires search index to be loaded) + */ + _performSearch: (query, searchTerms, excludedTerms, highlightTerms, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + const allTitles = Search._index.alltitles; + const indexEntries = Search._index.indexentries; + + // Collect multiple result groups to be sorted separately and then ordered. + // Each is an array of [docname, title, anchor, descr, score, filename]. 
+ const normalResults = []; + const nonMainIndexResults = []; + + _removeChildren(document.getElementById("search-progress")); + + const queryLower = query.toLowerCase().trim(); + for (const [title, foundTitles] of Object.entries(allTitles)) { + if (title.toLowerCase().trim().includes(queryLower) && (queryLower.length >= title.length/2)) { + for (const [file, id] of foundTitles) { + let score = Math.round(100 * queryLower.length / title.length) + normalResults.push([ + docNames[file], + titles[file] !== title ? `${titles[file]} > ${title}` : title, + id !== null ? "#" + id : "", + null, + score, + filenames[file], + ]); + } + } + } + + // search for explicit entries in index directives + for (const [entry, foundEntries] of Object.entries(indexEntries)) { + if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) { + for (const [file, id, isMain] of foundEntries) { + const score = Math.round(100 * queryLower.length / entry.length); + const result = [ + docNames[file], + titles[file], + id ? "#" + id : "", + null, + score, + filenames[file], + ]; + if (isMain) { + normalResults.push(result); + } else { + nonMainIndexResults.push(result); + } + } + } + } + + // lookup as object + objectTerms.forEach((term) => + normalResults.push(...Search.performObjectSearch(term, objectTerms)) + ); + + // lookup as search terms in fulltext + normalResults.push(...Search.performTermsSearch(searchTerms, excludedTerms)); + + // let the scorer override scores with a custom scoring function + if (Scorer.score) { + normalResults.forEach((item) => (item[4] = Scorer.score(item))); + nonMainIndexResults.forEach((item) => (item[4] = Scorer.score(item))); + } + + // Sort each group of results by score and then alphabetically by name. + normalResults.sort(_orderResultsByScoreThenName); + nonMainIndexResults.sort(_orderResultsByScoreThenName); + + // Combine the result groups in (reverse) order. 
+ // Non-main index entries are typically arbitrary cross-references, + // so display them after other results. + let results = [...nonMainIndexResults, ...normalResults]; + + // remove duplicate search results + // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept + let seen = new Set(); + results = results.reverse().reduce((acc, result) => { + let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(','); + if (!seen.has(resultStr)) { + acc.push(result); + seen.add(resultStr); + } + return acc; + }, []); + + return results.reverse(); + }, + + query: (query) => { + const [searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms] = Search._parseQuery(query); + const results = Search._performSearch(searchQuery, searchTerms, excludedTerms, highlightTerms, objectTerms); + + // for debugging + //Search.lastresults = results.slice(); // a copy + // console.info("search results:", Search.lastresults); + + // print the results + _displayNextItem(results, results.length, searchTerms, highlightTerms); + }, + + /** + * search for object names + */ + performObjectSearch: (object, objectTerms) => { + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const objects = Search._index.objects; + const objNames = Search._index.objnames; + const titles = Search._index.titles; + + const results = []; + + const objectSearchCallback = (prefix, match) => { + const name = match[4] + const fullname = (prefix ? prefix + "." : "") + name; + const fullnameLower = fullname.toLowerCase(); + if (fullnameLower.indexOf(object) < 0) return; + + let score = 0; + const parts = fullnameLower.split("."); + + // check for different match types: exact matches of full name or + // "last name" (i.e. 
last dotted part) + if (fullnameLower === object || parts.slice(-1)[0] === object) + score += Scorer.objNameMatch; + else if (parts.slice(-1)[0].indexOf(object) > -1) + score += Scorer.objPartialMatch; // matches in last name + + const objName = objNames[match[1]][2]; + const title = titles[match[0]]; + + // If more than one term searched for, we require other words to be + // found in the name/title/description + const otherTerms = new Set(objectTerms); + otherTerms.delete(object); + if (otherTerms.size > 0) { + const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase(); + if ( + [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0) + ) + return; + } + + let anchor = match[3]; + if (anchor === "") anchor = fullname; + else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname; + + const descr = objName + _(", in ") + title; + + // add custom score for some objects according to scorer + if (Scorer.objPrio.hasOwnProperty(match[2])) + score += Scorer.objPrio[match[2]]; + else score += Scorer.objPrioDefault; + + results.push([ + docNames[match[0]], + fullname, + "#" + anchor, + descr, + score, + filenames[match[0]], + ]); + }; + Object.keys(objects).forEach((prefix) => + objects[prefix].forEach((array) => + objectSearchCallback(prefix, array) + ) + ); + return results; + }, + + /** + * search for full-text terms in the index + */ + performTermsSearch: (searchTerms, excludedTerms) => { + // prepare search + const terms = Search._index.terms; + const titleTerms = Search._index.titleterms; + const filenames = Search._index.filenames; + const docNames = Search._index.docnames; + const titles = Search._index.titles; + + const scoreMap = new Map(); + const fileMap = new Map(); + + // perform the search on the required terms + searchTerms.forEach((word) => { + const files = []; + const arr = [ + { files: terms[word], score: Scorer.term }, + { files: titleTerms[word], score: Scorer.title }, + ]; + // add support for partial matches + 
if (word.length > 2) { + const escapedWord = _escapeRegExp(word); + if (!terms.hasOwnProperty(word)) { + Object.keys(terms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: terms[term], score: Scorer.partialTerm }); + }); + } + if (!titleTerms.hasOwnProperty(word)) { + Object.keys(titleTerms).forEach((term) => { + if (term.match(escapedWord)) + arr.push({ files: titleTerms[term], score: Scorer.partialTitle }); + }); + } + } + + // no match but word was a required one + if (arr.every((record) => record.files === undefined)) return; + + // found search word in contents + arr.forEach((record) => { + if (record.files === undefined) return; + + let recordFiles = record.files; + if (recordFiles.length === undefined) recordFiles = [recordFiles]; + files.push(...recordFiles); + + // set score for the word in each file + recordFiles.forEach((file) => { + if (!scoreMap.has(file)) scoreMap.set(file, {}); + scoreMap.get(file)[word] = record.score; + }); + }); + + // create the mapping + files.forEach((file) => { + if (!fileMap.has(file)) fileMap.set(file, [word]); + else if (fileMap.get(file).indexOf(word) === -1) fileMap.get(file).push(word); + }); + }); + + // now check if the files don't contain excluded terms + const results = []; + for (const [file, wordList] of fileMap) { + // check if all requirements are matched + + // as search terms with length < 3 are discarded + const filteredTermCount = [...searchTerms].filter( + (term) => term.length > 2 + ).length; + if ( + wordList.length !== searchTerms.size && + wordList.length !== filteredTermCount + ) + continue; + + // ensure that none of the excluded terms is in the search result + if ( + [...excludedTerms].some( + (term) => + terms[term] === file || + titleTerms[term] === file || + (terms[term] || []).includes(file) || + (titleTerms[term] || []).includes(file) + ) + ) + break; + + // select one (max) score for the file. 
+ const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w])); + // add result to the result list + results.push([ + docNames[file], + titles[file], + "", + null, + score, + filenames[file], + ]); + } + return results; + }, + + /** + * helper function to return a node containing the + * search summary for a given text. keywords is a list + * of stemmed words. + */ + makeSearchSummary: (htmlText, keywords, anchor) => { + const text = Search.htmlToText(htmlText, anchor); + if (text === "") return null; + + const textLower = text.toLowerCase(); + const actualStartPosition = [...keywords] + .map((k) => textLower.indexOf(k.toLowerCase())) + .filter((i) => i > -1) + .slice(-1)[0]; + const startWithContext = Math.max(actualStartPosition - 120, 0); + + const top = startWithContext === 0 ? "" : "..."; + const tail = startWithContext + 240 < text.length ? "..." : ""; + + let summary = document.createElement("p"); + summary.classList.add("context"); + summary.textContent = top + text.substr(startWithContext, 240).trim() + tail; + + return summary; + }, +}; + +_ready(Search.init); diff --git a/_static/sphinx_highlight.js b/_static/sphinx_highlight.js new file mode 100644 index 00000000..8a96c69a --- /dev/null +++ b/_static/sphinx_highlight.js @@ -0,0 +1,154 @@ +/* Highlighting utilities for Sphinx HTML documentation. */ +"use strict"; + +const SPHINX_HIGHLIGHT_ENABLED = true + +/** + * highlight a given string on a node by wrapping it in + * span elements with the given class name. 
+ */ +const _highlight = (node, addItems, text, className) => { + if (node.nodeType === Node.TEXT_NODE) { + const val = node.nodeValue; + const parent = node.parentNode; + const pos = val.toLowerCase().indexOf(text); + if ( + pos >= 0 && + !parent.classList.contains(className) && + !parent.classList.contains("nohighlight") + ) { + let span; + + const closestNode = parent.closest("body, svg, foreignObject"); + const isInSVG = closestNode && closestNode.matches("svg"); + if (isInSVG) { + span = document.createElementNS("http://www.w3.org/2000/svg", "tspan"); + } else { + span = document.createElement("span"); + span.classList.add(className); + } + + span.appendChild(document.createTextNode(val.substr(pos, text.length))); + const rest = document.createTextNode(val.substr(pos + text.length)); + parent.insertBefore( + span, + parent.insertBefore( + rest, + node.nextSibling + ) + ); + node.nodeValue = val.substr(0, pos); + /* There may be more occurrences of search term in this node. So call this + * function recursively on the remaining fragment. + */ + _highlight(rest, addItems, text, className); + + if (isInSVG) { + const rect = document.createElementNS( + "http://www.w3.org/2000/svg", + "rect" + ); + const bbox = parent.getBBox(); + rect.x.baseVal.value = bbox.x; + rect.y.baseVal.value = bbox.y; + rect.width.baseVal.value = bbox.width; + rect.height.baseVal.value = bbox.height; + rect.setAttribute("class", className); + addItems.push({ parent: parent, target: rect }); + } + } + } else if (node.matches && !node.matches("button, select, textarea")) { + node.childNodes.forEach((el) => _highlight(el, addItems, text, className)); + } +}; +const _highlightText = (thisNode, text, className) => { + let addItems = []; + _highlight(thisNode, addItems, text, className); + addItems.forEach((obj) => + obj.parent.insertAdjacentElement("beforebegin", obj.target) + ); +}; + +/** + * Small JavaScript module for the documentation. 
+ */ +const SphinxHighlight = { + + /** + * highlight the search words provided in localstorage in the text + */ + highlightSearchWords: () => { + if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight + + // get and clear terms from localstorage + const url = new URL(window.location); + const highlight = + localStorage.getItem("sphinx_highlight_terms") + || url.searchParams.get("highlight") + || ""; + localStorage.removeItem("sphinx_highlight_terms") + url.searchParams.delete("highlight"); + window.history.replaceState({}, "", url); + + // get individual terms from highlight string + const terms = highlight.toLowerCase().split(/\s+/).filter(x => x); + if (terms.length === 0) return; // nothing to do + + // There should never be more than one element matching "div.body" + const divBody = document.querySelectorAll("div.body"); + const body = divBody.length ? divBody[0] : document.querySelector("body"); + window.setTimeout(() => { + terms.forEach((term) => _highlightText(body, term, "highlighted")); + }, 10); + + const searchBox = document.getElementById("searchbox"); + if (searchBox === null) return; + searchBox.appendChild( + document + .createRange() + .createContextualFragment( + '" + ) + ); + }, + + /** + * helper function to hide the search marks again + */ + hideSearchWords: () => { + document + .querySelectorAll("#searchbox .highlight-link") + .forEach((el) => el.remove()); + document + .querySelectorAll("span.highlighted") + .forEach((el) => el.classList.remove("highlighted")); + localStorage.removeItem("sphinx_highlight_terms") + }, + + initEscapeListener: () => { + // only install a listener if it is really needed + if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return; + + document.addEventListener("keydown", (event) => { + // bail for input elements + if (BLACKLISTED_KEY_CONTROL_ELEMENTS.has(document.activeElement.tagName)) return; + // bail with special keys + if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return; + 
if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && (event.key === "Escape")) { + SphinxHighlight.hideSearchWords(); + event.preventDefault(); + } + }); + }, +}; + +_ready(() => { + /* Do not call highlightSearchWords() when we are on the search page. + * It will highlight words from the *previous* search query. + */ + if (typeof Search === "undefined") SphinxHighlight.highlightSearchWords(); + SphinxHighlight.initEscapeListener(); +}); diff --git a/api.html b/api.html new file mode 100644 index 00000000..c133c1ac --- /dev/null +++ b/api.html @@ -0,0 +1,318 @@ + + + + + + + <no title> — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ + + + + + + +

elm

Energy Language Model

+ + +
+
+
+ +
+ +
+

© Copyright 2023, Alliance for Sustainable Energy, LLC.

+
+ + Built with Sphinx using a + theme + provided by Read the Docs. + + +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/examples.energy_wizard.html b/examples.energy_wizard.html new file mode 100644 index 00000000..24907998 --- /dev/null +++ b/examples.energy_wizard.html @@ -0,0 +1,341 @@ + + + + + + + The Energy Wizard — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

The Energy Wizard

+

This example demonstrates how to download a set of PDFs, convert to text, +chunk, embed, and then run a streamlit app that interfaces an LLM with the text +corpus.

+

Notes:

+
    +
  • In this example, we use the optional popper PDF utility which you will have to install separately. You can also use the python-native PyPDF2 package when calling using elm.pdf.PDFtoTXT but we have found that poppler works better.

  • +
  • Streamlit is required to run this app, which is not an explicit requirement of this repo (pip install streamlit)

  • +
  • You need to set up your own OpenAI or Azure-OpenAI API keys to run the scripts.

  • +
+
+

Downloading and Embedding PDFs

+

Run python ./retrieve_docs.py to retrieve 20 of the latest NREL technical +reports from OSTI. The script then converts the PDFs to text and then runs the +text through the OpenAI embedding model.

+
+
+

Running the Streamlit App

+

Run streamlit run ./run_app.py to start the streamlit app. You can now chat +with the Energy Wizard, which will interface with the downloaded text corpus to +answer your questions.

+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/examples.html b/examples.html new file mode 100644 index 00000000..f132c106 --- /dev/null +++ b/examples.html @@ -0,0 +1,337 @@ + + + + + + + Examples — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+ + +
+
+
+
+ + + + \ No newline at end of file diff --git a/examples.ordinance_gpt.html b/examples.ordinance_gpt.html new file mode 100644 index 00000000..f0fb8ea9 --- /dev/null +++ b/examples.ordinance_gpt.html @@ -0,0 +1,380 @@ + + + + + + + Ordinance GPT — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Ordinance GPT

+

This example folder contains supporting documents, results, and code for the +Ordinance GPT experiment.

+
+

Prerequisites

+

We recommend installing the pytesseract module to allow PDF retrieval for scanned documents. +See the ordinance-specific installation instructions +for more details.

+
+
+

Setup

+

There are a few key things you need to set up in order to run ordinance retrieval and extraction. +First, you must specify which counties you want to process. You can do this by setting up a CSV file +with a County and a State column. Each row in the CSV file then represents a single county to process. +See the example CSV +file for reference.

+

Once you have set up the county CSV, you can fill out the +template JSON config. +See the documentation for the “process_counties_with_openai” function +for an explanation of all the allowed inputs to the configuration file. +Some notable inputs here are the azure* keys, which should be configured to match your Azure OpenAI API +deployment (unless it’s defined in your environment with the AZURE_OPENAI_API_KEY, AZURE_OPENAI_VERSION, +and AZURE_OPENAI_ENDPOINT keys, in which case you can remove these keys completely), +and the pytesseract_exe_fp key, which should point to the pytesseract executable path on your +local machine (or removed from the config file if you are opting out of OCR). You may also have to adjust +the llm_service_rate_limit to match your deployment’s API tokens-per-minute limit. Be sure to provide full +paths to all files/directories unless you are executing the program from your working folder.

+
+
+

Execution

+

Once you are happy with the configuration parameters, you can kick off the processing using

+
$ elm ords -c config.json
+
+
+

You may also wish to add a -v option to print logs to the terminal (however, keep in mind that the code runs +asynchronously, so the the logs will not print in order).

+
+

Warning

+

Running all of the 85 counties given in the sample county CSV file can cost $700-$1000 in API calls. We recommend running a smaller subset for example purposes.

+
+
+
+

Source Ordinance Documents

+

The ordinance documents downloaded using (an older version of) this example code can be downloaded here.

+
+
+

Debugging

+

Not sure why things aren’t working? No error messages? Make sure you run the CLI call with a -v flag for “verbose” logging (e.g., $ elm ords -c config.json -v)

+

Errors on import statements? Trouble importing pdftotext with cryptic error messages like symbol not found in flat namespace? Follow the ordinance-specific install instructions exactly.

+
+
+

Extension to Other Technologies

+

Extending this functionality to other technologies is possible but requires deeper understanding of the underlying processes. +We recommend you start out by examining the decision tree queries in graphs.py +as well as how they are applied in parse.py. Once you +have a firm understanding of these two modules, look through the +document validation routines <https://github.com/NREL/elm/blob/main/elm/ords/validation> to get a better sense of how to +adjust the web-scraping portion of the code to your technology. When you have set up the validation and parsing for your +technology, put it all together by adjusting the “process_counties_with_openai” function +to call your new routines.

+
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/genindex.html b/genindex.html new file mode 100644 index 00000000..3d6cadfc --- /dev/null +++ b/genindex.html @@ -0,0 +1,2080 @@ + + + + + + Index — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ + +

Index

+ +
+ Symbols + | _ + | A + | B + | C + | D + | E + | F + | G + | H + | I + | K + | L + | M + | N + | O + | P + | R + | S + | T + | U + | V + | W + | Y + +
+

Symbols

+ + + +
+ +

_

+ + + +
+ +

A

+ + + +
+ +

B

+ + + +
+ +

C

+ + + +
+ +

D

+ + + +
+ +

E

+ + + +
    +
  • + elm + +
  • +
  • + elm command line option + +
  • +
  • + elm-ords command line option + +
  • +
  • + elm.base + +
  • +
  • + elm.chunk + +
  • +
  • + elm.cli + +
  • +
  • + elm.embed + +
  • +
  • + elm.exceptions + +
  • +
  • + elm.ords + +
  • +
  • + elm.ords.download + +
  • +
  • + elm.ords.extraction + +
  • +
  • + elm.ords.extraction.apply + +
  • +
  • + elm.ords.extraction.date + +
  • +
  • + elm.ords.extraction.features + +
  • +
  • + elm.ords.extraction.graphs + +
  • +
  • + elm.ords.extraction.ngrams + +
  • +
  • + elm.ords.extraction.ordinance + +
  • +
  • + elm.ords.extraction.parse + +
  • +
  • + elm.ords.extraction.tree + +
  • +
  • + elm.ords.llm + +
  • +
  • + elm.ords.llm.calling + +
  • +
  • + elm.ords.process + +
  • +
  • + elm.ords.services + +
  • +
  • + elm.ords.services.base + +
  • +
  • + elm.ords.services.cpu + +
  • +
  • + elm.ords.services.openai + +
  • +
  • + elm.ords.services.provider + +
  • +
  • + elm.ords.services.queues + +
  • +
  • + elm.ords.services.threaded + +
  • +
  • + elm.ords.services.usage + +
  • +
  • + elm.ords.utilities + +
  • +
  • + elm.ords.utilities.counties + +
  • +
  • + elm.ords.utilities.exceptions + +
  • +
  • + elm.ords.utilities.location + +
  • +
+ +

F

+ + + +
+ +

G

+ + + +
+ +

H

+ + + +
+ +

I

+ + + +
+ +

K

+ + +
+ +

L

+ + + +
+ +

M

+ + +
+ +

N

+ + + +
+ +

O

+ + + +
+ +

P

+ + + +
+ +

R

+ + + +
+ +

S

+ + + +
+ +

T

+ + + +
+ +

U

+ + + +
+ +

V

+ + + +
+ +

W

+ + + +
+ +

Y

+ + +
+ + + +
+
+
+ +
+ +
+

© Copyright 2023, Alliance for Sustainable Energy, LLC.

+
+ + Built with Sphinx using a + theme + provided by Read the Docs. + + +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/index.html b/index.html new file mode 100644 index 00000000..4f916cfd --- /dev/null +++ b/index.html @@ -0,0 +1,364 @@ + + + + + + + Energy Language Model (ELM) — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+
+
+

Energy Language Model (ELM)

+https://github.com/NREL/elm/workflows/Documentation/badge.svg +https://github.com/NREL/elm/workflows/pytests/badge.svg +https://github.com/NREL/elm/workflows/Lint%20Code%20Base/badge.svg +https://img.shields.io/pypi/pyversions/NREL-elm.svg +https://badge.fury.io/py/NREL-elm.svg +https://zenodo.org/badge/690793778.svg +

The Energy Language Model (ELM) software provides interfaces to apply Large Language Models (LLMs) like ChatGPT and GPT-4 to energy research. For example, you might be interested in:

+ +
+

Installing ELM

+

NOTE: If you are installing ELM to run ordinance scraping and extraction, +see the ordinance-specific installation instructions.

+

Option #1 (basic usage):

+
    +
  1. pip install NREL-elm

  2. +
+

Option #2 (developer install):

+
    +
  1. from home dir, git clone git@github.com:NREL/elm.git

  2. +
  3. +
    Create elm environment and install package
      +
    1. Create a conda env: conda create -n elm

    2. +
    3. Run the command: conda activate elm

    4. +
    5. cd into the repo cloned in 1.

    6. +
    7. Prior to running pip below, make sure the branch is correct (install +from main!)

    8. +
    9. Install elm and its dependencies by running: +pip install . (or pip install -e . if running a dev branch +or working on the source code)

    10. +
    +
    +
    +
  4. +
+
+
+

Acknowledgments

+

This work was authored by the National Renewable Energy Laboratory, operated by Alliance for Sustainable Energy, LLC, for the U.S. Department of Energy (DOE) under Contract No. DE-AC36-08GO28308. Funding provided by the DOE Wind Energy Technologies Office (WETO), the DOE Solar Energy Technologies Office (SETO), and internal research funds at the National Renewable Energy Laboratory. The views expressed in the article do not necessarily represent the views of the DOE or the U.S. Government. The U.S. Government retains and the publisher, by accepting the article for publication, acknowledges that the U.S. Government retains a nonexclusive, paid-up, irrevocable, worldwide license to publish or reproduce the published form of this work, or allow others to do so, for U.S. Government purposes.

+
+
+ + +
+
+
+ +
+ +
+

© Copyright 2023, Alliance for Sustainable Energy, LLC.

+
+ + Built with Sphinx using a + theme + provided by Read the Docs. + + +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/installation.html b/installation.html new file mode 100644 index 00000000..fca0f9aa --- /dev/null +++ b/installation.html @@ -0,0 +1,343 @@ + + + + + + + Installation — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+ +
+
+
+
+ +
+

Installation

+

NOTE: If you are installing ELM to run ordinance scraping and extraction, +see the ordinance-specific installation instructions.

+

Option #1 (basic usage):

+
    +
  1. pip install NREL-elm

  2. +
+

Option #2 (developer install):

+
    +
  1. from home dir, git clone git@github.com:NREL/elm.git

  2. +
  3. +
    Create elm environment and install package
      +
    1. Create a conda env: conda create -n elm

    2. +
    3. Run the command: conda activate elm

    4. +
    5. cd into the repo cloned in 1.

    6. +
    7. Prior to running pip below, make sure the branch is correct (install +from main!)

    8. +
    9. Install elm and its dependencies by running: +pip install . (or pip install -e . if running a dev branch +or working on the source code)

    10. +
    +
    +
    +
  4. +
+
+ + +
+
+ +
+
+
+
+ + + + \ No newline at end of file diff --git a/objects.inv b/objects.inv new file mode 100644 index 00000000..99a2f956 Binary files /dev/null and b/objects.inv differ diff --git a/py-modindex.html b/py-modindex.html new file mode 100644 index 00000000..b3e92be2 --- /dev/null +++ b/py-modindex.html @@ -0,0 +1,585 @@ + + + + + + Python Module Index — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ + +

Python Module Index

+ +
+ e +
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
 
+ e
+ elm +
    + elm.base +
    + elm.chunk +
    + elm.cli +
    + elm.embed +
    + elm.exceptions +
    + elm.ords +
    + elm.ords.download +
    + elm.ords.extraction +
    + elm.ords.extraction.apply +
    + elm.ords.extraction.date +
    + elm.ords.extraction.features +
    + elm.ords.extraction.graphs +
    + elm.ords.extraction.ngrams +
    + elm.ords.extraction.ordinance +
    + elm.ords.extraction.parse +
    + elm.ords.extraction.tree +
    + elm.ords.llm +
    + elm.ords.llm.calling +
    + elm.ords.process +
    + elm.ords.services +
    + elm.ords.services.base +
    + elm.ords.services.cpu +
    + elm.ords.services.openai +
    + elm.ords.services.provider +
    + elm.ords.services.queues +
    + elm.ords.services.threaded +
    + elm.ords.services.usage +
    + elm.ords.utilities +
    + elm.ords.utilities.counties +
    + elm.ords.utilities.exceptions +
    + elm.ords.utilities.location +
    + elm.ords.utilities.parsing +
    + elm.ords.utilities.queued_logging +
    + elm.ords.validation +
    + elm.ords.validation.content +
    + elm.ords.validation.location +
    + elm.osti +
    + elm.pdf +
    + elm.summary +
    + elm.tree +
    + elm.utilities +
    + elm.utilities.parse +
    + elm.utilities.retry +
    + elm.version +
    + elm.web +
    + elm.web.document +
    + elm.web.file_loader +
    + elm.web.google_search +
    + elm.web.html_pw +
    + elm.web.utilities +
    + elm.wizard +
+ + +
+
+
+ +
+ +
+

© Copyright 2023, Alliance for Sustainable Energy, LLC.

+
+ + Built with Sphinx using a + theme + provided by Read the Docs. + + +
+
+
+
+
+ + + + \ No newline at end of file diff --git a/search.html b/search.html new file mode 100644 index 00000000..0340088e --- /dev/null +++ b/search.html @@ -0,0 +1,330 @@ + + + + + + Search — elm 0.0.4 documentation + + + + + + + + + + + + + + + + + + + + +
+ + +
+ +
+
+
+
    +
  • + +
  • +
  • +
+
+
+
+
+ + + + +
+ +
+ +
+
+
+ +
+ +
+

© Copyright 2023, Alliance for Sustainable Energy, LLC.

+
+ + Built with Sphinx using a + theme + provided by Read the Docs. + + +
+
+
+
+
+ + + + + + + + + \ No newline at end of file diff --git a/searchindex.js b/searchindex.js new file mode 100644 index 00000000..bb3d0e21 --- /dev/null +++ b/searchindex.js @@ -0,0 +1 @@ +Search.setIndex({"alltitles": {"Acknowledgments": [[168, "acknowledgments"]], "Command Line Interfaces (CLIs)": [[162, "command-line-interfaces-clis"]], "Debugging": [[167, "debugging"]], "Downloading and Embedding PDFs": [[166, "downloading-and-embedding-pdfs"]], "Energy Language Model (ELM)": [[168, "energy-language-model-elm"]], "Examples": [[165, "examples"]], "Execution": [[167, "execution"]], "Extension to Other Technologies": [[167, "extension-to-other-technologies"]], "Installation": [[169, "installation"]], "Installing ELM": [[168, "installing-elm"]], "Ordinance GPT": [[167, "ordinance-gpt"]], "Prerequisites": [[167, "prerequisites"]], "Running the Streamlit App": [[166, "running-the-streamlit-app"]], "Setup": [[167, "setup"]], "Source Ordinance Documents": [[167, "source-ordinance-documents"]], "The Energy Wizard": [[166, "the-energy-wizard"]], "elm": [[0, "module-elm"], [163, "elm"]], "elm.base": [[1, "module-elm.base"]], "elm.base.ApiBase": [[2, "elm-base-apibase"]], "elm.base.ApiQueue": [[3, "elm-base-apiqueue"]], "elm.chunk": [[4, "module-elm.chunk"]], "elm.chunk.Chunker": [[5, "elm-chunk-chunker"]], "elm.cli": [[6, "module-elm.cli"]], "elm.embed": [[7, "module-elm.embed"]], "elm.embed.ChunkAndEmbed": [[8, "elm-embed-chunkandembed"]], "elm.exceptions": [[9, "module-elm.exceptions"]], "elm.exceptions.ELMError": [[10, "elm-exceptions-elmerror"]], "elm.exceptions.ELMRuntimeError": [[11, "elm-exceptions-elmruntimeerror"]], "elm.ords": [[12, "module-elm.ords"]], "elm.ords.download": [[13, "module-elm.ords.download"]], "elm.ords.download.download_county_ordinance": [[14, "elm-ords-download-download-county-ordinance"]], "elm.ords.extraction": [[15, "module-elm.ords.extraction"]], "elm.ords.extraction.apply": [[16, "module-elm.ords.extraction.apply"]], 
"elm.ords.extraction.apply.check_for_ordinance_info": [[17, "elm-ords-extraction-apply-check-for-ordinance-info"]], "elm.ords.extraction.apply.extract_ordinance_text_with_llm": [[18, "elm-ords-extraction-apply-extract-ordinance-text-with-llm"]], "elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation": [[19, "elm-ords-extraction-apply-extract-ordinance-text-with-ngram-validation"]], "elm.ords.extraction.apply.extract_ordinance_values": [[20, "elm-ords-extraction-apply-extract-ordinance-values"]], "elm.ords.extraction.date": [[21, "module-elm.ords.extraction.date"]], "elm.ords.extraction.date.DateExtractor": [[22, "elm-ords-extraction-date-dateextractor"]], "elm.ords.extraction.features": [[23, "module-elm.ords.extraction.features"]], "elm.ords.extraction.features.SetbackFeatures": [[24, "elm-ords-extraction-features-setbackfeatures"]], "elm.ords.extraction.graphs": [[25, "module-elm.ords.extraction.graphs"]], "elm.ords.extraction.graphs.llm_response_does_not_start_with_no": [[26, "elm-ords-extraction-graphs-llm-response-does-not-start-with-no"]], "elm.ords.extraction.graphs.llm_response_starts_with_no": [[27, "elm-ords-extraction-graphs-llm-response-starts-with-no"]], "elm.ords.extraction.graphs.llm_response_starts_with_yes": [[28, "elm-ords-extraction-graphs-llm-response-starts-with-yes"]], "elm.ords.extraction.graphs.setup_base_graph": [[29, "elm-ords-extraction-graphs-setup-base-graph"]], "elm.ords.extraction.graphs.setup_conditional": [[30, "elm-ords-extraction-graphs-setup-conditional"]], "elm.ords.extraction.graphs.setup_graph_extra_restriction": [[31, "elm-ords-extraction-graphs-setup-graph-extra-restriction"]], "elm.ords.extraction.graphs.setup_graph_wes_types": [[32, "elm-ords-extraction-graphs-setup-graph-wes-types"]], "elm.ords.extraction.graphs.setup_multiplier": [[33, "elm-ords-extraction-graphs-setup-multiplier"]], "elm.ords.extraction.graphs.setup_participating_owner": [[34, "elm-ords-extraction-graphs-setup-participating-owner"]], 
"elm.ords.extraction.ngrams": [[35, "module-elm.ords.extraction.ngrams"]], "elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams": [[36, "elm-ords-extraction-ngrams-convert-text-to-sentence-ngrams"]], "elm.ords.extraction.ngrams.sentence_ngram_containment": [[37, "elm-ords-extraction-ngrams-sentence-ngram-containment"]], "elm.ords.extraction.ordinance": [[38, "module-elm.ords.extraction.ordinance"]], "elm.ords.extraction.ordinance.OrdinanceExtractor": [[39, "elm-ords-extraction-ordinance-ordinanceextractor"]], "elm.ords.extraction.ordinance.OrdinanceValidator": [[40, "elm-ords-extraction-ordinance-ordinancevalidator"]], "elm.ords.extraction.parse": [[41, "module-elm.ords.extraction.parse"]], "elm.ords.extraction.parse.StructuredOrdinanceParser": [[42, "elm-ords-extraction-parse-structuredordinanceparser"]], "elm.ords.extraction.tree": [[43, "module-elm.ords.extraction.tree"]], "elm.ords.extraction.tree.AsyncDecisionTree": [[44, "elm-ords-extraction-tree-asyncdecisiontree"]], "elm.ords.llm": [[45, "module-elm.ords.llm"]], "elm.ords.llm.calling": [[46, "module-elm.ords.llm.calling"]], "elm.ords.llm.calling.BaseLLMCaller": [[47, "elm-ords-llm-calling-basellmcaller"]], "elm.ords.llm.calling.ChatLLMCaller": [[48, "elm-ords-llm-calling-chatllmcaller"]], "elm.ords.llm.calling.LLMCaller": [[49, "elm-ords-llm-calling-llmcaller"]], "elm.ords.llm.calling.StructuredLLMCaller": [[50, "elm-ords-llm-calling-structuredllmcaller"]], "elm.ords.process": [[51, "module-elm.ords.process"]], "elm.ords.process.download_doc_for_county": [[52, "elm-ords-process-download-doc-for-county"]], "elm.ords.process.download_docs_for_county_with_logging": [[53, "elm-ords-process-download-docs-for-county-with-logging"]], "elm.ords.process.process_counties_with_openai": [[54, "elm-ords-process-process-counties-with-openai"]], "elm.ords.services": [[55, "module-elm.ords.services"]], "elm.ords.services.base": [[56, "module-elm.ords.services.base"]], "elm.ords.services.base.RateLimitedService": 
[[57, "elm-ords-services-base-ratelimitedservice"]], "elm.ords.services.base.Service": [[58, "elm-ords-services-base-service"]], "elm.ords.services.cpu": [[59, "module-elm.ords.services.cpu"]], "elm.ords.services.cpu.PDFLoader": [[60, "elm-ords-services-cpu-pdfloader"]], "elm.ords.services.cpu.ProcessPoolService": [[61, "elm-ords-services-cpu-processpoolservice"]], "elm.ords.services.cpu.read_pdf_doc": [[62, "elm-ords-services-cpu-read-pdf-doc"]], "elm.ords.services.cpu.read_pdf_doc_ocr": [[63, "elm-ords-services-cpu-read-pdf-doc-ocr"]], "elm.ords.services.openai": [[64, "module-elm.ords.services.openai"]], "elm.ords.services.openai.OpenAIService": [[65, "elm-ords-services-openai-openaiservice"]], "elm.ords.services.openai.count_tokens": [[66, "elm-ords-services-openai-count-tokens"]], "elm.ords.services.openai.usage_from_response": [[67, "elm-ords-services-openai-usage-from-response"]], "elm.ords.services.provider": [[68, "module-elm.ords.services.provider"]], "elm.ords.services.provider.RunningAsyncServices": [[69, "elm-ords-services-provider-runningasyncservices"]], "elm.ords.services.queues": [[70, "module-elm.ords.services.queues"]], "elm.ords.services.queues.get_service_queue": [[71, "elm-ords-services-queues-get-service-queue"]], "elm.ords.services.queues.initialize_service_queue": [[72, "elm-ords-services-queues-initialize-service-queue"]], "elm.ords.services.queues.tear_down_service_queue": [[73, "elm-ords-services-queues-tear-down-service-queue"]], "elm.ords.services.threaded": [[74, "module-elm.ords.services.threaded"]], "elm.ords.services.threaded.CleanedFileWriter": [[75, "elm-ords-services-threaded-cleanedfilewriter"]], "elm.ords.services.threaded.FileMover": [[76, "elm-ords-services-threaded-filemover"]], "elm.ords.services.threaded.OrdDBFileWriter": [[77, "elm-ords-services-threaded-orddbfilewriter"]], "elm.ords.services.threaded.StoreFileOnDisk": [[78, "elm-ords-services-threaded-storefileondisk"]], "elm.ords.services.threaded.TempFileCache": [[79, 
"elm-ords-services-threaded-tempfilecache"]], "elm.ords.services.threaded.ThreadedService": [[80, "elm-ords-services-threaded-threadedservice"]], "elm.ords.services.threaded.UsageUpdater": [[81, "elm-ords-services-threaded-usageupdater"]], "elm.ords.services.usage": [[82, "module-elm.ords.services.usage"]], "elm.ords.services.usage.TimeBoundedUsageTracker": [[83, "elm-ords-services-usage-timeboundedusagetracker"]], "elm.ords.services.usage.TimedEntry": [[84, "elm-ords-services-usage-timedentry"]], "elm.ords.services.usage.UsageTracker": [[85, "elm-ords-services-usage-usagetracker"]], "elm.ords.utilities": [[86, "module-elm.ords.utilities"]], "elm.ords.utilities.counties": [[87, "module-elm.ords.utilities.counties"]], "elm.ords.utilities.counties.county_websites": [[88, "elm-ords-utilities-counties-county-websites"]], "elm.ords.utilities.counties.load_all_county_info": [[89, "elm-ords-utilities-counties-load-all-county-info"]], "elm.ords.utilities.counties.load_counties_from_fp": [[90, "elm-ords-utilities-counties-load-counties-from-fp"]], "elm.ords.utilities.exceptions": [[91, "module-elm.ords.utilities.exceptions"]], "elm.ords.utilities.exceptions.ELMOrdsError": [[92, "elm-ords-utilities-exceptions-elmordserror"]], "elm.ords.utilities.exceptions.ELMOrdsNotInitializedError": [[93, "elm-ords-utilities-exceptions-elmordsnotinitializederror"]], "elm.ords.utilities.exceptions.ELMOrdsRuntimeError": [[94, "elm-ords-utilities-exceptions-elmordsruntimeerror"]], "elm.ords.utilities.exceptions.ELMOrdsValueError": [[95, "elm-ords-utilities-exceptions-elmordsvalueerror"]], "elm.ords.utilities.location": [[96, "module-elm.ords.utilities.location"]], "elm.ords.utilities.location.County": [[97, "elm-ords-utilities-location-county"]], "elm.ords.utilities.location.Location": [[98, "elm-ords-utilities-location-location"]], "elm.ords.utilities.parsing": [[99, "module-elm.ords.utilities.parsing"]], "elm.ords.utilities.parsing.llm_response_as_json": [[100, 
"elm-ords-utilities-parsing-llm-response-as-json"]], "elm.ords.utilities.parsing.merge_overlapping_texts": [[101, "elm-ords-utilities-parsing-merge-overlapping-texts"]], "elm.ords.utilities.queued_logging": [[102, "module-elm.ords.utilities.queued_logging"]], "elm.ords.utilities.queued_logging.LocalProcessQueueHandler": [[103, "elm-ords-utilities-queued-logging-localprocessqueuehandler"]], "elm.ords.utilities.queued_logging.LocationFileLog": [[104, "elm-ords-utilities-queued-logging-locationfilelog"]], "elm.ords.utilities.queued_logging.LocationFilter": [[105, "elm-ords-utilities-queued-logging-locationfilter"]], "elm.ords.utilities.queued_logging.LogListener": [[106, "elm-ords-utilities-queued-logging-loglistener"]], "elm.ords.utilities.queued_logging.NoLocationFilter": [[107, "elm-ords-utilities-queued-logging-nolocationfilter"]], "elm.ords.validation": [[108, "module-elm.ords.validation"]], "elm.ords.validation.content": [[109, "module-elm.ords.validation.content"]], "elm.ords.validation.content.ValidationWithMemory": [[110, "elm-ords-validation-content-validationwithmemory"]], "elm.ords.validation.content.possibly_mentions_wind": [[111, "elm-ords-validation-content-possibly-mentions-wind"]], "elm.ords.validation.location": [[112, "module-elm.ords.validation.location"]], "elm.ords.validation.location.CountyJurisdictionValidator": [[113, "elm-ords-validation-location-countyjurisdictionvalidator"]], "elm.ords.validation.location.CountyNameValidator": [[114, "elm-ords-validation-location-countynamevalidator"]], "elm.ords.validation.location.CountyValidator": [[115, "elm-ords-validation-location-countyvalidator"]], "elm.ords.validation.location.FixedMessageValidator": [[116, "elm-ords-validation-location-fixedmessagevalidator"]], "elm.ords.validation.location.URLValidator": [[117, "elm-ords-validation-location-urlvalidator"]], "elm.osti": [[118, "module-elm.osti"]], "elm.osti.OstiList": [[119, "elm-osti-ostilist"]], "elm.osti.OstiRecord": [[120, 
"elm-osti-ostirecord"]], "elm.pdf": [[121, "module-elm.pdf"]], "elm.pdf.PDFtoTXT": [[122, "elm-pdf-pdftotxt"]], "elm.summary": [[123, "module-elm.summary"]], "elm.summary.Summary": [[124, "elm-summary-summary"]], "elm.tree": [[125, "module-elm.tree"]], "elm.tree.DecisionTree": [[126, "elm-tree-decisiontree"]], "elm.utilities": [[127, "module-elm.utilities"]], "elm.utilities.parse": [[128, "module-elm.utilities.parse"]], "elm.utilities.parse.clean_headers": [[129, "elm-utilities-parse-clean-headers"]], "elm.utilities.parse.combine_pages": [[130, "elm-utilities-parse-combine-pages"]], "elm.utilities.parse.format_html_tables": [[131, "elm-utilities-parse-format-html-tables"]], "elm.utilities.parse.html_to_text": [[132, "elm-utilities-parse-html-to-text"]], "elm.utilities.parse.is_multi_col": [[133, "elm-utilities-parse-is-multi-col"]], "elm.utilities.parse.read_pdf": [[134, "elm-utilities-parse-read-pdf"]], "elm.utilities.parse.read_pdf_ocr": [[135, "elm-utilities-parse-read-pdf-ocr"]], "elm.utilities.parse.remove_blank_pages": [[136, "elm-utilities-parse-remove-blank-pages"]], "elm.utilities.parse.remove_empty_lines_or_page_footers": [[137, "elm-utilities-parse-remove-empty-lines-or-page-footers"]], "elm.utilities.parse.replace_common_pdf_conversion_chars": [[138, "elm-utilities-parse-replace-common-pdf-conversion-chars"]], "elm.utilities.parse.replace_excessive_newlines": [[139, "elm-utilities-parse-replace-excessive-newlines"]], "elm.utilities.parse.replace_multi_dot_lines": [[140, "elm-utilities-parse-replace-multi-dot-lines"]], "elm.utilities.retry": [[141, "module-elm.utilities.retry"]], "elm.utilities.retry.async_retry_with_exponential_backoff": [[142, "elm-utilities-retry-async-retry-with-exponential-backoff"]], "elm.utilities.retry.retry_with_exponential_backoff": [[143, "elm-utilities-retry-retry-with-exponential-backoff"]], "elm.version": [[144, "module-elm.version"]], "elm.web": [[145, "module-elm.web"]], "elm.web.document": [[146, 
"module-elm.web.document"]], "elm.web.document.BaseDocument": [[147, "elm-web-document-basedocument"]], "elm.web.document.HTMLDocument": [[148, "elm-web-document-htmldocument"]], "elm.web.document.PDFDocument": [[149, "elm-web-document-pdfdocument"]], "elm.web.file_loader": [[150, "module-elm.web.file_loader"]], "elm.web.file_loader.AsyncFileLoader": [[151, "elm-web-file-loader-asyncfileloader"]], "elm.web.google_search": [[152, "module-elm.web.google_search"]], "elm.web.google_search.PlaywrightGoogleLinkSearch": [[153, "elm-web-google-search-playwrightgooglelinksearch"]], "elm.web.html_pw": [[154, "module-elm.web.html_pw"]], "elm.web.html_pw.load_html_with_pw": [[155, "elm-web-html-pw-load-html-with-pw"]], "elm.web.utilities": [[156, "module-elm.web.utilities"]], "elm.web.utilities.clean_search_query": [[157, "elm-web-utilities-clean-search-query"]], "elm.web.utilities.compute_fn_from_url": [[158, "elm-web-utilities-compute-fn-from-url"]], "elm.web.utilities.write_url_doc_to_file": [[159, "elm-web-utilities-write-url-doc-to-file"]], "elm.wizard": [[160, "module-elm.wizard"]], "elm.wizard.EnergyWizard": [[161, "elm-wizard-energywizard"]], "ords": [[163, "elm-ords"]]}, "docnames": ["_autosummary/elm", "_autosummary/elm.base", "_autosummary/elm.base.ApiBase", "_autosummary/elm.base.ApiQueue", "_autosummary/elm.chunk", "_autosummary/elm.chunk.Chunker", "_autosummary/elm.cli", "_autosummary/elm.embed", "_autosummary/elm.embed.ChunkAndEmbed", "_autosummary/elm.exceptions", "_autosummary/elm.exceptions.ELMError", "_autosummary/elm.exceptions.ELMRuntimeError", "_autosummary/elm.ords", "_autosummary/elm.ords.download", "_autosummary/elm.ords.download.download_county_ordinance", "_autosummary/elm.ords.extraction", "_autosummary/elm.ords.extraction.apply", "_autosummary/elm.ords.extraction.apply.check_for_ordinance_info", "_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm", 
"_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation", "_autosummary/elm.ords.extraction.apply.extract_ordinance_values", "_autosummary/elm.ords.extraction.date", "_autosummary/elm.ords.extraction.date.DateExtractor", "_autosummary/elm.ords.extraction.features", "_autosummary/elm.ords.extraction.features.SetbackFeatures", "_autosummary/elm.ords.extraction.graphs", "_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no", "_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no", "_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes", "_autosummary/elm.ords.extraction.graphs.setup_base_graph", "_autosummary/elm.ords.extraction.graphs.setup_conditional", "_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction", "_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types", "_autosummary/elm.ords.extraction.graphs.setup_multiplier", "_autosummary/elm.ords.extraction.graphs.setup_participating_owner", "_autosummary/elm.ords.extraction.ngrams", "_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams", "_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment", "_autosummary/elm.ords.extraction.ordinance", "_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor", "_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator", "_autosummary/elm.ords.extraction.parse", "_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser", "_autosummary/elm.ords.extraction.tree", "_autosummary/elm.ords.extraction.tree.AsyncDecisionTree", "_autosummary/elm.ords.llm", "_autosummary/elm.ords.llm.calling", "_autosummary/elm.ords.llm.calling.BaseLLMCaller", "_autosummary/elm.ords.llm.calling.ChatLLMCaller", "_autosummary/elm.ords.llm.calling.LLMCaller", "_autosummary/elm.ords.llm.calling.StructuredLLMCaller", "_autosummary/elm.ords.process", "_autosummary/elm.ords.process.download_doc_for_county", 
"_autosummary/elm.ords.process.download_docs_for_county_with_logging", "_autosummary/elm.ords.process.process_counties_with_openai", "_autosummary/elm.ords.services", "_autosummary/elm.ords.services.base", "_autosummary/elm.ords.services.base.RateLimitedService", "_autosummary/elm.ords.services.base.Service", "_autosummary/elm.ords.services.cpu", "_autosummary/elm.ords.services.cpu.PDFLoader", "_autosummary/elm.ords.services.cpu.ProcessPoolService", "_autosummary/elm.ords.services.cpu.read_pdf_doc", "_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr", "_autosummary/elm.ords.services.openai", "_autosummary/elm.ords.services.openai.OpenAIService", "_autosummary/elm.ords.services.openai.count_tokens", "_autosummary/elm.ords.services.openai.usage_from_response", "_autosummary/elm.ords.services.provider", "_autosummary/elm.ords.services.provider.RunningAsyncServices", "_autosummary/elm.ords.services.queues", "_autosummary/elm.ords.services.queues.get_service_queue", "_autosummary/elm.ords.services.queues.initialize_service_queue", "_autosummary/elm.ords.services.queues.tear_down_service_queue", "_autosummary/elm.ords.services.threaded", "_autosummary/elm.ords.services.threaded.CleanedFileWriter", "_autosummary/elm.ords.services.threaded.FileMover", "_autosummary/elm.ords.services.threaded.OrdDBFileWriter", "_autosummary/elm.ords.services.threaded.StoreFileOnDisk", "_autosummary/elm.ords.services.threaded.TempFileCache", "_autosummary/elm.ords.services.threaded.ThreadedService", "_autosummary/elm.ords.services.threaded.UsageUpdater", "_autosummary/elm.ords.services.usage", "_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker", "_autosummary/elm.ords.services.usage.TimedEntry", "_autosummary/elm.ords.services.usage.UsageTracker", "_autosummary/elm.ords.utilities", "_autosummary/elm.ords.utilities.counties", "_autosummary/elm.ords.utilities.counties.county_websites", "_autosummary/elm.ords.utilities.counties.load_all_county_info", 
"_autosummary/elm.ords.utilities.counties.load_counties_from_fp", "_autosummary/elm.ords.utilities.exceptions", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsError", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError", "_autosummary/elm.ords.utilities.location", "_autosummary/elm.ords.utilities.location.County", "_autosummary/elm.ords.utilities.location.Location", "_autosummary/elm.ords.utilities.parsing", "_autosummary/elm.ords.utilities.parsing.llm_response_as_json", "_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts", "_autosummary/elm.ords.utilities.queued_logging", "_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler", "_autosummary/elm.ords.utilities.queued_logging.LocationFileLog", "_autosummary/elm.ords.utilities.queued_logging.LocationFilter", "_autosummary/elm.ords.utilities.queued_logging.LogListener", "_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter", "_autosummary/elm.ords.validation", "_autosummary/elm.ords.validation.content", "_autosummary/elm.ords.validation.content.ValidationWithMemory", "_autosummary/elm.ords.validation.content.possibly_mentions_wind", "_autosummary/elm.ords.validation.location", "_autosummary/elm.ords.validation.location.CountyJurisdictionValidator", "_autosummary/elm.ords.validation.location.CountyNameValidator", "_autosummary/elm.ords.validation.location.CountyValidator", "_autosummary/elm.ords.validation.location.FixedMessageValidator", "_autosummary/elm.ords.validation.location.URLValidator", "_autosummary/elm.osti", "_autosummary/elm.osti.OstiList", "_autosummary/elm.osti.OstiRecord", "_autosummary/elm.pdf", "_autosummary/elm.pdf.PDFtoTXT", "_autosummary/elm.summary", "_autosummary/elm.summary.Summary", "_autosummary/elm.tree", "_autosummary/elm.tree.DecisionTree", "_autosummary/elm.utilities", 
"_autosummary/elm.utilities.parse", "_autosummary/elm.utilities.parse.clean_headers", "_autosummary/elm.utilities.parse.combine_pages", "_autosummary/elm.utilities.parse.format_html_tables", "_autosummary/elm.utilities.parse.html_to_text", "_autosummary/elm.utilities.parse.is_multi_col", "_autosummary/elm.utilities.parse.read_pdf", "_autosummary/elm.utilities.parse.read_pdf_ocr", "_autosummary/elm.utilities.parse.remove_blank_pages", "_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers", "_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars", "_autosummary/elm.utilities.parse.replace_excessive_newlines", "_autosummary/elm.utilities.parse.replace_multi_dot_lines", "_autosummary/elm.utilities.retry", "_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff", "_autosummary/elm.utilities.retry.retry_with_exponential_backoff", "_autosummary/elm.version", "_autosummary/elm.web", "_autosummary/elm.web.document", "_autosummary/elm.web.document.BaseDocument", "_autosummary/elm.web.document.HTMLDocument", "_autosummary/elm.web.document.PDFDocument", "_autosummary/elm.web.file_loader", "_autosummary/elm.web.file_loader.AsyncFileLoader", "_autosummary/elm.web.google_search", "_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch", "_autosummary/elm.web.html_pw", "_autosummary/elm.web.html_pw.load_html_with_pw", "_autosummary/elm.web.utilities", "_autosummary/elm.web.utilities.clean_search_query", "_autosummary/elm.web.utilities.compute_fn_from_url", "_autosummary/elm.web.utilities.write_url_doc_to_file", "_autosummary/elm.wizard", "_autosummary/elm.wizard.EnergyWizard", "_cli/cli", "_cli/elm", "api", "examples", "examples.energy_wizard", "examples.ordinance_gpt", "index", "installation"], "envversion": {"sphinx": 61, "sphinx.domains.c": 3, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 9, "sphinx.domains.index": 1, "sphinx.domains.javascript": 3, "sphinx.domains.math": 2, 
"sphinx.domains.python": 4, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinx.ext.intersphinx": 1, "sphinx.ext.viewcode": 1}, "filenames": ["_autosummary/elm.rst", "_autosummary/elm.base.rst", "_autosummary/elm.base.ApiBase.rst", "_autosummary/elm.base.ApiQueue.rst", "_autosummary/elm.chunk.rst", "_autosummary/elm.chunk.Chunker.rst", "_autosummary/elm.cli.rst", "_autosummary/elm.embed.rst", "_autosummary/elm.embed.ChunkAndEmbed.rst", "_autosummary/elm.exceptions.rst", "_autosummary/elm.exceptions.ELMError.rst", "_autosummary/elm.exceptions.ELMRuntimeError.rst", "_autosummary/elm.ords.rst", "_autosummary/elm.ords.download.rst", "_autosummary/elm.ords.download.download_county_ordinance.rst", "_autosummary/elm.ords.extraction.rst", "_autosummary/elm.ords.extraction.apply.rst", "_autosummary/elm.ords.extraction.apply.check_for_ordinance_info.rst", "_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_llm.rst", "_autosummary/elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation.rst", "_autosummary/elm.ords.extraction.apply.extract_ordinance_values.rst", "_autosummary/elm.ords.extraction.date.rst", "_autosummary/elm.ords.extraction.date.DateExtractor.rst", "_autosummary/elm.ords.extraction.features.rst", "_autosummary/elm.ords.extraction.features.SetbackFeatures.rst", "_autosummary/elm.ords.extraction.graphs.rst", "_autosummary/elm.ords.extraction.graphs.llm_response_does_not_start_with_no.rst", "_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_no.rst", "_autosummary/elm.ords.extraction.graphs.llm_response_starts_with_yes.rst", "_autosummary/elm.ords.extraction.graphs.setup_base_graph.rst", "_autosummary/elm.ords.extraction.graphs.setup_conditional.rst", "_autosummary/elm.ords.extraction.graphs.setup_graph_extra_restriction.rst", "_autosummary/elm.ords.extraction.graphs.setup_graph_wes_types.rst", "_autosummary/elm.ords.extraction.graphs.setup_multiplier.rst", 
"_autosummary/elm.ords.extraction.graphs.setup_participating_owner.rst", "_autosummary/elm.ords.extraction.ngrams.rst", "_autosummary/elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams.rst", "_autosummary/elm.ords.extraction.ngrams.sentence_ngram_containment.rst", "_autosummary/elm.ords.extraction.ordinance.rst", "_autosummary/elm.ords.extraction.ordinance.OrdinanceExtractor.rst", "_autosummary/elm.ords.extraction.ordinance.OrdinanceValidator.rst", "_autosummary/elm.ords.extraction.parse.rst", "_autosummary/elm.ords.extraction.parse.StructuredOrdinanceParser.rst", "_autosummary/elm.ords.extraction.tree.rst", "_autosummary/elm.ords.extraction.tree.AsyncDecisionTree.rst", "_autosummary/elm.ords.llm.rst", "_autosummary/elm.ords.llm.calling.rst", "_autosummary/elm.ords.llm.calling.BaseLLMCaller.rst", "_autosummary/elm.ords.llm.calling.ChatLLMCaller.rst", "_autosummary/elm.ords.llm.calling.LLMCaller.rst", "_autosummary/elm.ords.llm.calling.StructuredLLMCaller.rst", "_autosummary/elm.ords.process.rst", "_autosummary/elm.ords.process.download_doc_for_county.rst", "_autosummary/elm.ords.process.download_docs_for_county_with_logging.rst", "_autosummary/elm.ords.process.process_counties_with_openai.rst", "_autosummary/elm.ords.services.rst", "_autosummary/elm.ords.services.base.rst", "_autosummary/elm.ords.services.base.RateLimitedService.rst", "_autosummary/elm.ords.services.base.Service.rst", "_autosummary/elm.ords.services.cpu.rst", "_autosummary/elm.ords.services.cpu.PDFLoader.rst", "_autosummary/elm.ords.services.cpu.ProcessPoolService.rst", "_autosummary/elm.ords.services.cpu.read_pdf_doc.rst", "_autosummary/elm.ords.services.cpu.read_pdf_doc_ocr.rst", "_autosummary/elm.ords.services.openai.rst", "_autosummary/elm.ords.services.openai.OpenAIService.rst", "_autosummary/elm.ords.services.openai.count_tokens.rst", "_autosummary/elm.ords.services.openai.usage_from_response.rst", "_autosummary/elm.ords.services.provider.rst", 
"_autosummary/elm.ords.services.provider.RunningAsyncServices.rst", "_autosummary/elm.ords.services.queues.rst", "_autosummary/elm.ords.services.queues.get_service_queue.rst", "_autosummary/elm.ords.services.queues.initialize_service_queue.rst", "_autosummary/elm.ords.services.queues.tear_down_service_queue.rst", "_autosummary/elm.ords.services.threaded.rst", "_autosummary/elm.ords.services.threaded.CleanedFileWriter.rst", "_autosummary/elm.ords.services.threaded.FileMover.rst", "_autosummary/elm.ords.services.threaded.OrdDBFileWriter.rst", "_autosummary/elm.ords.services.threaded.StoreFileOnDisk.rst", "_autosummary/elm.ords.services.threaded.TempFileCache.rst", "_autosummary/elm.ords.services.threaded.ThreadedService.rst", "_autosummary/elm.ords.services.threaded.UsageUpdater.rst", "_autosummary/elm.ords.services.usage.rst", "_autosummary/elm.ords.services.usage.TimeBoundedUsageTracker.rst", "_autosummary/elm.ords.services.usage.TimedEntry.rst", "_autosummary/elm.ords.services.usage.UsageTracker.rst", "_autosummary/elm.ords.utilities.rst", "_autosummary/elm.ords.utilities.counties.rst", "_autosummary/elm.ords.utilities.counties.county_websites.rst", "_autosummary/elm.ords.utilities.counties.load_all_county_info.rst", "_autosummary/elm.ords.utilities.counties.load_counties_from_fp.rst", "_autosummary/elm.ords.utilities.exceptions.rst", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsError.rst", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsNotInitializedError.rst", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsRuntimeError.rst", "_autosummary/elm.ords.utilities.exceptions.ELMOrdsValueError.rst", "_autosummary/elm.ords.utilities.location.rst", "_autosummary/elm.ords.utilities.location.County.rst", "_autosummary/elm.ords.utilities.location.Location.rst", "_autosummary/elm.ords.utilities.parsing.rst", "_autosummary/elm.ords.utilities.parsing.llm_response_as_json.rst", "_autosummary/elm.ords.utilities.parsing.merge_overlapping_texts.rst", 
"_autosummary/elm.ords.utilities.queued_logging.rst", "_autosummary/elm.ords.utilities.queued_logging.LocalProcessQueueHandler.rst", "_autosummary/elm.ords.utilities.queued_logging.LocationFileLog.rst", "_autosummary/elm.ords.utilities.queued_logging.LocationFilter.rst", "_autosummary/elm.ords.utilities.queued_logging.LogListener.rst", "_autosummary/elm.ords.utilities.queued_logging.NoLocationFilter.rst", "_autosummary/elm.ords.validation.rst", "_autosummary/elm.ords.validation.content.rst", "_autosummary/elm.ords.validation.content.ValidationWithMemory.rst", "_autosummary/elm.ords.validation.content.possibly_mentions_wind.rst", "_autosummary/elm.ords.validation.location.rst", "_autosummary/elm.ords.validation.location.CountyJurisdictionValidator.rst", "_autosummary/elm.ords.validation.location.CountyNameValidator.rst", "_autosummary/elm.ords.validation.location.CountyValidator.rst", "_autosummary/elm.ords.validation.location.FixedMessageValidator.rst", "_autosummary/elm.ords.validation.location.URLValidator.rst", "_autosummary/elm.osti.rst", "_autosummary/elm.osti.OstiList.rst", "_autosummary/elm.osti.OstiRecord.rst", "_autosummary/elm.pdf.rst", "_autosummary/elm.pdf.PDFtoTXT.rst", "_autosummary/elm.summary.rst", "_autosummary/elm.summary.Summary.rst", "_autosummary/elm.tree.rst", "_autosummary/elm.tree.DecisionTree.rst", "_autosummary/elm.utilities.rst", "_autosummary/elm.utilities.parse.rst", "_autosummary/elm.utilities.parse.clean_headers.rst", "_autosummary/elm.utilities.parse.combine_pages.rst", "_autosummary/elm.utilities.parse.format_html_tables.rst", "_autosummary/elm.utilities.parse.html_to_text.rst", "_autosummary/elm.utilities.parse.is_multi_col.rst", "_autosummary/elm.utilities.parse.read_pdf.rst", "_autosummary/elm.utilities.parse.read_pdf_ocr.rst", "_autosummary/elm.utilities.parse.remove_blank_pages.rst", "_autosummary/elm.utilities.parse.remove_empty_lines_or_page_footers.rst", 
"_autosummary/elm.utilities.parse.replace_common_pdf_conversion_chars.rst", "_autosummary/elm.utilities.parse.replace_excessive_newlines.rst", "_autosummary/elm.utilities.parse.replace_multi_dot_lines.rst", "_autosummary/elm.utilities.retry.rst", "_autosummary/elm.utilities.retry.async_retry_with_exponential_backoff.rst", "_autosummary/elm.utilities.retry.retry_with_exponential_backoff.rst", "_autosummary/elm.version.rst", "_autosummary/elm.web.rst", "_autosummary/elm.web.document.rst", "_autosummary/elm.web.document.BaseDocument.rst", "_autosummary/elm.web.document.HTMLDocument.rst", "_autosummary/elm.web.document.PDFDocument.rst", "_autosummary/elm.web.file_loader.rst", "_autosummary/elm.web.file_loader.AsyncFileLoader.rst", "_autosummary/elm.web.google_search.rst", "_autosummary/elm.web.google_search.PlaywrightGoogleLinkSearch.rst", "_autosummary/elm.web.html_pw.rst", "_autosummary/elm.web.html_pw.load_html_with_pw.rst", "_autosummary/elm.web.utilities.rst", "_autosummary/elm.web.utilities.clean_search_query.rst", "_autosummary/elm.web.utilities.compute_fn_from_url.rst", "_autosummary/elm.web.utilities.write_url_doc_to_file.rst", "_autosummary/elm.wizard.rst", "_autosummary/elm.wizard.EnergyWizard.rst", "_cli/cli.rst", "_cli/elm.rst", "api.rst", "examples.rst", "examples.energy_wizard.rst", "examples.ordinance_gpt.rst", "index.rst", "installation.rst"], "indexentries": {"--config": [[163, "cmdoption-elm-ords-c", false]], "--verbose": [[163, "cmdoption-elm-ords-v", false]], "--version": [[163, "cmdoption-elm-version", false]], "-c": [[163, "cmdoption-elm-ords-c", false]], "-v": [[163, "cmdoption-elm-ords-v", false]], "__add__() (ostilist method)": [[119, "elm.osti.OstiList.__add__", false]], "__mul__() (ostilist method)": [[119, "elm.osti.OstiList.__mul__", false]], "acquire() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.acquire", false]], "acquire_resources() (cleanedfilewriter method)": [[75, 
"elm.ords.services.threaded.CleanedFileWriter.acquire_resources", false]], "acquire_resources() (filemover method)": [[76, "elm.ords.services.threaded.FileMover.acquire_resources", false]], "acquire_resources() (openaiservice method)": [[65, "elm.ords.services.openai.OpenAIService.acquire_resources", false]], "acquire_resources() (orddbfilewriter method)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.acquire_resources", false]], "acquire_resources() (pdfloader method)": [[60, "elm.ords.services.cpu.PDFLoader.acquire_resources", false]], "acquire_resources() (processpoolservice method)": [[61, "elm.ords.services.cpu.ProcessPoolService.acquire_resources", false]], "acquire_resources() (ratelimitedservice method)": [[57, "elm.ords.services.base.RateLimitedService.acquire_resources", false]], "acquire_resources() (service method)": [[58, "elm.ords.services.base.Service.acquire_resources", false]], "acquire_resources() (storefileondisk method)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.acquire_resources", false]], "acquire_resources() (tempfilecache method)": [[79, "elm.ords.services.threaded.TempFileCache.acquire_resources", false]], "acquire_resources() (threadedservice method)": [[80, "elm.ords.services.threaded.ThreadedService.acquire_resources", false]], "acquire_resources() (usageupdater method)": [[81, "elm.ords.services.threaded.UsageUpdater.acquire_resources", false]], "add() (timeboundedusagetracker method)": [[83, "elm.ords.services.usage.TimeBoundedUsageTracker.add", false]], "add_overlap() (chunker method)": [[5, "elm.chunk.Chunker.add_overlap", false]], "add_to() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.add_to", false]], "addfilter() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.addFilter", false]], "addhandler() (loglistener method)": [[106, "elm.ords.utilities.queued_logging.LogListener.addHandler", false]], "all_messages_txt (apibase property)": [[2, 
"elm.base.ApiBase.all_messages_txt", false]], "all_messages_txt (asyncdecisiontree property)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.all_messages_txt", false]], "all_messages_txt (chunkandembed property)": [[8, "elm.embed.ChunkAndEmbed.all_messages_txt", false]], "all_messages_txt (chunker property)": [[5, "elm.chunk.Chunker.all_messages_txt", false]], "all_messages_txt (decisiontree property)": [[126, "elm.tree.DecisionTree.all_messages_txt", false]], "all_messages_txt (energywizard property)": [[161, "elm.wizard.EnergyWizard.all_messages_txt", false]], "all_messages_txt (pdftotxt property)": [[122, "elm.pdf.PDFtoTXT.all_messages_txt", false]], "all_messages_txt (summary property)": [[124, "elm.summary.Summary.all_messages_txt", false]], "api (asyncdecisiontree property)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.api", false]], "api (decisiontree property)": [[126, "elm.tree.DecisionTree.api", false]], "apibase (class in elm.base)": [[2, "elm.base.ApiBase", false]], "apiqueue (class in elm.base)": [[3, "elm.base.ApiQueue", false]], "append() (ostilist method)": [[119, "elm.osti.OstiList.append", false]], "async_call_node() (asyncdecisiontree method)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.async_call_node", false]], "async_retry_with_exponential_backoff() (in module elm.utilities.retry)": [[142, "elm.utilities.retry.async_retry_with_exponential_backoff", false]], "async_run() (asyncdecisiontree method)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.async_run", false]], "asyncdecisiontree (class in elm.ords.extraction.tree)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree", false]], "asyncfileloader (class in elm.web.file_loader)": [[151, "elm.web.file_loader.AsyncFileLoader", false]], "authors (ostirecord property)": [[120, "elm.osti.OstiRecord.authors", false]], "base_url (ostilist attribute)": [[119, "elm.osti.OstiList.BASE_URL", false]], "basedocument (class in elm.web.document)": [[147, "elm.web.document.BaseDocument", 
false]], "basellmcaller (class in elm.ords.llm.calling)": [[47, "elm.ords.llm.calling.BaseLLMCaller", false]], "call() (chatllmcaller method)": [[48, "elm.ords.llm.calling.ChatLLMCaller.call", false]], "call() (cleanedfilewriter class method)": [[75, "elm.ords.services.threaded.CleanedFileWriter.call", false]], "call() (filemover class method)": [[76, "elm.ords.services.threaded.FileMover.call", false]], "call() (llmcaller method)": [[49, "elm.ords.llm.calling.LLMCaller.call", false]], "call() (openaiservice class method)": [[65, "elm.ords.services.openai.OpenAIService.call", false]], "call() (orddbfilewriter class method)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.call", false]], "call() (pdfloader class method)": [[60, "elm.ords.services.cpu.PDFLoader.call", false]], "call() (processpoolservice class method)": [[61, "elm.ords.services.cpu.ProcessPoolService.call", false]], "call() (ratelimitedservice class method)": [[57, "elm.ords.services.base.RateLimitedService.call", false]], "call() (service class method)": [[58, "elm.ords.services.base.Service.call", false]], "call() (storefileondisk class method)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.call", false]], "call() (structuredllmcaller method)": [[50, "elm.ords.llm.calling.StructuredLLMCaller.call", false]], "call() (tempfilecache class method)": [[79, "elm.ords.services.threaded.TempFileCache.call", false]], "call() (threadedservice class method)": [[80, "elm.ords.services.threaded.ThreadedService.call", false]], "call() (usageupdater class method)": [[81, "elm.ords.services.threaded.UsageUpdater.call", false]], "call_api() (apibase static method)": [[2, "elm.base.ApiBase.call_api", false]], "call_api() (chunkandembed static method)": [[8, "elm.embed.ChunkAndEmbed.call_api", false]], "call_api() (chunker static method)": [[5, "elm.chunk.Chunker.call_api", false]], "call_api() (energywizard static method)": [[161, "elm.wizard.EnergyWizard.call_api", false]], "call_api() (pdftotxt static 
method)": [[122, "elm.pdf.PDFtoTXT.call_api", false]], "call_api() (summary static method)": [[124, "elm.summary.Summary.call_api", false]], "call_api_async() (apibase method)": [[2, "elm.base.ApiBase.call_api_async", false]], "call_api_async() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.call_api_async", false]], "call_api_async() (chunker method)": [[5, "elm.chunk.Chunker.call_api_async", false]], "call_api_async() (energywizard method)": [[161, "elm.wizard.EnergyWizard.call_api_async", false]], "call_api_async() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.call_api_async", false]], "call_api_async() (summary method)": [[124, "elm.summary.Summary.call_api_async", false]], "call_node() (asyncdecisiontree method)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.call_node", false]], "call_node() (decisiontree method)": [[126, "elm.tree.DecisionTree.call_node", false]], "can_process (cleanedfilewriter property)": [[75, "elm.ords.services.threaded.CleanedFileWriter.can_process", false]], "can_process (filemover property)": [[76, "elm.ords.services.threaded.FileMover.can_process", false]], "can_process (openaiservice property)": [[65, "elm.ords.services.openai.OpenAIService.can_process", false]], "can_process (orddbfilewriter property)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.can_process", false]], "can_process (pdfloader property)": [[60, "elm.ords.services.cpu.PDFLoader.can_process", false]], "can_process (processpoolservice property)": [[61, "elm.ords.services.cpu.ProcessPoolService.can_process", false]], "can_process (ratelimitedservice property)": [[57, "elm.ords.services.base.RateLimitedService.can_process", false]], "can_process (service property)": [[58, "elm.ords.services.base.Service.can_process", false]], "can_process (storefileondisk property)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.can_process", false]], "can_process (tempfilecache property)": [[79, "elm.ords.services.threaded.TempFileCache.can_process", false]], 
"can_process (threadedservice property)": [[80, "elm.ords.services.threaded.ThreadedService.can_process", false]], "can_process (usageupdater property)": [[81, "elm.ords.services.threaded.UsageUpdater.can_process", false]], "chat() (apibase method)": [[2, "elm.base.ApiBase.chat", false]], "chat() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.chat", false]], "chat() (chunker method)": [[5, "elm.chunk.Chunker.chat", false]], "chat() (energywizard method)": [[161, "elm.wizard.EnergyWizard.chat", false]], "chat() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.chat", false]], "chat() (summary method)": [[124, "elm.summary.Summary.chat", false]], "chat_llm_caller (asyncdecisiontree property)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.chat_llm_caller", false]], "chatllmcaller (class in elm.ords.llm.calling)": [[48, "elm.ords.llm.calling.ChatLLMCaller", false]], "check() (countyjurisdictionvalidator method)": [[113, "elm.ords.validation.location.CountyJurisdictionValidator.check", false]], "check() (countynamevalidator method)": [[114, "elm.ords.validation.location.CountyNameValidator.check", false]], "check() (countyvalidator method)": [[115, "elm.ords.validation.location.CountyValidator.check", false]], "check() (fixedmessagevalidator method)": [[116, "elm.ords.validation.location.FixedMessageValidator.check", false]], "check() (urlvalidator method)": [[117, "elm.ords.validation.location.URLValidator.check", false]], "check_for_correct_size() (ordinanceextractor method)": [[39, "elm.ords.extraction.ordinance.OrdinanceExtractor.check_for_correct_size", false]], "check_for_ordinance_info() (in module elm.ords.extraction.apply)": [[17, "elm.ords.extraction.apply.check_for_ordinance_info", false]], "check_for_restrictions() (ordinanceextractor method)": [[39, "elm.ords.extraction.ordinance.OrdinanceExtractor.check_for_restrictions", false]], "chunk_text() (chunker method)": [[5, "elm.chunk.Chunker.chunk_text", false]], "chunk_tokens (chunker property)": [[5, 
"elm.chunk.Chunker.chunk_tokens", false]], "chunkandembed (class in elm.embed)": [[8, "elm.embed.ChunkAndEmbed", false]], "chunker (class in elm.chunk)": [[5, "elm.chunk.Chunker", false]], "chunks (chunker property)": [[5, "elm.chunk.Chunker.chunks", false]], "clean_header_kwargs (pdfdocument attribute)": [[149, "elm.web.document.PDFDocument.CLEAN_HEADER_KWARGS", false]], "clean_headers() (in module elm.utilities.parse)": [[129, "elm.utilities.parse.clean_headers", false]], "clean_headers() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.clean_headers", false]], "clean_paragraphs() (chunker static method)": [[5, "elm.chunk.Chunker.clean_paragraphs", false]], "clean_poppler() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.clean_poppler", false]], "clean_search_query() (in module elm.web.utilities)": [[157, "elm.web.utilities.clean_search_query", false]], "clean_tables() (chunkandembed static method)": [[8, "elm.embed.ChunkAndEmbed.clean_tables", false]], "clean_txt() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.clean_txt", false]], "clean_txt_async() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.clean_txt_async", false]], "cleanedfilewriter (class in elm.ords.services.threaded)": [[75, "elm.ords.services.threaded.CleanedFileWriter", false]], "clear() (apibase method)": [[2, "elm.base.ApiBase.clear", false]], "clear() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.clear", false]], "clear() (chunker method)": [[5, "elm.chunk.Chunker.clear", false]], "clear() (energywizard method)": [[161, "elm.wizard.EnergyWizard.clear", false]], "clear() (ostilist method)": [[119, "elm.osti.OstiList.clear", false]], "clear() (ostirecord method)": [[120, "elm.osti.OstiRecord.clear", false]], "clear() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.clear", false]], "clear() (summary method)": [[124, "elm.summary.Summary.clear", false]], "clear() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.clear", false]], "close() (localprocessqueuehandler method)": [[103, 
"elm.ords.utilities.queued_logging.LocalProcessQueueHandler.close", false]], "collect_jobs() (apiqueue method)": [[3, "elm.base.ApiQueue.collect_jobs", false]], "combine() (summary method)": [[124, "elm.summary.Summary.combine", false]], "combine_pages() (in module elm.utilities.parse)": [[130, "elm.utilities.parse.combine_pages", false]], "compute_fn_from_url() (in module elm.web.utilities)": [[158, "elm.web.utilities.compute_fn_from_url", false]], "convert_text_to_sentence_ngrams() (in module elm.ords.extraction.ngrams)": [[36, "elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams", false]], "copy() (ostilist method)": [[119, "elm.osti.OstiList.copy", false]], "copy() (ostirecord method)": [[120, "elm.osti.OstiRecord.copy", false]], "cosine_dist() (energywizard method)": [[161, "elm.wizard.EnergyWizard.cosine_dist", false]], "count() (ostilist method)": [[119, "elm.osti.OstiList.count", false]], "count_tokens() (apibase static method)": [[2, "elm.base.ApiBase.count_tokens", false]], "count_tokens() (chunkandembed static method)": [[8, "elm.embed.ChunkAndEmbed.count_tokens", false]], "count_tokens() (chunker static method)": [[5, "elm.chunk.Chunker.count_tokens", false]], "count_tokens() (energywizard static method)": [[161, "elm.wizard.EnergyWizard.count_tokens", false]], "count_tokens() (in module elm.ords.services.openai)": [[66, "elm.ords.services.openai.count_tokens", false]], "count_tokens() (pdftotxt static method)": [[122, "elm.pdf.PDFtoTXT.count_tokens", false]], "count_tokens() (summary static method)": [[124, "elm.summary.Summary.count_tokens", false]], "county (class in elm.ords.utilities.location)": [[97, "elm.ords.utilities.location.County", false]], "county_websites() (in module elm.ords.utilities.counties)": [[88, "elm.ords.utilities.counties.county_websites", false]], "countyjurisdictionvalidator (class in elm.ords.validation.location)": [[113, "elm.ords.validation.location.CountyJurisdictionValidator", false]], "countynamevalidator (class 
in elm.ords.validation.location)": [[114, "elm.ords.validation.location.CountyNameValidator", false]], "countyvalidator (class in elm.ords.validation.location)": [[115, "elm.ords.validation.location.CountyValidator", false]], "createlock() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.createLock", false]], "date (ostirecord property)": [[120, "elm.osti.OstiRecord.date", false]], "dateextractor (class in elm.ords.extraction.date)": [[22, "elm.ords.extraction.date.DateExtractor", false]], "decisiontree (class in elm.tree)": [[126, "elm.tree.DecisionTree", false]], "default_header_template (asyncfileloader attribute)": [[151, "elm.web.file_loader.AsyncFileLoader.DEFAULT_HEADER_TEMPLATE", false]], "default_model (apibase attribute)": [[2, "elm.base.ApiBase.DEFAULT_MODEL", false]], "default_model (chunkandembed attribute)": [[8, "elm.embed.ChunkAndEmbed.DEFAULT_MODEL", false]], "default_model (chunker attribute)": [[5, "elm.chunk.Chunker.DEFAULT_MODEL", false]], "default_model (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.DEFAULT_MODEL", false]], "default_model (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.DEFAULT_MODEL", false]], "default_model (summary attribute)": [[124, "elm.summary.Summary.DEFAULT_MODEL", false]], "doi (ostirecord property)": [[120, "elm.osti.OstiRecord.doi", false]], "download() (ostilist method)": [[119, "elm.osti.OstiList.download", false]], "download() (ostirecord method)": [[120, "elm.osti.OstiRecord.download", false]], "download_county_ordinance() (in module elm.ords.download)": [[14, "elm.ords.download.download_county_ordinance", false]], "download_doc_for_county() (in module elm.ords.process)": [[52, "elm.ords.process.download_doc_for_county", false]], "download_docs_for_county_with_logging() (in module elm.ords.process)": [[53, "elm.ords.process.download_docs_for_county_with_logging", false]], "elm": [[0, "module-elm", false]], "elm command line option": [[163, 
"cmdoption-elm-version", false]], "elm-ords command line option": [[163, "cmdoption-elm-ords-c", false], [163, "cmdoption-elm-ords-v", false]], "elm.base": [[1, "module-elm.base", false]], "elm.chunk": [[4, "module-elm.chunk", false]], "elm.cli": [[6, "module-elm.cli", false]], "elm.embed": [[7, "module-elm.embed", false]], "elm.exceptions": [[9, "module-elm.exceptions", false]], "elm.ords": [[12, "module-elm.ords", false]], "elm.ords.download": [[13, "module-elm.ords.download", false]], "elm.ords.extraction": [[15, "module-elm.ords.extraction", false]], "elm.ords.extraction.apply": [[16, "module-elm.ords.extraction.apply", false]], "elm.ords.extraction.date": [[21, "module-elm.ords.extraction.date", false]], "elm.ords.extraction.features": [[23, "module-elm.ords.extraction.features", false]], "elm.ords.extraction.graphs": [[25, "module-elm.ords.extraction.graphs", false]], "elm.ords.extraction.ngrams": [[35, "module-elm.ords.extraction.ngrams", false]], "elm.ords.extraction.ordinance": [[38, "module-elm.ords.extraction.ordinance", false]], "elm.ords.extraction.parse": [[41, "module-elm.ords.extraction.parse", false]], "elm.ords.extraction.tree": [[43, "module-elm.ords.extraction.tree", false]], "elm.ords.llm": [[45, "module-elm.ords.llm", false]], "elm.ords.llm.calling": [[46, "module-elm.ords.llm.calling", false]], "elm.ords.process": [[51, "module-elm.ords.process", false]], "elm.ords.services": [[55, "module-elm.ords.services", false]], "elm.ords.services.base": [[56, "module-elm.ords.services.base", false]], "elm.ords.services.cpu": [[59, "module-elm.ords.services.cpu", false]], "elm.ords.services.openai": [[64, "module-elm.ords.services.openai", false]], "elm.ords.services.provider": [[68, "module-elm.ords.services.provider", false]], "elm.ords.services.queues": [[70, "module-elm.ords.services.queues", false]], "elm.ords.services.threaded": [[74, "module-elm.ords.services.threaded", false]], "elm.ords.services.usage": [[82, "module-elm.ords.services.usage", 
false]], "elm.ords.utilities": [[86, "module-elm.ords.utilities", false]], "elm.ords.utilities.counties": [[87, "module-elm.ords.utilities.counties", false]], "elm.ords.utilities.exceptions": [[91, "module-elm.ords.utilities.exceptions", false]], "elm.ords.utilities.location": [[96, "module-elm.ords.utilities.location", false]], "elm.ords.utilities.parsing": [[99, "module-elm.ords.utilities.parsing", false]], "elm.ords.utilities.queued_logging": [[102, "module-elm.ords.utilities.queued_logging", false]], "elm.ords.validation": [[108, "module-elm.ords.validation", false]], "elm.ords.validation.content": [[109, "module-elm.ords.validation.content", false]], "elm.ords.validation.location": [[112, "module-elm.ords.validation.location", false]], "elm.osti": [[118, "module-elm.osti", false]], "elm.pdf": [[121, "module-elm.pdf", false]], "elm.summary": [[123, "module-elm.summary", false]], "elm.tree": [[125, "module-elm.tree", false]], "elm.utilities": [[127, "module-elm.utilities", false]], "elm.utilities.parse": [[128, "module-elm.utilities.parse", false]], "elm.utilities.retry": [[141, "module-elm.utilities.retry", false]], "elm.version": [[144, "module-elm.version", false]], "elm.web": [[145, "module-elm.web", false]], "elm.web.document": [[146, "module-elm.web.document", false]], "elm.web.file_loader": [[150, "module-elm.web.file_loader", false]], "elm.web.google_search": [[152, "module-elm.web.google_search", false]], "elm.web.html_pw": [[154, "module-elm.web.html_pw", false]], "elm.web.utilities": [[156, "module-elm.web.utilities", false]], "elm.wizard": [[160, "module-elm.wizard", false]], "elmerror": [[10, "elm.exceptions.ELMError", false]], "elmordserror": [[92, "elm.ords.utilities.exceptions.ELMOrdsError", false]], "elmordsnotinitializederror": [[93, "elm.ords.utilities.exceptions.ELMOrdsNotInitializedError", false]], "elmordsruntimeerror": [[94, "elm.ords.utilities.exceptions.ELMOrdsRuntimeError", false]], "elmordsvalueerror": [[95, 
"elm.ords.utilities.exceptions.ELMOrdsValueError", false]], "elmruntimeerror": [[11, "elm.exceptions.ELMRuntimeError", false]], "embedding_model (apibase attribute)": [[2, "elm.base.ApiBase.EMBEDDING_MODEL", false]], "embedding_model (chunkandembed attribute)": [[8, "elm.embed.ChunkAndEmbed.EMBEDDING_MODEL", false]], "embedding_model (chunker attribute)": [[5, "elm.chunk.Chunker.EMBEDDING_MODEL", false]], "embedding_model (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.EMBEDDING_MODEL", false]], "embedding_model (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.EMBEDDING_MODEL", false]], "embedding_model (summary attribute)": [[124, "elm.summary.Summary.EMBEDDING_MODEL", false]], "embedding_url (apibase attribute)": [[2, "elm.base.ApiBase.EMBEDDING_URL", false]], "embedding_url (chunkandembed attribute)": [[8, "elm.embed.ChunkAndEmbed.EMBEDDING_URL", false]], "embedding_url (chunker attribute)": [[5, "elm.chunk.Chunker.EMBEDDING_URL", false]], "embedding_url (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.EMBEDDING_URL", false]], "embedding_url (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.EMBEDDING_URL", false]], "embedding_url (summary attribute)": [[124, "elm.summary.Summary.EMBEDDING_URL", false]], "emit() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.emit", false]], "empty (basedocument property)": [[147, "elm.web.document.BaseDocument.empty", false]], "empty (htmldocument property)": [[148, "elm.web.document.HTMLDocument.empty", false]], "empty (pdfdocument property)": [[149, "elm.web.document.PDFDocument.empty", false]], "energywizard (class in elm.wizard)": [[161, "elm.wizard.EnergyWizard", false]], "engineer_query() (energywizard method)": [[161, "elm.wizard.EnergyWizard.engineer_query", false]], "enqueue() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.enqueue", false]], "expected_results_per_page 
(playwrightgooglelinksearch attribute)": [[153, "elm.web.google_search.PlaywrightGoogleLinkSearch.EXPECTED_RESULTS_PER_PAGE", false]], "extend() (ostilist method)": [[119, "elm.osti.OstiList.extend", false]], "extract_ordinance_text_with_llm() (in module elm.ords.extraction.apply)": [[18, "elm.ords.extraction.apply.extract_ordinance_text_with_llm", false]], "extract_ordinance_text_with_ngram_validation() (in module elm.ords.extraction.apply)": [[19, "elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation", false]], "extract_ordinance_values() (in module elm.ords.extraction.apply)": [[20, "elm.ords.extraction.apply.extract_ordinance_values", false]], "fetch() (asyncfileloader method)": [[151, "elm.web.file_loader.AsyncFileLoader.fetch", false]], "fetch_all() (asyncfileloader method)": [[151, "elm.web.file_loader.AsyncFileLoader.fetch_all", false]], "file_extension (basedocument property)": [[147, "elm.web.document.BaseDocument.FILE_EXTENSION", false]], "filemover (class in elm.ords.services.threaded)": [[76, "elm.ords.services.threaded.FileMover", false]], "filter() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.filter", false]], "filter() (locationfilter method)": [[105, "elm.ords.utilities.queued_logging.LocationFilter.filter", false]], "filter() (nolocationfilter method)": [[107, "elm.ords.utilities.queued_logging.NoLocationFilter.filter", false]], "fixedmessagevalidator (class in elm.ords.validation.location)": [[116, "elm.ords.validation.location.FixedMessageValidator", false]], "flush() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.flush", false]], "format() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.format", false]], "format_html_tables() (in module elm.utilities.parse)": [[131, "elm.utilities.parse.format_html_tables", false]], "from_osti_ids() (ostilist class method)": 
[[119, "elm.osti.OstiList.from_osti_ids", false]], "fromkeys() (ostirecord method)": [[120, "elm.osti.OstiRecord.fromkeys", false]], "full_name (county property)": [[97, "elm.ords.utilities.location.County.full_name", false]], "full_name (location property)": [[98, "elm.ords.utilities.location.Location.full_name", false]], "generic_async_query() (apibase method)": [[2, "elm.base.ApiBase.generic_async_query", false]], "generic_async_query() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.generic_async_query", false]], "generic_async_query() (chunker method)": [[5, "elm.chunk.Chunker.generic_async_query", false]], "generic_async_query() (energywizard method)": [[161, "elm.wizard.EnergyWizard.generic_async_query", false]], "generic_async_query() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.generic_async_query", false]], "generic_async_query() (summary method)": [[124, "elm.summary.Summary.generic_async_query", false]], "generic_query() (apibase method)": [[2, "elm.base.ApiBase.generic_query", false]], "generic_query() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.generic_query", false]], "generic_query() (chunker method)": [[5, "elm.chunk.Chunker.generic_query", false]], "generic_query() (energywizard method)": [[161, "elm.wizard.EnergyWizard.generic_query", false]], "generic_query() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.generic_query", false]], "generic_query() (summary method)": [[124, "elm.summary.Summary.generic_query", false]], "get() (ostirecord method)": [[120, "elm.osti.OstiRecord.get", false]], "get() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.get", false]], "get_embedding() (apibase class method)": [[2, "elm.base.ApiBase.get_embedding", false]], "get_embedding() (chunkandembed class method)": [[8, "elm.embed.ChunkAndEmbed.get_embedding", false]], "get_embedding() (chunker class method)": [[5, "elm.chunk.Chunker.get_embedding", false]], "get_embedding() (energywizard class method)": [[161, 
"elm.wizard.EnergyWizard.get_embedding", false]], "get_embedding() (pdftotxt class method)": [[122, "elm.pdf.PDFtoTXT.get_embedding", false]], "get_embedding() (summary class method)": [[124, "elm.summary.Summary.get_embedding", false]], "get_service_queue() (in module elm.ords.services.queues)": [[71, "elm.ords.services.queues.get_service_queue", false]], "graph (asyncdecisiontree property)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.graph", false]], "graph (decisiontree property)": [[126, "elm.tree.DecisionTree.graph", false]], "handle() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.handle", false]], "handleerror() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.handleError", false]], "headers (apibase attribute)": [[2, "elm.base.ApiBase.HEADERS", false]], "headers (chunkandembed attribute)": [[8, "elm.embed.ChunkAndEmbed.HEADERS", false]], "headers (chunker attribute)": [[5, "elm.chunk.Chunker.HEADERS", false]], "headers (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.HEADERS", false]], "headers (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.HEADERS", false]], "headers (summary attribute)": [[124, "elm.summary.Summary.HEADERS", false]], "history (asyncdecisiontree property)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.history", false]], "history (decisiontree property)": [[126, "elm.tree.DecisionTree.history", false]], "html_table_to_markdown_kwargs (htmldocument attribute)": [[148, "elm.web.document.HTMLDocument.HTML_TABLE_TO_MARKDOWN_KWARGS", false]], "html_to_text() (in module elm.utilities.parse)": [[132, "elm.utilities.parse.html_to_text", false]], "htmldocument (class in elm.web.document)": [[148, "elm.web.document.HTMLDocument", false]], "index() (ostilist method)": [[119, "elm.osti.OstiList.index", false]], "initialize_service_queue() (in module elm.ords.services.queues)": [[72, 
"elm.ords.services.queues.initialize_service_queue", false]], "insert() (ostilist method)": [[119, "elm.osti.OstiList.insert", false]], "is_double_col() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.is_double_col", false]], "is_good_paragraph() (chunker static method)": [[5, "elm.chunk.Chunker.is_good_paragraph", false]], "is_legal_text (ordinancevalidator property)": [[40, "elm.ords.extraction.ordinance.OrdinanceValidator.is_legal_text", false]], "is_multi_col() (in module elm.utilities.parse)": [[133, "elm.utilities.parse.is_multi_col", false]], "items() (ostirecord method)": [[120, "elm.osti.OstiRecord.items", false]], "items() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.items", false]], "keys() (ostirecord method)": [[120, "elm.osti.OstiRecord.keys", false]], "keys() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.keys", false]], "llm_response_as_json() (in module elm.ords.utilities.parsing)": [[100, "elm.ords.utilities.parsing.llm_response_as_json", false]], "llm_response_does_not_start_with_no() (in module elm.ords.extraction.graphs)": [[26, "elm.ords.extraction.graphs.llm_response_does_not_start_with_no", false]], "llm_response_starts_with_no() (in module elm.ords.extraction.graphs)": [[27, "elm.ords.extraction.graphs.llm_response_starts_with_no", false]], "llm_response_starts_with_yes() (in module elm.ords.extraction.graphs)": [[28, "elm.ords.extraction.graphs.llm_response_starts_with_yes", false]], "llmcaller (class in elm.ords.llm.calling)": [[49, "elm.ords.llm.calling.LLMCaller", false]], "load_all_county_info() (in module elm.ords.utilities.counties)": [[89, "elm.ords.utilities.counties.load_all_county_info", false]], "load_counties_from_fp() (in module elm.ords.utilities.counties)": [[90, "elm.ords.utilities.counties.load_counties_from_fp", false]], "load_html_with_pw() (in module elm.web.html_pw)": [[155, "elm.web.html_pw.load_html_with_pw", false]], "load_pdf() (pdftotxt method)": [[122, 
"elm.pdf.PDFtoTXT.load_pdf", false]], "localprocessqueuehandler (class in elm.ords.utilities.queued_logging)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler", false]], "location (class in elm.ords.utilities.location)": [[98, "elm.ords.utilities.location.Location", false]], "locationfilelog (class in elm.ords.utilities.queued_logging)": [[104, "elm.ords.utilities.queued_logging.LocationFileLog", false]], "locationfilter (class in elm.ords.utilities.queued_logging)": [[105, "elm.ords.utilities.queued_logging.LocationFilter", false]], "loglistener (class in elm.ords.utilities.queued_logging)": [[106, "elm.ords.utilities.queued_logging.LogListener", false]], "make_gpt_messages() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.make_gpt_messages", false]], "make_ref_list() (energywizard method)": [[161, "elm.wizard.EnergyWizard.make_ref_list", false]], "max_concurrent_jobs (cleanedfilewriter attribute)": [[75, "elm.ords.services.threaded.CleanedFileWriter.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (filemover attribute)": [[76, "elm.ords.services.threaded.FileMover.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (openaiservice attribute)": [[65, "elm.ords.services.openai.OpenAIService.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (orddbfilewriter attribute)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (pdfloader attribute)": [[60, "elm.ords.services.cpu.PDFLoader.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (processpoolservice attribute)": [[61, "elm.ords.services.cpu.ProcessPoolService.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (ratelimitedservice attribute)": [[57, "elm.ords.services.base.RateLimitedService.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (service attribute)": [[58, "elm.ords.services.base.Service.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (storefileondisk attribute)": [[78, 
"elm.ords.services.threaded.StoreFileOnDisk.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (tempfilecache attribute)": [[79, "elm.ords.services.threaded.TempFileCache.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (threadedservice attribute)": [[80, "elm.ords.services.threaded.ThreadedService.MAX_CONCURRENT_JOBS", false]], "max_concurrent_jobs (usageupdater attribute)": [[81, "elm.ords.services.threaded.UsageUpdater.MAX_CONCURRENT_JOBS", false]], "merge_chunks() (chunker method)": [[5, "elm.chunk.Chunker.merge_chunks", false]], "merge_overlapping_texts() (in module elm.ords.utilities.parsing)": [[101, "elm.ords.utilities.parsing.merge_overlapping_texts", false]], "messages (asyncdecisiontree property)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.messages", false]], "messages (decisiontree property)": [[126, "elm.tree.DecisionTree.messages", false]], "meta (ostilist property)": [[119, "elm.osti.OstiList.meta", false]], "model_instruction (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.MODEL_INSTRUCTION", false]], "model_instruction (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.MODEL_INSTRUCTION", false]], "model_instruction (summary attribute)": [[124, "elm.summary.Summary.MODEL_INSTRUCTION", false]], "model_role (apibase attribute)": [[2, "elm.base.ApiBase.MODEL_ROLE", false]], "model_role (chunkandembed attribute)": [[8, "elm.embed.ChunkAndEmbed.MODEL_ROLE", false]], "model_role (chunker attribute)": [[5, "elm.chunk.Chunker.MODEL_ROLE", false]], "model_role (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.MODEL_ROLE", false]], "model_role (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.MODEL_ROLE", false]], "model_role (summary attribute)": [[124, "elm.summary.Summary.MODEL_ROLE", false]], "module": [[0, "module-elm", false], [1, "module-elm.base", false], [4, "module-elm.chunk", false], [6, "module-elm.cli", false], [7, "module-elm.embed", false], [9, "module-elm.exceptions", false], [12, "module-elm.ords", false], [13, 
"module-elm.ords.download", false], [15, "module-elm.ords.extraction", false], [16, "module-elm.ords.extraction.apply", false], [21, "module-elm.ords.extraction.date", false], [23, "module-elm.ords.extraction.features", false], [25, "module-elm.ords.extraction.graphs", false], [35, "module-elm.ords.extraction.ngrams", false], [38, "module-elm.ords.extraction.ordinance", false], [41, "module-elm.ords.extraction.parse", false], [43, "module-elm.ords.extraction.tree", false], [45, "module-elm.ords.llm", false], [46, "module-elm.ords.llm.calling", false], [51, "module-elm.ords.process", false], [55, "module-elm.ords.services", false], [56, "module-elm.ords.services.base", false], [59, "module-elm.ords.services.cpu", false], [64, "module-elm.ords.services.openai", false], [68, "module-elm.ords.services.provider", false], [70, "module-elm.ords.services.queues", false], [74, "module-elm.ords.services.threaded", false], [82, "module-elm.ords.services.usage", false], [86, "module-elm.ords.utilities", false], [87, "module-elm.ords.utilities.counties", false], [91, "module-elm.ords.utilities.exceptions", false], [96, "module-elm.ords.utilities.location", false], [99, "module-elm.ords.utilities.parsing", false], [102, "module-elm.ords.utilities.queued_logging", false], [108, "module-elm.ords.validation", false], [109, "module-elm.ords.validation.content", false], [112, "module-elm.ords.validation.location", false], [118, "module-elm.osti", false], [121, "module-elm.pdf", false], [123, "module-elm.summary", false], [125, "module-elm.tree", false], [127, "module-elm.utilities", false], [128, "module-elm.utilities.parse", false], [141, "module-elm.utilities.retry", false], [144, "module-elm.version", false], [145, "module-elm.web", false], [146, "module-elm.web.document", false], [150, "module-elm.web.file_loader", false], [152, "module-elm.web.google_search", false], [154, "module-elm.web.html_pw", false], [156, "module-elm.web.utilities", false], [160, "module-elm.wizard", 
false]], "name (cleanedfilewriter property)": [[75, "elm.ords.services.threaded.CleanedFileWriter.name", false]], "name (filemover property)": [[76, "elm.ords.services.threaded.FileMover.name", false]], "name (openaiservice property)": [[65, "elm.ords.services.openai.OpenAIService.name", false]], "name (orddbfilewriter property)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.name", false]], "name (pdfloader property)": [[60, "elm.ords.services.cpu.PDFLoader.name", false]], "name (processpoolservice property)": [[61, "elm.ords.services.cpu.ProcessPoolService.name", false]], "name (ratelimitedservice property)": [[57, "elm.ords.services.base.RateLimitedService.name", false]], "name (service property)": [[58, "elm.ords.services.base.Service.name", false]], "name (storefileondisk property)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.name", false]], "name (tempfilecache property)": [[79, "elm.ords.services.threaded.TempFileCache.name", false]], "name (threadedservice property)": [[80, "elm.ords.services.threaded.ThreadedService.name", false]], "name (usageupdater property)": [[81, "elm.ords.services.threaded.UsageUpdater.name", false]], "nolocationfilter (class in elm.ords.utilities.queued_logging)": [[107, "elm.ords.utilities.queued_logging.NoLocationFilter", false]], "num_raw_pages_to_keep (pdfdocument property)": [[149, "elm.web.document.PDFDocument.num_raw_pages_to_keep", false]], "openaiservice (class in elm.ords.services.openai)": [[65, "elm.ords.services.openai.OpenAIService", false]], "orddbfilewriter (class in elm.ords.services.threaded)": [[77, "elm.ords.services.threaded.OrdDBFileWriter", false]], "ordinance_text (ordinancevalidator property)": [[40, "elm.ords.extraction.ordinance.OrdinanceValidator.ordinance_text", false]], "ordinanceextractor (class in elm.ords.extraction.ordinance)": [[39, "elm.ords.extraction.ordinance.OrdinanceExtractor", false]], "ordinancevalidator (class in elm.ords.extraction.ordinance)": [[40, 
"elm.ords.extraction.ordinance.OrdinanceValidator", false]], "osti_id (ostirecord property)": [[120, "elm.osti.OstiRecord.osti_id", false]], "ostilist (class in elm.osti)": [[119, "elm.osti.OstiList", false]], "ostirecord (class in elm.osti)": [[120, "elm.osti.OstiRecord", false]], "paragraph_tokens (chunker property)": [[5, "elm.chunk.Chunker.paragraph_tokens", false]], "paragraphs (chunker property)": [[5, "elm.chunk.Chunker.paragraphs", false]], "parse() (dateextractor method)": [[22, "elm.ords.extraction.date.DateExtractor.parse", false]], "parse() (ordinancevalidator method)": [[40, "elm.ords.extraction.ordinance.OrdinanceValidator.parse", false]], "parse() (structuredordinanceparser method)": [[42, "elm.ords.extraction.parse.StructuredOrdinanceParser.parse", false]], "parse_from_ind() (ordinancevalidator method)": [[40, "elm.ords.extraction.ordinance.OrdinanceValidator.parse_from_ind", false]], "parse_from_ind() (validationwithmemory method)": [[110, "elm.ords.validation.content.ValidationWithMemory.parse_from_ind", false]], "pdfdocument (class in elm.web.document)": [[149, "elm.web.document.PDFDocument", false]], "pdfloader (class in elm.ords.services.cpu)": [[60, "elm.ords.services.cpu.PDFLoader", false]], "pdftotxt (class in elm.pdf)": [[122, "elm.pdf.PDFtoTXT", false]], "playwrightgooglelinksearch (class in elm.web.google_search)": [[153, "elm.web.google_search.PlaywrightGoogleLinkSearch", false]], "pop() (ostilist method)": [[119, "elm.osti.OstiList.pop", false]], "pop() (ostirecord method)": [[120, "elm.osti.OstiRecord.pop", false]], "pop() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.pop", false]], "popitem() (ostirecord method)": [[120, "elm.osti.OstiRecord.popitem", false]], "popitem() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.popitem", false]], "possibly_mentions_wind() (in module elm.ords.validation.content)": [[111, "elm.ords.validation.content.possibly_mentions_wind", false]], "preflight_corpus() 
(energywizard static method)": [[161, "elm.wizard.EnergyWizard.preflight_corpus", false]], "prepare() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.prepare", false]], "process() (cleanedfilewriter method)": [[75, "elm.ords.services.threaded.CleanedFileWriter.process", false]], "process() (filemover method)": [[76, "elm.ords.services.threaded.FileMover.process", false]], "process() (openaiservice method)": [[65, "elm.ords.services.openai.OpenAIService.process", false]], "process() (orddbfilewriter method)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.process", false]], "process() (pdfloader method)": [[60, "elm.ords.services.cpu.PDFLoader.process", false]], "process() (processpoolservice method)": [[61, "elm.ords.services.cpu.ProcessPoolService.process", false]], "process() (ratelimitedservice method)": [[57, "elm.ords.services.base.RateLimitedService.process", false]], "process() (service method)": [[58, "elm.ords.services.base.Service.process", false]], "process() (storefileondisk method)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.process", false]], "process() (tempfilecache method)": [[79, "elm.ords.services.threaded.TempFileCache.process", false]], "process() (threadedservice method)": [[80, "elm.ords.services.threaded.ThreadedService.process", false]], "process() (usageupdater method)": [[81, "elm.ords.services.threaded.UsageUpdater.process", false]], "process_counties_with_openai() (in module elm.ords.process)": [[54, "elm.ords.process.process_counties_with_openai", false]], "process_using_futures() (cleanedfilewriter method)": [[75, "elm.ords.services.threaded.CleanedFileWriter.process_using_futures", false]], "process_using_futures() (filemover method)": [[76, "elm.ords.services.threaded.FileMover.process_using_futures", false]], "process_using_futures() (openaiservice method)": [[65, "elm.ords.services.openai.OpenAIService.process_using_futures", false]], "process_using_futures() 
(orddbfilewriter method)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.process_using_futures", false]], "process_using_futures() (pdfloader method)": [[60, "elm.ords.services.cpu.PDFLoader.process_using_futures", false]], "process_using_futures() (processpoolservice method)": [[61, "elm.ords.services.cpu.ProcessPoolService.process_using_futures", false]], "process_using_futures() (ratelimitedservice method)": [[57, "elm.ords.services.base.RateLimitedService.process_using_futures", false]], "process_using_futures() (service method)": [[58, "elm.ords.services.base.Service.process_using_futures", false]], "process_using_futures() (storefileondisk method)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.process_using_futures", false]], "process_using_futures() (tempfilecache method)": [[79, "elm.ords.services.threaded.TempFileCache.process_using_futures", false]], "process_using_futures() (threadedservice method)": [[80, "elm.ords.services.threaded.ThreadedService.process_using_futures", false]], "process_using_futures() (usageupdater method)": [[81, "elm.ords.services.threaded.UsageUpdater.process_using_futures", false]], "processpoolservice (class in elm.ords.services.cpu)": [[61, "elm.ords.services.cpu.ProcessPoolService", false]], "rank_strings() (energywizard method)": [[161, "elm.wizard.EnergyWizard.rank_strings", false]], "ratelimitedservice (class in elm.ords.services.base)": [[57, "elm.ords.services.base.RateLimitedService", false]], "raw_pages (basedocument property)": [[147, "elm.web.document.BaseDocument.raw_pages", false]], "raw_pages (htmldocument property)": [[148, "elm.web.document.HTMLDocument.raw_pages", false]], "raw_pages (pdfdocument property)": [[149, "elm.web.document.PDFDocument.raw_pages", false]], "read_pdf() (in module elm.utilities.parse)": [[134, "elm.utilities.parse.read_pdf", false]], "read_pdf_doc() (in module elm.ords.services.cpu)": [[62, "elm.ords.services.cpu.read_pdf_doc", false]], "read_pdf_doc_ocr() (in module 
elm.ords.services.cpu)": [[63, "elm.ords.services.cpu.read_pdf_doc_ocr", false]], "read_pdf_ocr() (in module elm.utilities.parse)": [[135, "elm.utilities.parse.read_pdf_ocr", false]], "release() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.release", false]], "release_resources() (cleanedfilewriter method)": [[75, "elm.ords.services.threaded.CleanedFileWriter.release_resources", false]], "release_resources() (filemover method)": [[76, "elm.ords.services.threaded.FileMover.release_resources", false]], "release_resources() (openaiservice method)": [[65, "elm.ords.services.openai.OpenAIService.release_resources", false]], "release_resources() (orddbfilewriter method)": [[77, "elm.ords.services.threaded.OrdDBFileWriter.release_resources", false]], "release_resources() (pdfloader method)": [[60, "elm.ords.services.cpu.PDFLoader.release_resources", false]], "release_resources() (processpoolservice method)": [[61, "elm.ords.services.cpu.ProcessPoolService.release_resources", false]], "release_resources() (ratelimitedservice method)": [[57, "elm.ords.services.base.RateLimitedService.release_resources", false]], "release_resources() (service method)": [[58, "elm.ords.services.base.Service.release_resources", false]], "release_resources() (storefileondisk method)": [[78, "elm.ords.services.threaded.StoreFileOnDisk.release_resources", false]], "release_resources() (tempfilecache method)": [[79, "elm.ords.services.threaded.TempFileCache.release_resources", false]], "release_resources() (threadedservice method)": [[80, "elm.ords.services.threaded.ThreadedService.release_resources", false]], "release_resources() (usageupdater method)": [[81, "elm.ords.services.threaded.UsageUpdater.release_resources", false]], "remove() (ostilist method)": [[119, "elm.osti.OstiList.remove", false]], "remove_blank_pages() (in module elm.utilities.parse)": [[136, "elm.utilities.parse.remove_blank_pages", false]], 
"remove_empty_lines_or_page_footers() (in module elm.utilities.parse)": [[137, "elm.utilities.parse.remove_empty_lines_or_page_footers", false]], "removefilter() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.removeFilter", false]], "removehandler() (loglistener method)": [[106, "elm.ords.utilities.queued_logging.LogListener.removeHandler", false]], "replace_common_pdf_conversion_chars() (in module elm.utilities.parse)": [[138, "elm.utilities.parse.replace_common_pdf_conversion_chars", false]], "replace_excessive_newlines() (in module elm.utilities.parse)": [[139, "elm.utilities.parse.replace_excessive_newlines", false]], "replace_multi_dot_lines() (in module elm.utilities.parse)": [[140, "elm.utilities.parse.replace_multi_dot_lines", false]], "results() (playwrightgooglelinksearch method)": [[153, "elm.web.google_search.PlaywrightGoogleLinkSearch.results", false]], "retry_with_exponential_backoff() (in module elm.utilities.retry)": [[143, "elm.utilities.retry.retry_with_exponential_backoff", false]], "reverse() (ostilist method)": [[119, "elm.osti.OstiList.reverse", false]], "run() (apiqueue method)": [[3, "elm.base.ApiQueue.run", false]], "run() (asyncdecisiontree method)": [[44, "elm.ords.extraction.tree.AsyncDecisionTree.run", false]], "run() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.run", false]], "run() (decisiontree method)": [[126, "elm.tree.DecisionTree.run", false]], "run() (summary method)": [[124, "elm.summary.Summary.run", false]], "run_async() (chunkandembed method)": [[8, "elm.embed.ChunkAndEmbed.run_async", false]], "run_async() (summary method)": [[124, "elm.summary.Summary.run_async", false]], "runningasyncservices (class in elm.ords.services.provider)": [[69, "elm.ords.services.provider.RunningAsyncServices", false]], "sentence_ngram_containment() (in module elm.ords.extraction.ngrams)": [[37, "elm.ords.extraction.ngrams.sentence_ngram_containment", false]], "service (class in 
elm.ords.services.base)": [[58, "elm.ords.services.base.Service", false]], "setbackfeatures (class in elm.ords.extraction.features)": [[24, "elm.ords.extraction.features.SetbackFeatures", false]], "setdefault() (ostirecord method)": [[120, "elm.osti.OstiRecord.setdefault", false]], "setdefault() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.setdefault", false]], "setformatter() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.setFormatter", false]], "setlevel() (localprocessqueuehandler method)": [[103, "elm.ords.utilities.queued_logging.LocalProcessQueueHandler.setLevel", false]], "setup_base_graph() (in module elm.ords.extraction.graphs)": [[29, "elm.ords.extraction.graphs.setup_base_graph", false]], "setup_conditional() (in module elm.ords.extraction.graphs)": [[30, "elm.ords.extraction.graphs.setup_conditional", false]], "setup_graph_extra_restriction() (in module elm.ords.extraction.graphs)": [[31, "elm.ords.extraction.graphs.setup_graph_extra_restriction", false]], "setup_graph_wes_types() (in module elm.ords.extraction.graphs)": [[32, "elm.ords.extraction.graphs.setup_graph_wes_types", false]], "setup_multiplier() (in module elm.ords.extraction.graphs)": [[33, "elm.ords.extraction.graphs.setup_multiplier", false]], "setup_participating_owner() (in module elm.ords.extraction.graphs)": [[34, "elm.ords.extraction.graphs.setup_participating_owner", false]], "sort() (ostilist method)": [[119, "elm.osti.OstiList.sort", false]], "storefileondisk (class in elm.ords.services.threaded)": [[78, "elm.ords.services.threaded.StoreFileOnDisk", false]], "strip_nested_brackets() (ostirecord static method)": [[120, "elm.osti.OstiRecord.strip_nested_brackets", false]], "structuredllmcaller (class in elm.ords.llm.calling)": [[50, "elm.ords.llm.calling.StructuredLLMCaller", false]], "structuredordinanceparser (class in elm.ords.extraction.parse)": [[42, 
"elm.ords.extraction.parse.StructuredOrdinanceParser", false]], "submit_jobs() (apiqueue method)": [[3, "elm.base.ApiQueue.submit_jobs", false]], "summary (class in elm.summary)": [[124, "elm.summary.Summary", false]], "system_message (countyjurisdictionvalidator attribute)": [[113, "elm.ords.validation.location.CountyJurisdictionValidator.SYSTEM_MESSAGE", false]], "system_message (countynamevalidator attribute)": [[114, "elm.ords.validation.location.CountyNameValidator.SYSTEM_MESSAGE", false]], "system_message (fixedmessagevalidator attribute)": [[116, "elm.ords.validation.location.FixedMessageValidator.SYSTEM_MESSAGE", false]], "system_message (urlvalidator attribute)": [[117, "elm.ords.validation.location.URLValidator.SYSTEM_MESSAGE", false]], "tear_down_service_queue() (in module elm.ords.services.queues)": [[73, "elm.ords.services.queues.tear_down_service_queue", false]], "tempfilecache (class in elm.ords.services.threaded)": [[79, "elm.ords.services.threaded.TempFileCache", false]], "text (basedocument property)": [[147, "elm.web.document.BaseDocument.text", false]], "text (htmldocument property)": [[148, "elm.web.document.HTMLDocument.text", false]], "text (pdfdocument property)": [[149, "elm.web.document.PDFDocument.text", false]], "threadedservice (class in elm.ords.services.threaded)": [[80, "elm.ords.services.threaded.ThreadedService", false]], "timeboundedusagetracker (class in elm.ords.services.usage)": [[83, "elm.ords.services.usage.TimeBoundedUsageTracker", false]], "timedentry (class in elm.ords.services.usage)": [[84, "elm.ords.services.usage.TimedEntry", false]], "title (ostirecord property)": [[120, "elm.osti.OstiRecord.title", false]], "total (timeboundedusagetracker property)": [[83, "elm.ords.services.usage.TimeBoundedUsageTracker.total", false]], "totals (usagetracker property)": [[85, "elm.ords.services.usage.UsageTracker.totals", false]], "update() (ostirecord method)": [[120, "elm.osti.OstiRecord.update", false]], "update() (usagetracker 
method)": [[85, "elm.ords.services.usage.UsageTracker.update", false]], "update_from_model() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.update_from_model", false]], "url (apibase attribute)": [[2, "elm.base.ApiBase.URL", false]], "url (chunkandembed attribute)": [[8, "elm.embed.ChunkAndEmbed.URL", false]], "url (chunker attribute)": [[5, "elm.chunk.Chunker.URL", false]], "url (energywizard attribute)": [[161, "elm.wizard.EnergyWizard.URL", false]], "url (ostirecord property)": [[120, "elm.osti.OstiRecord.url", false]], "url (pdftotxt attribute)": [[122, "elm.pdf.PDFtoTXT.URL", false]], "url (summary attribute)": [[124, "elm.summary.Summary.URL", false]], "urlvalidator (class in elm.ords.validation.location)": [[117, "elm.ords.validation.location.URLValidator", false]], "usage_from_response() (in module elm.ords.services.openai)": [[67, "elm.ords.services.openai.usage_from_response", false]], "usagetracker (class in elm.ords.services.usage)": [[85, "elm.ords.services.usage.UsageTracker", false]], "usageupdater (class in elm.ords.services.threaded)": [[81, "elm.ords.services.threaded.UsageUpdater", false]], "validate_clean() (pdftotxt method)": [[122, "elm.pdf.PDFtoTXT.validate_clean", false]], "validationwithmemory (class in elm.ords.validation.content)": [[110, "elm.ords.validation.content.ValidationWithMemory", false]], "values() (ostirecord method)": [[120, "elm.osti.OstiRecord.values", false]], "values() (usagetracker method)": [[85, "elm.ords.services.usage.UsageTracker.values", false]], "waiting_on (apiqueue property)": [[3, "elm.base.ApiQueue.waiting_on", false]], "write_kwargs (basedocument property)": [[147, "elm.web.document.BaseDocument.WRITE_KWARGS", false]], "write_url_doc_to_file() (in module elm.web.utilities)": [[159, "elm.web.utilities.write_url_doc_to_file", false]], "year (ostirecord property)": [[120, "elm.osti.OstiRecord.year", false]]}, "objects": {"": [[0, 0, 0, "-", "elm"]], "elm": [[1, 0, 0, "-", "base"], [4, 0, 0, 
"-", "chunk"], [6, 0, 0, "-", "cli"], [7, 0, 0, "-", "embed"], [9, 0, 0, "-", "exceptions"], [12, 0, 0, "-", "ords"], [118, 0, 0, "-", "osti"], [121, 0, 0, "-", "pdf"], [123, 0, 0, "-", "summary"], [125, 0, 0, "-", "tree"], [127, 0, 0, "-", "utilities"], [144, 0, 0, "-", "version"], [145, 0, 0, "-", "web"], [160, 0, 0, "-", "wizard"], [163, 7, 1, "cmdoption-elm-version", "--version"]], "elm-ords": [[163, 7, 1, "cmdoption-elm-ords-c", "--config"], [163, 7, 1, "cmdoption-elm-ords-v", "--verbose"], [163, 7, 1, "cmdoption-elm-ords-c", "-c"], [163, 7, 1, "cmdoption-elm-ords-v", "-v"]], "elm.base": [[2, 1, 1, "", "ApiBase"], [3, 1, 1, "", "ApiQueue"]], "elm.base.ApiBase": [[2, 2, 1, "", "DEFAULT_MODEL"], [2, 2, 1, "", "EMBEDDING_MODEL"], [2, 2, 1, "", "EMBEDDING_URL"], [2, 2, 1, "", "HEADERS"], [2, 2, 1, "", "MODEL_ROLE"], [2, 2, 1, "", "URL"], [2, 3, 1, "", "all_messages_txt"], [2, 4, 1, "", "call_api"], [2, 4, 1, "", "call_api_async"], [2, 4, 1, "", "chat"], [2, 4, 1, "", "clear"], [2, 4, 1, "", "count_tokens"], [2, 4, 1, "", "generic_async_query"], [2, 4, 1, "", "generic_query"], [2, 4, 1, "", "get_embedding"]], "elm.base.ApiQueue": [[3, 4, 1, "", "collect_jobs"], [3, 4, 1, "", "run"], [3, 4, 1, "", "submit_jobs"], [3, 3, 1, "", "waiting_on"]], "elm.chunk": [[5, 1, 1, "", "Chunker"]], "elm.chunk.Chunker": [[5, 2, 1, "", "DEFAULT_MODEL"], [5, 2, 1, "", "EMBEDDING_MODEL"], [5, 2, 1, "", "EMBEDDING_URL"], [5, 2, 1, "", "HEADERS"], [5, 2, 1, "", "MODEL_ROLE"], [5, 2, 1, "", "URL"], [5, 4, 1, "", "add_overlap"], [5, 3, 1, "", "all_messages_txt"], [5, 4, 1, "", "call_api"], [5, 4, 1, "", "call_api_async"], [5, 4, 1, "", "chat"], [5, 4, 1, "", "chunk_text"], [5, 3, 1, "", "chunk_tokens"], [5, 3, 1, "", "chunks"], [5, 4, 1, "", "clean_paragraphs"], [5, 4, 1, "", "clear"], [5, 4, 1, "", "count_tokens"], [5, 4, 1, "", "generic_async_query"], [5, 4, 1, "", "generic_query"], [5, 4, 1, "", "get_embedding"], [5, 4, 1, "", "is_good_paragraph"], [5, 4, 1, "", "merge_chunks"], [5, 3, 
1, "", "paragraph_tokens"], [5, 3, 1, "", "paragraphs"]], "elm.embed": [[8, 1, 1, "", "ChunkAndEmbed"]], "elm.embed.ChunkAndEmbed": [[8, 2, 1, "", "DEFAULT_MODEL"], [8, 2, 1, "", "EMBEDDING_MODEL"], [8, 2, 1, "", "EMBEDDING_URL"], [8, 2, 1, "", "HEADERS"], [8, 2, 1, "", "MODEL_ROLE"], [8, 2, 1, "", "URL"], [8, 3, 1, "", "all_messages_txt"], [8, 4, 1, "", "call_api"], [8, 4, 1, "", "call_api_async"], [8, 4, 1, "", "chat"], [8, 4, 1, "", "clean_tables"], [8, 4, 1, "", "clear"], [8, 4, 1, "", "count_tokens"], [8, 4, 1, "", "generic_async_query"], [8, 4, 1, "", "generic_query"], [8, 4, 1, "", "get_embedding"], [8, 4, 1, "", "run"], [8, 4, 1, "", "run_async"]], "elm.exceptions": [[10, 5, 1, "", "ELMError"], [11, 5, 1, "", "ELMRuntimeError"]], "elm.ords": [[13, 0, 0, "-", "download"], [15, 0, 0, "-", "extraction"], [45, 0, 0, "-", "llm"], [51, 0, 0, "-", "process"], [55, 0, 0, "-", "services"], [86, 0, 0, "-", "utilities"], [108, 0, 0, "-", "validation"]], "elm.ords.download": [[14, 6, 1, "", "download_county_ordinance"]], "elm.ords.extraction": [[16, 0, 0, "-", "apply"], [21, 0, 0, "-", "date"], [23, 0, 0, "-", "features"], [25, 0, 0, "-", "graphs"], [35, 0, 0, "-", "ngrams"], [38, 0, 0, "-", "ordinance"], [41, 0, 0, "-", "parse"], [43, 0, 0, "-", "tree"]], "elm.ords.extraction.apply": [[17, 6, 1, "", "check_for_ordinance_info"], [18, 6, 1, "", "extract_ordinance_text_with_llm"], [19, 6, 1, "", "extract_ordinance_text_with_ngram_validation"], [20, 6, 1, "", "extract_ordinance_values"]], "elm.ords.extraction.date": [[22, 1, 1, "", "DateExtractor"]], "elm.ords.extraction.date.DateExtractor": [[22, 4, 1, "", "parse"]], "elm.ords.extraction.features": [[24, 1, 1, "", "SetbackFeatures"]], "elm.ords.extraction.graphs": [[26, 6, 1, "", "llm_response_does_not_start_with_no"], [27, 6, 1, "", "llm_response_starts_with_no"], [28, 6, 1, "", "llm_response_starts_with_yes"], [29, 6, 1, "", "setup_base_graph"], [30, 6, 1, "", "setup_conditional"], [31, 6, 1, "", 
"setup_graph_extra_restriction"], [32, 6, 1, "", "setup_graph_wes_types"], [33, 6, 1, "", "setup_multiplier"], [34, 6, 1, "", "setup_participating_owner"]], "elm.ords.extraction.ngrams": [[36, 6, 1, "", "convert_text_to_sentence_ngrams"], [37, 6, 1, "", "sentence_ngram_containment"]], "elm.ords.extraction.ordinance": [[39, 1, 1, "", "OrdinanceExtractor"], [40, 1, 1, "", "OrdinanceValidator"]], "elm.ords.extraction.ordinance.OrdinanceExtractor": [[39, 4, 1, "", "check_for_correct_size"], [39, 4, 1, "", "check_for_restrictions"]], "elm.ords.extraction.ordinance.OrdinanceValidator": [[40, 3, 1, "", "is_legal_text"], [40, 3, 1, "", "ordinance_text"], [40, 4, 1, "", "parse"], [40, 4, 1, "", "parse_from_ind"]], "elm.ords.extraction.parse": [[42, 1, 1, "", "StructuredOrdinanceParser"]], "elm.ords.extraction.parse.StructuredOrdinanceParser": [[42, 4, 1, "", "parse"]], "elm.ords.extraction.tree": [[44, 1, 1, "", "AsyncDecisionTree"]], "elm.ords.extraction.tree.AsyncDecisionTree": [[44, 3, 1, "", "all_messages_txt"], [44, 3, 1, "", "api"], [44, 4, 1, "", "async_call_node"], [44, 4, 1, "", "async_run"], [44, 4, 1, "", "call_node"], [44, 3, 1, "", "chat_llm_caller"], [44, 3, 1, "", "graph"], [44, 3, 1, "", "history"], [44, 3, 1, "", "messages"], [44, 4, 1, "", "run"]], "elm.ords.llm": [[46, 0, 0, "-", "calling"]], "elm.ords.llm.calling": [[47, 1, 1, "", "BaseLLMCaller"], [48, 1, 1, "", "ChatLLMCaller"], [49, 1, 1, "", "LLMCaller"], [50, 1, 1, "", "StructuredLLMCaller"]], "elm.ords.llm.calling.ChatLLMCaller": [[48, 4, 1, "", "call"]], "elm.ords.llm.calling.LLMCaller": [[49, 4, 1, "", "call"]], "elm.ords.llm.calling.StructuredLLMCaller": [[50, 4, 1, "", "call"]], "elm.ords.process": [[52, 6, 1, "", "download_doc_for_county"], [53, 6, 1, "", "download_docs_for_county_with_logging"], [54, 6, 1, "", "process_counties_with_openai"]], "elm.ords.services": [[56, 0, 0, "-", "base"], [59, 0, 0, "-", "cpu"], [64, 0, 0, "-", "openai"], [68, 0, 0, "-", "provider"], [70, 0, 0, "-", 
"queues"], [74, 0, 0, "-", "threaded"], [82, 0, 0, "-", "usage"]], "elm.ords.services.base": [[57, 1, 1, "", "RateLimitedService"], [58, 1, 1, "", "Service"]], "elm.ords.services.base.RateLimitedService": [[57, 2, 1, "", "MAX_CONCURRENT_JOBS"], [57, 4, 1, "", "acquire_resources"], [57, 4, 1, "", "call"], [57, 3, 1, "", "can_process"], [57, 3, 1, "", "name"], [57, 4, 1, "", "process"], [57, 4, 1, "", "process_using_futures"], [57, 4, 1, "", "release_resources"]], "elm.ords.services.base.Service": [[58, 2, 1, "", "MAX_CONCURRENT_JOBS"], [58, 4, 1, "", "acquire_resources"], [58, 4, 1, "", "call"], [58, 3, 1, "", "can_process"], [58, 3, 1, "", "name"], [58, 4, 1, "", "process"], [58, 4, 1, "", "process_using_futures"], [58, 4, 1, "", "release_resources"]], "elm.ords.services.cpu": [[60, 1, 1, "", "PDFLoader"], [61, 1, 1, "", "ProcessPoolService"], [62, 6, 1, "", "read_pdf_doc"], [63, 6, 1, "", "read_pdf_doc_ocr"]], "elm.ords.services.cpu.PDFLoader": [[60, 2, 1, "", "MAX_CONCURRENT_JOBS"], [60, 4, 1, "", "acquire_resources"], [60, 4, 1, "", "call"], [60, 3, 1, "", "can_process"], [60, 3, 1, "", "name"], [60, 4, 1, "", "process"], [60, 4, 1, "", "process_using_futures"], [60, 4, 1, "", "release_resources"]], "elm.ords.services.cpu.ProcessPoolService": [[61, 2, 1, "", "MAX_CONCURRENT_JOBS"], [61, 4, 1, "", "acquire_resources"], [61, 4, 1, "", "call"], [61, 3, 1, "", "can_process"], [61, 3, 1, "", "name"], [61, 4, 1, "", "process"], [61, 4, 1, "", "process_using_futures"], [61, 4, 1, "", "release_resources"]], "elm.ords.services.openai": [[65, 1, 1, "", "OpenAIService"], [66, 6, 1, "", "count_tokens"], [67, 6, 1, "", "usage_from_response"]], "elm.ords.services.openai.OpenAIService": [[65, 2, 1, "", "MAX_CONCURRENT_JOBS"], [65, 4, 1, "", "acquire_resources"], [65, 4, 1, "", "call"], [65, 3, 1, "", "can_process"], [65, 3, 1, "", "name"], [65, 4, 1, "", "process"], [65, 4, 1, "", "process_using_futures"], [65, 4, 1, "", "release_resources"]], "elm.ords.services.provider": 
[[69, 1, 1, "", "RunningAsyncServices"]], "elm.ords.services.queues": [[71, 6, 1, "", "get_service_queue"], [72, 6, 1, "", "initialize_service_queue"], [73, 6, 1, "", "tear_down_service_queue"]], "elm.ords.services.threaded": [[75, 1, 1, "", "CleanedFileWriter"], [76, 1, 1, "", "FileMover"], [77, 1, 1, "", "OrdDBFileWriter"], [78, 1, 1, "", "StoreFileOnDisk"], [79, 1, 1, "", "TempFileCache"], [80, 1, 1, "", "ThreadedService"], [81, 1, 1, "", "UsageUpdater"]], "elm.ords.services.threaded.CleanedFileWriter": [[75, 2, 1, "", "MAX_CONCURRENT_JOBS"], [75, 4, 1, "", "acquire_resources"], [75, 4, 1, "", "call"], [75, 3, 1, "", "can_process"], [75, 3, 1, "", "name"], [75, 4, 1, "", "process"], [75, 4, 1, "", "process_using_futures"], [75, 4, 1, "", "release_resources"]], "elm.ords.services.threaded.FileMover": [[76, 2, 1, "", "MAX_CONCURRENT_JOBS"], [76, 4, 1, "", "acquire_resources"], [76, 4, 1, "", "call"], [76, 3, 1, "", "can_process"], [76, 3, 1, "", "name"], [76, 4, 1, "", "process"], [76, 4, 1, "", "process_using_futures"], [76, 4, 1, "", "release_resources"]], "elm.ords.services.threaded.OrdDBFileWriter": [[77, 2, 1, "", "MAX_CONCURRENT_JOBS"], [77, 4, 1, "", "acquire_resources"], [77, 4, 1, "", "call"], [77, 3, 1, "", "can_process"], [77, 3, 1, "", "name"], [77, 4, 1, "", "process"], [77, 4, 1, "", "process_using_futures"], [77, 4, 1, "", "release_resources"]], "elm.ords.services.threaded.StoreFileOnDisk": [[78, 2, 1, "", "MAX_CONCURRENT_JOBS"], [78, 4, 1, "", "acquire_resources"], [78, 4, 1, "", "call"], [78, 3, 1, "", "can_process"], [78, 3, 1, "", "name"], [78, 4, 1, "", "process"], [78, 4, 1, "", "process_using_futures"], [78, 4, 1, "", "release_resources"]], "elm.ords.services.threaded.TempFileCache": [[79, 2, 1, "", "MAX_CONCURRENT_JOBS"], [79, 4, 1, "", "acquire_resources"], [79, 4, 1, "", "call"], [79, 3, 1, "", "can_process"], [79, 3, 1, "", "name"], [79, 4, 1, "", "process"], [79, 4, 1, "", "process_using_futures"], [79, 4, 1, "", "release_resources"]], 
"elm.ords.services.threaded.ThreadedService": [[80, 2, 1, "", "MAX_CONCURRENT_JOBS"], [80, 4, 1, "", "acquire_resources"], [80, 4, 1, "", "call"], [80, 3, 1, "", "can_process"], [80, 3, 1, "", "name"], [80, 4, 1, "", "process"], [80, 4, 1, "", "process_using_futures"], [80, 4, 1, "", "release_resources"]], "elm.ords.services.threaded.UsageUpdater": [[81, 2, 1, "", "MAX_CONCURRENT_JOBS"], [81, 4, 1, "", "acquire_resources"], [81, 4, 1, "", "call"], [81, 3, 1, "", "can_process"], [81, 3, 1, "", "name"], [81, 4, 1, "", "process"], [81, 4, 1, "", "process_using_futures"], [81, 4, 1, "", "release_resources"]], "elm.ords.services.usage": [[83, 1, 1, "", "TimeBoundedUsageTracker"], [84, 1, 1, "", "TimedEntry"], [85, 1, 1, "", "UsageTracker"]], "elm.ords.services.usage.TimeBoundedUsageTracker": [[83, 4, 1, "", "add"], [83, 3, 1, "", "total"]], "elm.ords.services.usage.UsageTracker": [[85, 4, 1, "", "add_to"], [85, 4, 1, "", "clear"], [85, 4, 1, "", "get"], [85, 4, 1, "", "items"], [85, 4, 1, "", "keys"], [85, 4, 1, "", "pop"], [85, 4, 1, "", "popitem"], [85, 4, 1, "", "setdefault"], [85, 3, 1, "", "totals"], [85, 4, 1, "", "update"], [85, 4, 1, "", "update_from_model"], [85, 4, 1, "", "values"]], "elm.ords.utilities": [[87, 0, 0, "-", "counties"], [91, 0, 0, "-", "exceptions"], [96, 0, 0, "-", "location"], [99, 0, 0, "-", "parsing"], [102, 0, 0, "-", "queued_logging"]], "elm.ords.utilities.counties": [[88, 6, 1, "", "county_websites"], [89, 6, 1, "", "load_all_county_info"], [90, 6, 1, "", "load_counties_from_fp"]], "elm.ords.utilities.exceptions": [[92, 5, 1, "", "ELMOrdsError"], [93, 5, 1, "", "ELMOrdsNotInitializedError"], [94, 5, 1, "", "ELMOrdsRuntimeError"], [95, 5, 1, "", "ELMOrdsValueError"]], "elm.ords.utilities.location": [[97, 1, 1, "", "County"], [98, 1, 1, "", "Location"]], "elm.ords.utilities.location.County": [[97, 3, 1, "", "full_name"]], "elm.ords.utilities.location.Location": [[98, 3, 1, "", "full_name"]], "elm.ords.utilities.parsing": [[100, 6, 1, "", 
"llm_response_as_json"], [101, 6, 1, "", "merge_overlapping_texts"]], "elm.ords.utilities.queued_logging": [[103, 1, 1, "", "LocalProcessQueueHandler"], [104, 1, 1, "", "LocationFileLog"], [105, 1, 1, "", "LocationFilter"], [106, 1, 1, "", "LogListener"], [107, 1, 1, "", "NoLocationFilter"]], "elm.ords.utilities.queued_logging.LocalProcessQueueHandler": [[103, 4, 1, "", "acquire"], [103, 4, 1, "", "addFilter"], [103, 4, 1, "", "close"], [103, 4, 1, "", "createLock"], [103, 4, 1, "", "emit"], [103, 4, 1, "", "enqueue"], [103, 4, 1, "", "filter"], [103, 4, 1, "", "flush"], [103, 4, 1, "", "format"], [103, 4, 1, "", "handle"], [103, 4, 1, "", "handleError"], [103, 4, 1, "", "prepare"], [103, 4, 1, "", "release"], [103, 4, 1, "", "removeFilter"], [103, 4, 1, "", "setFormatter"], [103, 4, 1, "", "setLevel"]], "elm.ords.utilities.queued_logging.LocationFilter": [[105, 4, 1, "", "filter"]], "elm.ords.utilities.queued_logging.LogListener": [[106, 4, 1, "", "addHandler"], [106, 4, 1, "", "removeHandler"]], "elm.ords.utilities.queued_logging.NoLocationFilter": [[107, 4, 1, "", "filter"]], "elm.ords.validation": [[109, 0, 0, "-", "content"], [112, 0, 0, "-", "location"]], "elm.ords.validation.content": [[110, 1, 1, "", "ValidationWithMemory"], [111, 6, 1, "", "possibly_mentions_wind"]], "elm.ords.validation.content.ValidationWithMemory": [[110, 4, 1, "", "parse_from_ind"]], "elm.ords.validation.location": [[113, 1, 1, "", "CountyJurisdictionValidator"], [114, 1, 1, "", "CountyNameValidator"], [115, 1, 1, "", "CountyValidator"], [116, 1, 1, "", "FixedMessageValidator"], [117, 1, 1, "", "URLValidator"]], "elm.ords.validation.location.CountyJurisdictionValidator": [[113, 2, 1, "", "SYSTEM_MESSAGE"], [113, 4, 1, "", "check"]], "elm.ords.validation.location.CountyNameValidator": [[114, 2, 1, "", "SYSTEM_MESSAGE"], [114, 4, 1, "", "check"]], "elm.ords.validation.location.CountyValidator": [[115, 4, 1, "", "check"]], "elm.ords.validation.location.FixedMessageValidator": [[116, 2, 1, 
"", "SYSTEM_MESSAGE"], [116, 4, 1, "", "check"]], "elm.ords.validation.location.URLValidator": [[117, 2, 1, "", "SYSTEM_MESSAGE"], [117, 4, 1, "", "check"]], "elm.osti": [[119, 1, 1, "", "OstiList"], [120, 1, 1, "", "OstiRecord"]], "elm.osti.OstiList": [[119, 2, 1, "", "BASE_URL"], [119, 4, 1, "", "__add__"], [119, 4, 1, "", "__mul__"], [119, 4, 1, "", "append"], [119, 4, 1, "", "clear"], [119, 4, 1, "", "copy"], [119, 4, 1, "", "count"], [119, 4, 1, "", "download"], [119, 4, 1, "", "extend"], [119, 4, 1, "", "from_osti_ids"], [119, 4, 1, "", "index"], [119, 4, 1, "", "insert"], [119, 3, 1, "", "meta"], [119, 4, 1, "", "pop"], [119, 4, 1, "", "remove"], [119, 4, 1, "", "reverse"], [119, 4, 1, "", "sort"]], "elm.osti.OstiRecord": [[120, 3, 1, "", "authors"], [120, 4, 1, "", "clear"], [120, 4, 1, "", "copy"], [120, 3, 1, "", "date"], [120, 3, 1, "", "doi"], [120, 4, 1, "", "download"], [120, 4, 1, "", "fromkeys"], [120, 4, 1, "", "get"], [120, 4, 1, "", "items"], [120, 4, 1, "", "keys"], [120, 3, 1, "", "osti_id"], [120, 4, 1, "", "pop"], [120, 4, 1, "", "popitem"], [120, 4, 1, "", "setdefault"], [120, 4, 1, "", "strip_nested_brackets"], [120, 3, 1, "", "title"], [120, 4, 1, "", "update"], [120, 3, 1, "", "url"], [120, 4, 1, "", "values"], [120, 3, 1, "", "year"]], "elm.pdf": [[122, 1, 1, "", "PDFtoTXT"]], "elm.pdf.PDFtoTXT": [[122, 2, 1, "", "DEFAULT_MODEL"], [122, 2, 1, "", "EMBEDDING_MODEL"], [122, 2, 1, "", "EMBEDDING_URL"], [122, 2, 1, "", "HEADERS"], [122, 2, 1, "", "MODEL_INSTRUCTION"], [122, 2, 1, "", "MODEL_ROLE"], [122, 2, 1, "", "URL"], [122, 3, 1, "", "all_messages_txt"], [122, 4, 1, "", "call_api"], [122, 4, 1, "", "call_api_async"], [122, 4, 1, "", "chat"], [122, 4, 1, "", "clean_headers"], [122, 4, 1, "", "clean_poppler"], [122, 4, 1, "", "clean_txt"], [122, 4, 1, "", "clean_txt_async"], [122, 4, 1, "", "clear"], [122, 4, 1, "", "count_tokens"], [122, 4, 1, "", "generic_async_query"], [122, 4, 1, "", "generic_query"], [122, 4, 1, "", "get_embedding"], 
[122, 4, 1, "", "is_double_col"], [122, 4, 1, "", "load_pdf"], [122, 4, 1, "", "make_gpt_messages"], [122, 4, 1, "", "validate_clean"]], "elm.summary": [[124, 1, 1, "", "Summary"]], "elm.summary.Summary": [[124, 2, 1, "", "DEFAULT_MODEL"], [124, 2, 1, "", "EMBEDDING_MODEL"], [124, 2, 1, "", "EMBEDDING_URL"], [124, 2, 1, "", "HEADERS"], [124, 2, 1, "", "MODEL_INSTRUCTION"], [124, 2, 1, "", "MODEL_ROLE"], [124, 2, 1, "", "URL"], [124, 3, 1, "", "all_messages_txt"], [124, 4, 1, "", "call_api"], [124, 4, 1, "", "call_api_async"], [124, 4, 1, "", "chat"], [124, 4, 1, "", "clear"], [124, 4, 1, "", "combine"], [124, 4, 1, "", "count_tokens"], [124, 4, 1, "", "generic_async_query"], [124, 4, 1, "", "generic_query"], [124, 4, 1, "", "get_embedding"], [124, 4, 1, "", "run"], [124, 4, 1, "", "run_async"]], "elm.tree": [[126, 1, 1, "", "DecisionTree"]], "elm.tree.DecisionTree": [[126, 3, 1, "", "all_messages_txt"], [126, 3, 1, "", "api"], [126, 4, 1, "", "call_node"], [126, 3, 1, "", "graph"], [126, 3, 1, "", "history"], [126, 3, 1, "", "messages"], [126, 4, 1, "", "run"]], "elm.utilities": [[128, 0, 0, "-", "parse"], [141, 0, 0, "-", "retry"]], "elm.utilities.parse": [[129, 6, 1, "", "clean_headers"], [130, 6, 1, "", "combine_pages"], [131, 6, 1, "", "format_html_tables"], [132, 6, 1, "", "html_to_text"], [133, 6, 1, "", "is_multi_col"], [134, 6, 1, "", "read_pdf"], [135, 6, 1, "", "read_pdf_ocr"], [136, 6, 1, "", "remove_blank_pages"], [137, 6, 1, "", "remove_empty_lines_or_page_footers"], [138, 6, 1, "", "replace_common_pdf_conversion_chars"], [139, 6, 1, "", "replace_excessive_newlines"], [140, 6, 1, "", "replace_multi_dot_lines"]], "elm.utilities.retry": [[142, 6, 1, "", "async_retry_with_exponential_backoff"], [143, 6, 1, "", "retry_with_exponential_backoff"]], "elm.web": [[146, 0, 0, "-", "document"], [150, 0, 0, "-", "file_loader"], [152, 0, 0, "-", "google_search"], [154, 0, 0, "-", "html_pw"], [156, 0, 0, "-", "utilities"]], "elm.web.document": [[147, 1, 1, "", 
"BaseDocument"], [148, 1, 1, "", "HTMLDocument"], [149, 1, 1, "", "PDFDocument"]], "elm.web.document.BaseDocument": [[147, 3, 1, "", "FILE_EXTENSION"], [147, 3, 1, "", "WRITE_KWARGS"], [147, 3, 1, "", "empty"], [147, 3, 1, "", "raw_pages"], [147, 3, 1, "", "text"]], "elm.web.document.HTMLDocument": [[148, 2, 1, "", "HTML_TABLE_TO_MARKDOWN_KWARGS"], [148, 3, 1, "", "empty"], [148, 3, 1, "", "raw_pages"], [148, 3, 1, "", "text"]], "elm.web.document.PDFDocument": [[149, 2, 1, "", "CLEAN_HEADER_KWARGS"], [149, 3, 1, "", "empty"], [149, 3, 1, "", "num_raw_pages_to_keep"], [149, 3, 1, "", "raw_pages"], [149, 3, 1, "", "text"]], "elm.web.file_loader": [[151, 1, 1, "", "AsyncFileLoader"]], "elm.web.file_loader.AsyncFileLoader": [[151, 2, 1, "", "DEFAULT_HEADER_TEMPLATE"], [151, 4, 1, "", "fetch"], [151, 4, 1, "", "fetch_all"]], "elm.web.google_search": [[153, 1, 1, "", "PlaywrightGoogleLinkSearch"]], "elm.web.google_search.PlaywrightGoogleLinkSearch": [[153, 2, 1, "", "EXPECTED_RESULTS_PER_PAGE"], [153, 4, 1, "", "results"]], "elm.web.html_pw": [[155, 6, 1, "", "load_html_with_pw"]], "elm.web.utilities": [[157, 6, 1, "", "clean_search_query"], [158, 6, 1, "", "compute_fn_from_url"], [159, 6, 1, "", "write_url_doc_to_file"]], "elm.wizard": [[161, 1, 1, "", "EnergyWizard"]], "elm.wizard.EnergyWizard": [[161, 2, 1, "", "DEFAULT_MODEL"], [161, 2, 1, "", "EMBEDDING_MODEL"], [161, 2, 1, "", "EMBEDDING_URL"], [161, 2, 1, "", "HEADERS"], [161, 2, 1, "", "MODEL_INSTRUCTION"], [161, 2, 1, "", "MODEL_ROLE"], [161, 2, 1, "", "URL"], [161, 3, 1, "", "all_messages_txt"], [161, 4, 1, "", "call_api"], [161, 4, 1, "", "call_api_async"], [161, 4, 1, "", "chat"], [161, 4, 1, "", "clear"], [161, 4, 1, "", "cosine_dist"], [161, 4, 1, "", "count_tokens"], [161, 4, 1, "", "engineer_query"], [161, 4, 1, "", "generic_async_query"], [161, 4, 1, "", "generic_query"], [161, 4, 1, "", "get_embedding"], [161, 4, 1, "", "make_ref_list"], [161, 4, 1, "", "preflight_corpus"], [161, 4, 1, "", 
"rank_strings"]]}, "objnames": {"0": ["py", "module", "Python module"], "1": ["py", "class", "Python class"], "2": ["py", "attribute", "Python attribute"], "3": ["py", "property", "Python property"], "4": ["py", "method", "Python method"], "5": ["py", "exception", "Python exception"], "6": ["py", "function", "Python function"], "7": ["std", "cmdoption", "program option"]}, "objtypes": {"0": "py:module", "1": "py:class", "2": "py:attribute", "3": "py:property", "4": "py:method", "5": "py:exception", "6": "py:function", "7": "std:cmdoption"}, "terms": {"": [14, 17, 18, 19, 20, 52, 53, 54, 65, 83, 85, 105, 107, 120, 126, 148, 151, 167, 168], "0": [2, 3, 5, 8, 19, 65, 115, 119, 122, 124, 129, 142, 143, 149, 151, 161], "002": [2, 5, 8, 122, 124, 161], "05": 102, "08go28308": 168, "1": [2, 3, 5, 8, 19, 40, 110, 111, 119, 122, 124, 129, 142, 143, 149, 151, 157, 161, 168, 169], "10": [3, 54, 153], "100": [84, 101, 161], "1000": [65, 84, 167], "10000": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "11": 102, "128": 158, "16": [142, 143], "164": 158, "175000": 8, "18": 149, "1d": [2, 5, 8, 122, 124, 161], "1e3": 65, "2": [2, 3, 5, 8, 40, 85, 103, 110, 120, 122, 124, 129, 149, 161, 168, 169], "20": [119, 166], "2019": 102, "2023": [2, 3, 5, 8, 122, 124, 161], "25": 149, "3": [2, 3, 5, 8, 19, 22, 40, 103, 122, 124, 142, 143, 149, 161], "300": [54, 101], "3000": 54, "35": 126, "3500": 161, "350k": 8, "4": [2, 3, 5, 8, 19, 54, 122, 124, 142, 143, 161, 168], "400": 124, "4000": 54, "40000": [2, 3, 5, 8, 122, 124, 161], "4096": 161, "5": [2, 3, 5, 8, 14, 52, 53, 54, 122, 124, 151, 161], "50": 153, "500": [5, 124], "51485656": 83, "5f": 148, "6": [122, 129, 149], "600": 124, "65": 83, "7": [119, 120, 161], "70": 83, "700": 167, "8": [115, 122, 129, 149, 151], "85": 167, "9": 151, "90k": [2, 3, 5, 8, 122, 124, 161], "9223372036854775807": 119, "95": 19, "A": [17, 18, 19, 20, 40, 44, 57, 58, 60, 61, 65, 66, 75, 76, 77, 78, 79, 80, 81, 83, 85, 104, 110, 126, 161], "Be": 167, "By": 
[14, 19, 40, 42, 47, 48, 49, 50, 52, 53, 54, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 83, 85, 88, 97, 101, 104, 106, 110, 111, 115, 132, 134, 135, 142, 143, 147, 148, 149, 151, 153, 155, 158, 159], "For": [40, 57, 67, 104, 105, 110, 153, 168], "IN": 119, "If": [2, 3, 5, 8, 14, 17, 19, 40, 50, 52, 53, 54, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 85, 88, 103, 105, 107, 110, 111, 119, 120, 122, 124, 151, 153, 155, 161, 168, 169], "In": [17, 18, 19, 20, 63, 85, 120, 158, 166], "It": [40, 83, 110], "NOT": 117, "No": [26, 27, 167, 168], "Not": 167, "The": [14, 17, 18, 19, 20, 36, 39, 40, 48, 49, 50, 52, 53, 54, 66, 73, 85, 103, 104, 105, 106, 110, 113, 114, 116, 117, 119, 124, 142, 143, 148, 149, 161, 165, 167, 168], "Then": 111, "There": 167, "These": [30, 38, 42, 47, 48, 49, 50, 109, 112], "To": [17, 135, 142, 143], "__add__": 119, "__mul__": 119, "_handler": 103, "abc": [2, 58, 98, 116, 147], "about": [2, 3, 5, 8, 60, 75, 76, 77, 78, 79, 103, 114, 117, 122, 124, 159, 161], "abov": [67, 113, 114, 122, 124], "abstract": [1, 56, 57, 58, 61, 78, 80, 98, 147], "ac36": 168, "accept": [58, 61, 80, 151, 168], "access": 103, "accord": 119, "accordingli": [142, 143], "account": 161, "acquir": 103, "acquire_resourc": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "acquisit": 103, "acronym": 111, "across": [85, 122, 129], "act": [44, 126], "activ": [53, 168, 169], "actual": 103, "acycl": [44, 126], "ad": [50, 60, 67, 79, 81, 83, 84, 85, 103, 104, 122, 159, 161], "ada": [2, 5, 8, 122, 124, 161], "add": [5, 29, 30, 31, 32, 33, 34, 81, 83, 85, 103, 106, 158, 167], "add_edg": 126, "add_nod": 126, "add_overlap": 5, "add_to": 85, "adder": [142, 143], "addfilt": 103, "addhandl": 106, "addit": 149, "adjust": 167, "affect": 138, "after": [30, 36, 122, 129, 157], "ag": 83, "again": [2, 3, 5, 8, 122, 124, 161], "against": [37, 105], "agent": 151, "aget_kwarg": 151, "aiohttp": 151, "aip": 85, "alik": 111, "aliv": 151, "all": [3, 8, 17, 36, 54, 81, 83, 85, 89, 90, 103, 107, 113, 119, 
120, 122, 124, 129, 131, 151, 157, 161, 163, 167], "all_messages_txt": [2, 5, 8, 44, 122, 124, 126, 161], "all_request_json": [2, 3, 5, 8, 122, 124, 161], "allianc": 168, "alloc": [57, 58, 65], "allow": [67, 103, 107, 167, 168], "along": [2, 5, 8, 122, 124, 161], "alreadi": [67, 119], "also": [14, 17, 19, 40, 42, 52, 53, 54, 83, 85, 110, 135, 161, 166, 167], "alwai": [37, 60, 75, 76, 77, 78, 79, 158], "amazon": [142, 143], "amd": 64, "an": [2, 3, 5, 8, 14, 17, 18, 19, 20, 29, 30, 31, 32, 33, 34, 36, 44, 50, 52, 53, 54, 65, 66, 69, 72, 76, 83, 84, 85, 103, 106, 119, 120, 122, 124, 126, 134, 135, 142, 143, 148, 151, 161, 166, 167, 168], "ani": [22, 40, 81, 83, 101, 103, 111, 113, 114, 117, 131, 136, 142, 143, 151, 163], "anoth": [85, 153], "answer": [2, 5, 8, 40, 50, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 110, 113, 114, 117, 122, 124, 135, 161, 166], "anyth": 81, "api": [1, 2, 3, 5, 8, 44, 54, 65, 119, 120, 122, 124, 126, 161, 166, 167], "api_job": 3, "api_kei": [2, 3, 5, 8, 122, 124, 161], "apibas": [5, 8, 44, 122, 124, 126, 161], "apitimeouterror": [142, 143], "app": [165, 168], "append": 119, "appli": [14, 38, 52, 53, 109, 112, 113, 114, 119, 135, 142, 143, 151, 155, 167, 168], "applic": [2, 3, 5, 8, 103, 122, 124, 151, 161], "approxim": 124, "ar": [2, 3, 5, 8, 14, 17, 18, 19, 20, 22, 30, 36, 37, 38, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 67, 73, 83, 85, 88, 103, 106, 109, 110, 111, 112, 119, 120, 122, 124, 126, 129, 149, 151, 155, 161, 167, 168, 169], "architectur": [142, 143], "area": 113, "aren": 167, "arg": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 92, 93, 94, 95, 124, 132, 163], "argument": [14, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 62, 63, 65, 75, 76, 77, 78, 79, 80, 81, 103, 113, 114, 116, 117, 124, 126, 131, 148, 149, 151, 153, 155, 158, 163], "arrai": [2, 5, 8, 122, 124, 161], "arriv": [142, 143], "articl": [161, 168], "ascend": 119, "ask": [2, 5, 8, 122, 124, 161], "assist": [2, 5, 8, 122, 124, 161], "assum": [2, 3, 5, 8, 
122, 124, 161], "async": [2, 3, 5, 8, 14, 17, 18, 19, 20, 22, 39, 40, 42, 43, 44, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 62, 63, 65, 69, 75, 76, 77, 78, 79, 80, 81, 110, 113, 114, 115, 116, 117, 122, 124, 151, 153, 155, 161], "async_call_nod": 44, "async_playwright": [151, 155], "async_run": 44, "asyncazureopenai": 65, "asyncfileload": [14, 52, 53], "asynchron": [2, 3, 5, 8, 55, 60, 79, 122, 124, 142, 161, 167], "asyncio": [14, 52, 53, 57, 58, 60, 61, 65, 71, 72, 75, 76, 77, 78, 79, 80, 81, 102, 103, 151, 155], "asyncopenai": 65, "asyncron": [2, 3, 5, 8, 122, 124, 161], "attempt": [19, 135, 151], "attribut": [2, 3, 5, 8, 18, 19, 20, 22, 24, 39, 40, 44, 52, 53, 57, 58, 60, 61, 63, 65, 67, 75, 76, 77, 78, 79, 80, 81, 83, 85, 97, 98, 103, 105, 107, 113, 114, 116, 117, 119, 120, 122, 124, 126, 147, 148, 149, 151, 153, 161], "author": [2, 3, 5, 8, 120, 122, 124, 161, 168], "autom": 168, "automat": [18, 19, 20, 65, 134], "avoid": [142, 143], "aw": [142, 143], "await": [2, 5, 8, 122, 124, 161], "azur": [2, 5, 8, 54, 122, 124, 161, 166, 167], "azure_api_kei": 54, "azure_endpoint": 54, "azure_openai_api_kei": [54, 167], "azure_openai_endpoint": [2, 5, 8, 54, 122, 124, 161, 167], "azure_openai_kei": [2, 5, 8, 122, 124, 161], "azure_openai_vers": [2, 5, 8, 54, 122, 124, 161, 167], "azure_vers": 54, "backoff": [65, 142, 143], "base": [5, 8, 22, 24, 39, 40, 42, 44, 47, 48, 49, 50, 60, 61, 65, 69, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 90, 97, 98, 103, 104, 105, 106, 107, 110, 113, 114, 115, 116, 117, 119, 120, 122, 124, 126, 129, 142, 143, 147, 148, 149, 151, 153, 161], "base_delai": [142, 143], "base_url": 119, "basedocu": [14, 17, 18, 19, 20, 22, 52, 53, 115, 148, 149], "basellmcal": [42, 48, 49, 50], "basic": [5, 122, 132, 168, 169], "batch": [142, 143], "bearer": [2, 3, 5, 8, 122, 124, 161], "becaus": [2, 3, 5, 8, 122, 124, 161], "becom": [104, 148], "been": [17, 18, 19, 20, 103], "befor": [3, 19, 40, 83, 110, 119, 122, 135, 142, 143, 154], "begin": [5, 26, 27, 28, 101, 
157], "being": [3, 44, 54, 66, 81, 83, 103, 126, 161], "belong": 115, "below": [161, 168, 169], "belval": 135, "better": [122, 166, 167], "between": [5, 19, 44, 120, 122, 124, 126, 129, 133], "blank": [136, 151], "blob": [66, 142, 143, 167], "block": 103, "blog": [102, 142, 143], "bodi": [5, 124], "bool": [26, 27, 28, 40, 60, 75, 76, 77, 78, 79, 81, 97, 105, 107, 110, 111, 113, 114, 115, 116, 117, 122, 124, 132, 133, 134, 135, 142, 143, 147, 148, 149, 151, 158, 159, 161], "boolean": [40, 58, 61, 80, 110, 113, 114, 117], "bound": [59, 83], "box": [142, 143], "brace": 122, "bracket": 120, "branch": [168, 169], "break": [3, 4, 5], "broadcast": [92, 93, 94, 95], "broadli": 113, "browser": [14, 52, 53, 54, 151, 155], "browser_semaphor": [14, 52, 53, 151, 155], "budget": 161, "build": 168, "byte": [60, 62, 63, 79, 134, 135, 151, 159], "byu": 36, "c": [163, 167], "cach": [79, 151], "call": [1, 2, 3, 5, 8, 40, 42, 44, 57, 58, 60, 61, 65, 72, 73, 75, 76, 77, 78, 79, 80, 81, 85, 103, 110, 122, 124, 126, 132, 142, 143, 161, 166, 167], "call_api": [2, 5, 8, 122, 124, 161], "call_api_async": [2, 5, 8, 122, 124, 161], "call_nod": [44, 126], "callabl": [2, 3, 5, 8, 44, 85, 103, 122, 124, 126, 151, 161], "caller": [39, 42, 45, 47, 48, 49, 50], "can": [2, 3, 5, 8, 14, 18, 19, 20, 29, 30, 31, 32, 33, 34, 40, 42, 44, 52, 53, 54, 58, 61, 65, 78, 80, 85, 103, 110, 119, 122, 124, 126, 135, 142, 143, 149, 151, 153, 155, 161, 166, 167], "can_process": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "cannot": [19, 100, 161], "care": 103, "case": [26, 27, 28, 40, 67, 85, 110, 120, 134, 158, 167], "catch": 107, "categor": [65, 85], "categori": [65, 85], "caution": 19, "cd": [168, 169], "certain": [19, 65, 83, 85], "certif": 151, "chanc": 19, "chang": 103, "char": [122, 129, 138], "char_thresh": [122, 129, 149], "charact": [101, 104, 122, 129, 135, 139, 157, 158], "chat": [2, 3, 5, 8, 48, 49, 65, 122, 124, 142, 143, 161, 166], "chat_llm_cal": 44, "chatbot": 168, "chatcomplet": 161, 
"chatgpt": [2, 5, 8, 122, 124, 161, 168], "chatllmcal": 44, "check": [14, 19, 26, 27, 28, 34, 35, 37, 40, 52, 53, 54, 57, 58, 61, 65, 80, 101, 105, 110, 111, 113, 114, 115, 116, 117, 122, 151, 157, 161], "check_for_correct_s": 39, "check_for_ordinance_info": [18, 19], "check_for_restrict": 39, "children": 107, "chose": [113, 114], "chromium": [151, 153, 155], "chunk": [8, 14, 17, 18, 19, 39, 40, 52, 53, 54, 101, 110, 122, 124, 148, 161, 166, 168], "chunk_kwarg": [8, 124], "chunk_text": 5, "chunk_token": 5, "chunker": [8, 124], "chunks_input": 5, "citi": [113, 114, 117], "cl": [2, 8, 122], "class": [1, 2, 3, 4, 5, 7, 8, 21, 22, 23, 24, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 56, 57, 58, 59, 60, 61, 64, 65, 68, 69, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 96, 97, 98, 102, 103, 104, 105, 106, 107, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 132, 142, 143, 146, 147, 148, 149, 150, 151, 152, 153, 160, 161], "classifi": 97, "classmethod": [2, 5, 8, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 119, 122, 124, 161], "claus": 30, "clean": [2, 5, 8, 18, 19, 54, 57, 58, 65, 75, 77, 120, 122, 124, 129, 130, 137, 138, 139, 140, 147, 148, 149, 157, 161], "clean_dir": 54, "clean_head": [122, 149], "clean_header_kwarg": 149, "clean_pag": 122, "clean_paragraph": 5, "clean_poppl": 122, "clean_tabl": 8, "clean_txt": 122, "clean_txt_async": [2, 5, 8, 122, 124, 161], "cleaned_ordinance_text": [18, 19, 20], "cleanup": [60, 61, 75, 76, 77, 78, 79, 80, 81], "clear": [2, 5, 8, 85, 119, 120, 122, 124, 161], "cli": 167, "client": 65, "clientsess": 151, "clone": [168, 169], "close": 103, "closer": 161, "code": [97, 167, 168, 169], "cohes": 124, "collaps": 122, "collect": 3, "collect_job": 3, "colorado": [104, 105], "column": [54, 90, 122, 133, 134, 161, 167], "com": [2, 3, 5, 8, 66, 83, 102, 122, 124, 135, 142, 143, 151, 161, 167, 168, 169], "combin": [36, 40, 115, 124, 130], "command": [2, 5, 8, 122, 124, 161, 163, 168, 169], 
"comment": 122, "common": 138, "compar": [57, 65], "comparison": 84, "complet": [2, 3, 5, 8, 65, 67, 122, 124, 142, 143, 161, 167], "comprehens": 124, "comput": [57, 85, 158, 161], "compute_fn_from_url": [60, 79, 159], "concurr": [14, 52, 53, 54, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 151, 155], "conda": [168, 169], "condit": [44, 126], "condition": 103, "config": [163, 167], "configur": [142, 143, 163, 167], "connect": 151, "consecut": 101, "consid": [19, 122, 129], "consider": 83, "consult": 103, "contain": [8, 17, 18, 19, 20, 22, 36, 40, 42, 47, 48, 49, 50, 53, 54, 60, 61, 62, 63, 66, 67, 75, 76, 77, 78, 79, 80, 81, 85, 88, 89, 90, 97, 100, 101, 103, 104, 105, 107, 110, 113, 114, 115, 131, 133, 134, 135, 136, 137, 139, 140, 147, 148, 149, 151, 153, 157, 159, 161, 163, 167], "contains_ord_info": 17, "content": [2, 3, 5, 8, 38, 48, 49, 50, 60, 66, 79, 85, 100, 108, 113, 114, 115, 116, 117, 122, 124, 134, 135, 136, 151, 159, 161], "context": [2, 5, 8, 30, 69, 104, 122, 124], "continu": [2, 5, 8, 122, 124], "contract": 168, "control": 65, "convers": [2, 5, 8, 38, 44, 109, 122, 124, 126, 161], "convert": [36, 60, 79, 103, 131, 138, 158, 159, 166, 168], "convo": 161, "cookbook": [66, 142, 143], "copi": [103, 119, 120], "coroutin": [105, 106, 151], "corpu": [161, 166], "correct": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 115, 168, 169], "correct_counti": 117, "correct_st": 117, "correspond": [2, 5, 8, 40, 85, 106, 110, 115, 120, 122, 124, 134, 135, 153, 161], "cosin": 161, "cosine_dist": 161, "cost": 167, "could": [103, 161], "count": [2, 3, 5, 8, 66, 111, 119, 122, 124, 147, 148, 149, 161], "count_token": [2, 5, 8, 122, 124, 161], "counti": [13, 14, 52, 53, 54, 113, 114, 115, 117, 163, 167], "countri": 113, "county_db": 54, "county_dbs_dir": 54, "county_fp": [54, 90], "county_info": 88, "county_ord_fil": 54, "county_ords_dir": 54, "cpu": 54, "creat": [8, 54, 65, 119, 120, 148, 168, 169], "createlock": 103, "creation": 78, "creativ": [2, 5, 8, 122, 124, 
161], "cryptic": 167, "csv": [54, 90, 167], "current": [54, 67, 81, 85, 103], "current_task": 103, "current_usag": 67, "custom": [9, 91, 103], "d": [85, 120], "dai": 22, "data": [2, 3, 5, 8, 12, 18, 19, 20, 42, 50, 113, 114, 117, 118, 120, 122, 124, 161, 168], "databas": [54, 168], "datafam": 20, "datafram": [42, 54, 88, 89, 90, 119, 131, 161], "date": [17, 120], "de": 168, "debug": [161, 165], "decis": [25, 43, 44, 125, 126, 167, 168], "decisiontre": [29, 30, 31, 32, 33, 34, 44], "decor": [142, 143], "decreas": 54, "deeper": 167, "default": [2, 5, 8, 14, 19, 40, 42, 47, 48, 49, 50, 52, 53, 54, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 83, 85, 88, 97, 101, 103, 104, 105, 106, 107, 110, 111, 115, 119, 120, 122, 124, 132, 134, 135, 142, 143, 147, 148, 149, 151, 153, 155, 158, 159, 161, 163], "default_header_templ": 151, "default_model": [2, 5, 8, 122, 124, 161], "defin": [90, 151, 167], "definit": 146, "delai": [142, 143], "delet": 73, "demark": 5, "demonstr": 166, "depart": 168, "depend": [103, 168, 169], "deploy": 167, "descend": 119, "describ": [40, 110, 113, 114, 116, 117, 135], "descript": [24, 142, 143], "desir": 124, "detail": [119, 167], "detect": [5, 19, 111, 131, 134, 157], "determin": [103, 105, 113, 114], "determinist": [2, 5, 8, 122, 124, 161], "dev": [168, 169], "develop": [168, 169], "df": 161, "diagnost": 161, "dict": [2, 3, 5, 8, 14, 50, 52, 53, 54, 60, 67, 75, 76, 77, 78, 79, 81, 85, 88, 100, 103, 120, 122, 124, 147, 148, 149, 151, 159, 161], "dictat": 111, "dictionari": [14, 50, 52, 53, 66, 67, 70, 85, 88, 100, 120, 148, 149], "did": [19, 35], "digit": [119, 120, 157], "digraph": [29, 30, 31, 32, 33, 34, 44, 126], "dir": [168, 169], "direct": [44, 126], "directori": [53, 54, 60, 61, 75, 76, 77, 78, 79, 80, 81, 104, 119, 159, 167], "discard": 83, "disjoint": 124, "disk": [78, 159], "displai": 153, "distanc": 161, "distil": [123, 124], "dnt": 151, "do": [2, 3, 5, 8, 114, 117, 122, 124, 136, 161, 167, 168], "doc": [17, 18, 19, 20, 22, 60, 75, 76, 77, 
78, 79, 115, 119, 147, 149, 159], "document": [2, 5, 12, 14, 16, 17, 18, 19, 20, 22, 35, 38, 40, 42, 52, 53, 54, 60, 62, 63, 75, 76, 77, 78, 79, 108, 109, 112, 113, 114, 115, 116, 117, 122, 124, 129, 130, 134, 135, 151, 159, 165, 168], "doe": [18, 19, 20, 26, 40, 44, 50, 54, 73, 85, 103, 110, 119, 120, 122, 126, 131, 133, 168], "doesnt": 119, "doi": 120, "don": [40, 90], "dot": 140, "doubl": [5, 83, 122, 157], "down": [105, 151], "download": [12, 52, 54, 79, 119, 120, 151, 163, 165, 167], "downstream": 122, "drop": [83, 103], "due": 78, "duplic": [122, 129], "durat": 57, "dure": [42, 47, 48, 49, 50, 69, 103, 134, 135, 142, 143, 148], "dynam": 103, "e": [2, 5, 8, 38, 40, 42, 47, 48, 49, 50, 57, 85, 103, 107, 109, 110, 111, 113, 119, 120, 122, 124, 142, 143, 157, 161, 167, 168, 169], "each": [2, 5, 8, 36, 39, 40, 66, 101, 106, 110, 119, 122, 124, 142, 143, 147, 148, 149, 153, 161, 167], "edg": [44, 126], "effici": 83, "either": [2, 3, 5, 8, 85, 120, 122, 124, 161], "el": [104, 105], "element": [83, 119], "elm": [162, 166, 167, 169], "elmruntimeerror": [142, 143], "els": [44, 85, 103, 120, 126], "emb": [2, 5, 122, 124, 161, 166], "embed": [2, 3, 5, 7, 8, 122, 124, 161, 165, 168], "embedding_model": [2, 5, 8, 122, 124, 161], "embedding_url": [2, 5, 8, 122, 124, 161], "emiss": 103, "emit": [103, 106, 107], "empti": [2, 3, 5, 8, 50, 67, 85, 100, 119, 120, 122, 124, 134, 135, 136, 137, 147, 148, 149, 161], "en": 151, "enabl": 54, "enact": 17, "encapsul": 17, "enclos": 131, "encoding_for_model": 66, "encount": 103, "end": [60, 79, 83, 119, 122, 129, 149, 158, 159], "endpoint": [2, 5, 8, 54, 122, 124, 161], "energi": [0, 38, 109, 111, 124, 160, 161, 165], "engin": [124, 161], "engineer_queri": 161, "enough": [111, 113, 114, 131], "enqueu": 103, "ensur": [19, 103, 119, 142, 143], "entri": [2, 3, 5, 8, 83, 84, 119, 122, 124, 153, 161], "entropi": [2, 5, 8, 122, 124, 161], "env": [168, 169], "environ": [2, 5, 8, 54, 122, 124, 161, 167, 168, 169], "equal": [103, 119, 153, 161], 
"equip": 106, "error": [2, 3, 5, 8, 9, 10, 18, 50, 65, 91, 92, 93, 103, 122, 124, 134, 135, 142, 143, 161, 167], "etc": [42, 89, 90, 111], "event": 107, "everi": [105, 107], "ex": 63, "exact": [113, 114, 116, 117], "exactli": [113, 114, 117, 167], "examin": 167, "exampl": [40, 57, 66, 84, 85, 104, 105, 110, 126, 142, 143, 153, 161, 166, 167, 168], "exce": [5, 19, 111, 115, 149, 153], "exceed": 3, "except": [81, 103], "excerpt": 113, "exclus": [23, 24], "execut": [44, 54, 69, 126, 153, 165], "executor": 54, "exist": [54, 67, 73, 81, 119], "exit": 163, "expect": [40, 110], "expected_results_per_pag": 153, "experi": 167, "explan": [113, 114, 167], "explicit": 166, "explicitli": 113, "exponenti": [142, 143], "exponential_bas": [142, 143], "express": 168, "extend": [119, 167], "extens": [147, 165], "extra": 161, "extract": [12, 50, 52, 53, 54, 113, 114, 117, 122, 132, 149, 155, 163, 167, 168, 169], "extract_ordinance_text": 20, "extractor": 18, "f": [2, 3, 5, 8, 85, 120, 122, 124, 161], "factor": [2, 3, 5, 8, 122, 124, 142, 143, 161], "factual": [2, 5, 8, 122, 124, 161], "fail": [65, 157], "failur": [142, 143], "fals": [17, 40, 60, 79, 97, 103, 105, 110, 113, 114, 115, 116, 117, 119, 122, 151, 153, 158, 159, 161], "fancy_combin": 124, "fashion": 161, "fast": [58, 61, 80], "featur": [29, 30, 33, 34, 42, 50], "fetch": 151, "fetch_al": 151, "few": 167, "file": [8, 13, 53, 54, 60, 62, 63, 75, 76, 77, 78, 79, 81, 90, 100, 104, 106, 113, 114, 117, 119, 122, 134, 135, 147, 150, 151, 158, 159, 163, 167], "file_cache_coroutin": 151, "file_cont": [60, 79, 159], "file_extens": 147, "file_load": [14, 52, 53], "file_loader_kwarg": [14, 52, 53, 54], "filenam": 158, "filepath": [8, 120, 122], "fill": [44, 126, 167], "filter": [103, 105, 107], "final": [44, 126], "find": 161, "fip": [89, 90, 97], "firm": 167, "first": [36, 40, 110, 111, 113, 114, 117, 119, 120, 142, 143, 151, 153, 157, 167], "fix": 135, "fixedmessagevalid": [113, 114, 117], "flag": [97, 119, 122, 124, 161, 163, 167], 
"flat": 167, "flicker": 42, "float": [2, 3, 5, 8, 19, 37, 57, 65, 83, 115, 122, 124, 129, 161], "floatfmt": 148, "flush": 103, "fmt": 103, "fmt_kwarg": [113, 114, 116, 117], "fn": 60, "fo": 101, "folder": [54, 167], "follow": [42, 47, 48, 49, 50, 85, 102, 120, 167], "footer": [122, 129, 149], "forc": 17, "form": [107, 120, 149, 168], "format": [2, 5, 8, 40, 44, 50, 85, 97, 103, 110, 113, 114, 116, 117, 122, 124, 126, 131, 134, 138, 161], "format_html_t": 148, "formatt": 103, "found": [14, 17, 18, 19, 20, 22, 37, 40, 52, 53, 85, 90, 110, 120, 161, 166, 167], "fp": [90, 120, 122], "fraction": [19, 37, 122, 129, 142, 143], "freedesktop": 122, "freedom": [2, 5, 8, 122, 124, 161], "from": [2, 5, 8, 17, 18, 19, 22, 31, 35, 36, 37, 39, 40, 44, 50, 54, 57, 58, 60, 61, 62, 63, 65, 75, 76, 77, 78, 79, 80, 81, 83, 85, 103, 105, 106, 110, 113, 114, 115, 117, 118, 119, 120, 122, 124, 126, 129, 130, 132, 134, 135, 136, 138, 142, 143, 147, 148, 149, 155, 158, 159, 161, 166, 167, 168, 169], "from_osti_id": 119, "fromkei": 120, "full": [2, 5, 8, 44, 51, 97, 98, 113, 122, 124, 126, 130, 161, 167], "full_nam": [97, 98], "function": [13, 16, 17, 25, 35, 42, 47, 48, 49, 50, 51, 57, 58, 59, 60, 61, 64, 65, 70, 72, 73, 75, 76, 77, 78, 79, 80, 81, 87, 99, 105, 109, 119, 127, 128, 129, 131, 135, 141, 142, 143, 148, 149, 151, 153, 154, 156, 157, 167], "fund": 168, "fut": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "futur": [54, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "g": [2, 5, 8, 38, 40, 57, 109, 110, 111, 122, 124, 126, 157, 161, 167], "gener": [2, 5, 8, 10, 92, 122, 124, 161], "generic_async_queri": [2, 5, 8, 122, 124, 161], "generic_queri": [2, 5, 8, 122, 124, 161], "get": [2, 3, 5, 8, 18, 24, 29, 32, 44, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 85, 103, 119, 120, 122, 124, 126, 151, 161, 167], "get_embed": [2, 5, 8, 122, 124, 161], "get_nam": 103, "git": [168, 169], "github": [66, 135, 142, 143, 167, 168, 169], "give": [2, 5, 8, 122, 124, 161], "given": [30, 85, 
119, 120, 122, 129, 151, 167], "global": [124, 161], "go": [122, 129], "good": 19, "googl": [14, 52, 53, 54, 151, 152, 153, 157], "google_search": [14, 52, 53], "gov": 119, "govern": 168, "gpt": [2, 3, 5, 8, 54, 65, 122, 124, 126, 130, 161, 165, 168], "grant": 126, "graph": [44, 126, 167], "greater": [30, 111], "guid": [124, 135], "ha": [17, 18, 19, 20, 37, 85, 103, 119, 120, 122, 133, 157], "hallucin": 19, "handl": [60, 75, 76, 77, 78, 79, 103, 119, 120], "handleerror": 103, "handler": [103, 104, 106], "hang": 119, "happi": 167, "have": [2, 5, 8, 18, 19, 20, 44, 54, 60, 65, 66, 73, 75, 76, 77, 78, 79, 103, 105, 107, 122, 124, 126, 159, 161, 166, 167], "header": [2, 3, 5, 8, 122, 124, 129, 131, 149, 151, 161], "header_templ": 151, "headless": 153, "hello": 126, "help": [19, 35, 40, 110, 124, 142, 143, 161], "helper": 22, "here": [54, 126, 135, 161, 167], "heurist": [40, 111, 122, 133], "high": [2, 5, 8, 44, 54, 122, 124, 126, 161], "histori": [2, 5, 8, 44, 122, 124, 126, 161], "hold": 3, "home": [168, 169], "how": [8, 126, 135, 166, 167], "how_to_count_tokens_with_tiktoken": 66, "how_to_handle_rate_limit": [142, 143], "howev": [103, 167], "html": [54, 60, 79, 131, 132, 148, 151, 154, 155, 159], "html2text": 132, "html_read_coroutin": 151, "html_read_kwarg": 151, "html_table_to_markdown_kwarg": 148, "htmldocument": 151, "http": [2, 3, 5, 8, 66, 83, 102, 119, 122, 124, 135, 142, 143, 151, 161, 167], "hyphen": 8, "i": [2, 3, 5, 8, 19, 36, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 60, 65, 66, 67, 75, 76, 77, 78, 79, 83, 85, 88, 97, 103, 105, 106, 107, 110, 113, 114, 116, 117, 119, 120, 122, 124, 126, 131, 135, 142, 143, 147, 148, 149, 151, 153, 154, 157, 158, 161, 163, 166, 167, 168, 169], "id": [119, 120], "identifi": [104, 105], "idx": 161, "ignor": [2, 3, 5, 8, 40, 103, 122, 124, 132, 148, 161], "ignore_error": [2, 3, 5, 8, 122, 124, 161], "ignore_html_link": 148, "ignore_link": 132, "ihead": [122, 129, 149], "imag": 151, "implement": [14, 17, 18, 19, 52, 53, 102, 
103, 148], "import": [126, 167], "includ": [2, 5, 8, 19, 36, 37, 40, 110, 113, 114, 117, 122, 124, 138, 142, 143, 161], "inclus": 19, "increas": 54, "ind": [40, 110], "index": [40, 110, 119, 122, 148], "indexerror": 119, "indic": [5, 58, 61, 80, 97, 122, 129, 161], "individu": [40, 54], "infer": [85, 114, 117], "info": [17, 19, 22, 39, 50, 53, 54, 65, 81, 85, 87, 88, 89, 90, 104, 106, 149], "inform": [17, 18, 19, 52, 53, 54, 60, 67, 75, 76, 77, 78, 79, 85, 113, 114, 117, 122, 159, 161], "init": [44, 92, 93, 94, 95, 126, 161], "init_logg": 126, "initi": [2, 5, 8, 14, 17, 19, 20, 29, 30, 31, 32, 33, 34, 52, 53, 54, 62, 63, 71, 72, 85, 93, 107, 119, 122, 124, 161], "initialis": 103, "input": [2, 3, 5, 8, 14, 17, 18, 19, 36, 37, 39, 40, 44, 52, 53, 54, 65, 66, 67, 81, 83, 85, 90, 110, 111, 115, 119, 122, 124, 126, 129, 132, 133, 138, 142, 143, 148, 153, 157, 158, 161, 167], "insecur": 151, "insensit": [26, 27, 28], "insert": [119, 120], "instal": [135, 166, 167], "instanc": [14, 17, 18, 19, 20, 22, 29, 30, 31, 32, 33, 34, 39, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 61, 62, 63, 65, 71, 72, 80, 81, 103, 104, 110, 113, 114, 115, 116, 117, 126, 139, 140, 148, 151, 155], "instead": 54, "instruct": [50, 122, 167, 168, 169], "int": [2, 3, 5, 8, 14, 19, 36, 37, 40, 52, 53, 54, 57, 65, 66, 83, 97, 101, 103, 110, 111, 119, 122, 124, 142, 143, 149, 153, 161], "intact": 103, "integ": [5, 40, 110, 119, 122, 129], "intend": 103, "intens": 54, "interest": [103, 168], "interfac": [124, 161, 163, 166, 168], "intern": [103, 131, 168], "internet": 79, "interv": [57, 65], "ipynb": [66, 142, 143], "ipython": [2, 5, 8, 122, 124, 161], "irrevoc": 168, "is_double_col": 122, "is_good_paragraph": 5, "is_legal_text": 40, "is_parish": 97, "item": [85, 103, 119, 120, 129], "iter": [69, 85, 101, 106, 119, 120, 134, 135, 136, 142, 143, 147, 148, 149, 151], "its": [107, 168, 169], "itself": [8, 35, 81, 119], "javascript": 154, "jitter": [142, 143], "job": [3, 57, 58, 60, 61, 65, 75, 76, 77, 78, 
79, 80, 81], "join": [122, 129], "json": [2, 3, 5, 8, 40, 50, 81, 100, 103, 110, 113, 114, 117, 122, 124, 161, 163, 167], "jupyt": [2, 5, 8, 122, 124, 161], "jurisdict": [113, 115], "just": [2, 5, 8, 103, 122, 124, 140, 161], "k": [85, 120], "keep": [149, 151, 167], "kei": [2, 5, 8, 14, 17, 18, 19, 20, 40, 42, 47, 48, 49, 50, 52, 53, 54, 60, 66, 67, 75, 76, 77, 78, 79, 85, 88, 110, 113, 114, 115, 117, 119, 120, 122, 124, 159, 161, 166, 167], "keyerror": [85, 120], "keyword": [14, 17, 19, 20, 29, 30, 31, 32, 33, 34, 42, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 62, 63, 65, 75, 76, 77, 78, 79, 80, 81, 111, 113, 114, 116, 117, 131, 148, 149, 151, 153, 155], "kick": 167, "known": [18, 19, 20, 54], "kwarg": [8, 14, 17, 19, 20, 29, 30, 31, 32, 33, 34, 42, 47, 48, 49, 50, 52, 53, 57, 58, 60, 61, 62, 63, 65, 75, 76, 77, 78, 79, 80, 81, 92, 93, 94, 95, 124, 131, 147, 151], "label": [48, 49, 50, 65, 81, 85, 161], "laboratori": 168, "lack": [85, 120], "lambda": 126, "langch": 157, "langchain": [14, 17, 18, 19, 52, 53, 54, 148], "languag": [0, 151], "larg": [2, 3, 5, 8, 38, 39, 40, 109, 110, 122, 124, 161, 168], "larger": 153, "largest": 32, "last": [119, 120], "latest": 166, "latter": 66, "launch": [151, 153, 155], "launch_kwarg": 153, "layout": [122, 134], "lead": [54, 157], "leaf": [44, 126], "least": 161, "leav": 103, "legal": [38, 40, 109, 112, 113, 114], "len": [40, 110], "length": [2, 5, 8, 122, 124, 153, 158, 161], "less": [40, 110, 158], "lesser": 30, "level": [2, 5, 8, 44, 53, 54, 85, 103, 104, 106, 113, 122, 124, 126, 161], "licens": 168, "lifo": 120, "like": [2, 3, 5, 8, 48, 53, 54, 75, 76, 77, 78, 81, 85, 89, 90, 104, 113, 120, 122, 124, 133, 159, 161, 167, 168], "limit": [2, 3, 5, 8, 14, 52, 53, 54, 57, 60, 65, 75, 76, 77, 78, 79, 122, 124, 142, 143, 147, 148, 149, 151, 155, 161, 167], "line": [5, 42, 122, 124, 129, 137, 163], "link": [132, 148, 153], "list": [2, 3, 5, 8, 14, 17, 18, 19, 36, 39, 40, 44, 52, 53, 54, 66, 110, 119, 120, 122, 124, 126, 129, 130, 
136, 147, 148, 149, 151, 153, 161, 163], "listen": [53, 104, 106], "live": 83, "llc": 168, "llm": [2, 5, 8, 14, 17, 18, 19, 20, 22, 26, 27, 28, 35, 39, 40, 42, 44, 52, 53, 54, 85, 100, 110, 113, 114, 115, 116, 117, 122, 123, 124, 126, 161, 166, 168], "llm_call_kwarg": 54, "llm_caller": 39, "llm_servic": [42, 47, 48, 49, 50], "llm_service_rate_limit": [54, 167], "llmcaller": [14, 17, 19, 20, 39, 52, 53, 54], "load": [54, 60, 62, 63, 88, 89, 90, 122, 134, 154], "load_county_info": 88, "load_pdf": 122, "loader": [150, 151], "local": [79, 103, 167], "locat": [14, 52, 53, 103, 104, 105, 107], "locationfilelog": 53, "lock": 103, "log": [53, 54, 102, 103, 104, 105, 106, 107, 126, 134, 135, 163, 167], "log_dir": [53, 54, 104], "log_level": 54, "loggabl": 103, "logger": [53, 54, 92, 93, 94, 95, 106, 107], "logger_nam": 106, "logginglisten": 104, "logic": [13, 21, 38, 44, 51, 109, 112, 115, 126], "loglisten": 53, "logrecord": [103, 105, 107], "long": 57, "longer": 106, "look": [2, 3, 5, 8, 40, 110, 111, 122, 124, 129, 133, 161, 167], "lookup": 103, "lowercas": 88, "machin": [54, 167], "made": 85, "madmaz": 135, "mai": [2, 5, 8, 17, 40, 42, 50, 67, 75, 76, 77, 78, 101, 103, 110, 111, 122, 124, 133, 134, 135, 157, 161, 167], "main": [66, 142, 143, 151, 167, 168, 169], "maintain": [119, 122], "make": [2, 5, 8, 35, 60, 79, 122, 124, 158, 159, 161, 167, 168, 169], "make_gpt_messag": 122, "make_name_uniqu": [60, 79, 159], "make_ref_list": 161, "make_uniqu": 158, "manag": [3, 69, 78, 104], "mani": [139, 140], "map": [85, 88, 103], "markdown": 131, "match": [105, 111, 117, 131, 167], "match_count_threshold": 111, "max": [30, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 140, 142, 143, 149, 158], "max_concurrent_job": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "max_num_concurrent_brows": 54, "max_raw_pag": 149, "max_retri": [3, 142, 143], "max_second": 83, "maximum": [19, 83, 139], "mean": [18, 19, 20, 148], "measur": [2, 5, 8, 122, 124, 161], "memori": 49, "mention": [111, 
113, 117], "merg": [5, 101, 103], "merge_chunk": 5, "messag": [2, 3, 5, 8, 42, 44, 47, 48, 49, 50, 66, 92, 93, 94, 95, 101, 103, 105, 107, 113, 114, 116, 117, 122, 124, 126, 161, 167], "met": [44, 126], "meta": [60, 75, 76, 77, 78, 79, 119, 149, 159], "metadata": [17, 18, 19, 20, 52, 53, 60, 75, 76, 77, 78, 79, 115, 147, 148, 149, 159], "method": [2, 3, 5, 8, 14, 17, 18, 19, 22, 24, 39, 40, 42, 44, 47, 48, 49, 50, 52, 53, 57, 58, 60, 61, 63, 65, 69, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 97, 98, 103, 104, 105, 106, 107, 110, 113, 114, 115, 116, 117, 119, 120, 122, 124, 126, 134, 135, 147, 148, 149, 151, 153, 161], "might": [103, 168], "million": 119, "min": 30, "min_chunks_to_process": 40, "mind": 167, "minimum": 40, "minut": [2, 3, 5, 8, 57, 65, 122, 124, 161, 167], "miss": 107, "model": [0, 2, 3, 5, 8, 54, 65, 66, 85, 122, 124, 126, 161, 166], "model_instruct": [122, 124, 161], "model_rol": [2, 5, 8, 122, 124, 161], "modifi": [103, 119], "modul": [70, 102, 103, 135, 167], "monitor": [42, 47, 48, 49, 50], "month": 22, "more": [2, 5, 8, 36, 42, 58, 61, 80, 103, 113, 119, 122, 124, 133, 139, 140, 149, 161, 167], "most": [17, 103, 161], "mostli": [102, 103], "move": [76, 78], "much": 161, "mult": 30, "multi": [130, 134], "multipl": [119, 122, 133, 142, 143, 161], "multipli": [33, 142, 143], "must": [18, 19, 20, 40, 44, 60, 63, 65, 66, 67, 75, 76, 77, 78, 79, 103, 104, 105, 110, 111, 113, 114, 117, 126, 131, 151, 159, 161, 167], "mutual": [23, 24], "n": [2, 5, 8, 19, 36, 37, 101, 122, 124, 129, 138, 139, 149, 161, 168, 169], "n_page": 119, "n_word": 124, "name": [44, 54, 57, 58, 60, 61, 65, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 88, 89, 90, 97, 98, 103, 104, 106, 107, 115, 119, 124, 126, 158, 159, 161], "namespac": 167, "narr": 124, "nation": 168, "nativ": 166, "ndarrai": 161, "nearli": [2, 5, 8, 122, 124, 161], "necessarili": 168, "need": [2, 5, 8, 44, 57, 58, 65, 122, 124, 126, 129, 135, 161, 166, 167], "neg": 19, "nest": 5, "networkx": [44, 126], "never": [124, 
149, 153], "new": [83, 120, 161, 167], "new_info_threshold": 161, "newlin": [137, 139], "next": [44, 126], "ngram": 19, "ngram_fraction_threshold": 19, "nice": 122, "node": [44, 126], "node0": [44, 126], "nois": 42, "nomin": 5, "non": [31, 34, 103], "none": [2, 3, 5, 8, 14, 17, 22, 42, 47, 48, 49, 50, 52, 53, 54, 60, 61, 65, 71, 75, 76, 77, 78, 79, 80, 81, 85, 88, 97, 116, 119, 120, 122, 124, 147, 148, 149, 151, 155, 161], "nonexclus": 168, "normal": 149, "notabl": 167, "note": [2, 3, 5, 8, 17, 18, 19, 20, 63, 65, 88, 122, 124, 129, 131, 153, 161, 166, 168, 169], "noth": 103, "now": 166, "np": 161, "nrel": [166, 167, 168, 169], "nthe": 122, "num_end_pages_to_keep": 149, "num_extraction_attempt": 19, "num_raw_pages_to_keep": 149, "num_result": 153, "num_to_recal": [40, 110], "num_url": [14, 52, 53], "num_urls_to_check_per_counti": 54, "number": [2, 3, 5, 8, 14, 19, 36, 37, 40, 52, 53, 54, 57, 58, 60, 61, 65, 66, 75, 76, 77, 78, 79, 80, 81, 101, 110, 111, 119, 120, 122, 124, 137, 142, 143, 144, 149, 151, 153, 155, 161], "numer": [2, 5, 8, 119, 122, 124, 161], "nx": [29, 30, 31, 32, 33, 34, 44, 126], "o": [54, 103], "obj": [14, 17, 18, 19, 52, 53, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 84, 148], "object": [3, 14, 17, 18, 19, 22, 24, 39, 42, 44, 47, 48, 49, 50, 52, 53, 57, 58, 60, 61, 65, 66, 67, 69, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 103, 104, 106, 110, 115, 119, 120, 126, 136, 148, 151, 153], "occur": [40, 78, 103, 110], "occurr": 119, "ocr": [54, 63, 135, 151, 167], "od": 81, "off": 167, "offic": 168, "offset": [142, 143], "oid": 119, "ok": 73, "older": [83, 167], "onc": [119, 167], "one": [2, 3, 5, 8, 36, 42, 85, 113, 119, 122, 124, 133, 135, 151, 161], "onli": [2, 3, 5, 8, 40, 104, 110, 113, 122, 124, 135, 137, 139, 140, 151, 161], "onto": 158, "open": [14, 52, 53, 54, 60, 61, 75, 76, 77, 78, 79, 80, 81, 147, 151, 155], "openai": [2, 3, 5, 8, 54, 57, 85, 122, 124, 142, 143, 161, 166, 167], "openai_api_kei": [2, 5, 8, 122, 124, 161], "oper": [5, 57, 
58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 168], "opt": 167, "optic": 135, "option": [2, 3, 5, 8, 14, 19, 40, 42, 47, 48, 49, 50, 52, 53, 54, 60, 65, 75, 76, 77, 78, 79, 81, 83, 85, 88, 97, 101, 104, 106, 110, 111, 115, 122, 124, 132, 134, 135, 142, 143, 147, 148, 149, 151, 153, 155, 158, 159, 161, 163, 166, 167, 168, 169], "ord": [162, 167], "order": [3, 39, 40, 110, 119, 120, 167], "ordin": [6, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 23, 25, 29, 31, 32, 35, 41, 42, 43, 45, 46, 51, 52, 53, 54, 59, 64, 74, 82, 86, 87, 91, 92, 93, 94, 95, 96, 99, 102, 108, 109, 112, 163, 165, 168, 169], "ordinance_text": [17, 18, 19, 40], "ordinance_valu": 20, "ordinanceextractor": 18, "org": 122, "origin": [19, 35, 36, 37, 40, 103, 110, 122, 148, 161], "osti": [166, 168], "osti_id": 120, "other": [17, 42, 44, 85, 113, 126, 142, 143, 151, 165, 168], "otherwis": [17, 40, 75, 76, 77, 78, 85, 103, 110, 113, 114, 115, 116, 117, 120], "out": [2, 3, 5, 8, 42, 44, 75, 76, 77, 78, 119, 120, 122, 124, 126, 129, 133, 142, 143, 161, 167], "out_dir": [54, 75, 76, 77, 78, 119, 159], "outgo": 66, "outlin": [113, 114, 116, 117], "output": [2, 3, 5, 8, 19, 40, 53, 54, 60, 75, 76, 77, 78, 79, 81, 103, 104, 105, 110, 122, 124, 159, 161], "ov": 135, "over": [57, 83], "overlap": [4, 5, 54, 101], "overrid": [103, 161], "overridden": 103, "own": 166, "owner": 34, "packag": [166, 168, 169], "pad": 5, "page": [62, 63, 115, 119, 122, 129, 130, 134, 135, 136, 137, 147, 148, 149, 153, 155], "page_rang": 122, "page_thresh": [122, 129, 149], "paid": 168, "pair": [14, 17, 19, 20, 29, 30, 31, 32, 33, 34, 52, 53, 54, 60, 61, 75, 76, 77, 78, 79, 80, 81, 85, 120, 148, 149, 151, 155], "panda": 131, "paragraph": [5, 8, 124], "paragraph_token": 5, "parallel": [2, 3, 5, 8, 122, 124, 161], "paramet": [2, 3, 5, 8, 14, 17, 18, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 62, 63, 65, 66, 67, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 88, 
90, 97, 98, 100, 101, 103, 104, 105, 106, 107, 110, 111, 113, 114, 115, 116, 117, 119, 120, 122, 124, 126, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 142, 143, 147, 148, 149, 151, 153, 155, 157, 158, 159, 161, 167], "parenthes": 120, "parish": 97, "pars": [2, 3, 5, 8, 17, 18, 19, 20, 22, 40, 49, 52, 54, 106, 110, 115, 122, 124, 148, 151, 161, 167], "parse_from_ind": [40, 110], "parser": [67, 121, 138], "part": [19, 104], "particip": 34, "particular": [17, 18, 19, 20, 29, 38, 63, 109, 111, 112, 114], "paso": [104, 105], "pass": [19, 40, 42, 47, 48, 49, 50, 53, 54, 57, 58, 60, 61, 62, 63, 65, 66, 75, 76, 77, 78, 79, 80, 81, 103, 110, 111, 113, 114, 116, 117, 131, 147, 148, 149, 151, 153, 155, 163], "path": [53, 54, 60, 75, 76, 77, 78, 79, 81, 90, 104, 159, 163, 167], "pd": [42, 54, 88, 89, 90, 119, 161], "pdf": [2, 5, 8, 54, 60, 62, 63, 79, 119, 120, 124, 134, 135, 138, 149, 151, 159, 161, 165, 167, 168], "pdf2imag": 135, "pdf_byte": [60, 62, 63, 134, 135], "pdf_ocr_read_coroutin": 151, "pdf_raw_text": 122, "pdf_read_coroutin": 151, "pdf_read_kwarg": 151, "pdfdocument": [62, 63, 151], "pdftotext": [122, 167], "pdftotxt": [2, 5, 8, 124, 161, 166], "per": [5, 19, 36, 37, 57, 65, 119, 151, 153, 167], "percent": 149, "percent_raw_pages_to_keep": 149, "perfect": 124, "perform": [5, 54, 84, 111, 124, 131, 151, 161, 168], "pertain": [40, 110, 111, 115], "phrase": 111, "physic": [122, 134], "piec": 8, "pip": [166, 168, 169], "pipe": 8, "place": [103, 119], "playwright": [14, 52, 53, 151, 153, 154, 155], "playwrightgooglelinksearch": [14, 52, 53], "pleas": [122, 124], "plu": [142, 143], "point": [19, 63, 167], "pool": [54, 60, 61, 62, 63, 75, 76, 77, 78, 79, 80, 81], "poorli": 122, "pop": [85, 119, 120], "popitem": [85, 120], "popper": 166, "poppler": [122, 166], "popul": [18, 19, 20], "portion": [101, 167], "posit": [40, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 110], "possibl": [131, 167], "possibli": [137, 139, 140], "potenti": [17, 137], 
"ppe_kwarg": 54, "pre": 124, "prefix": [124, 161], "preflight": 161, "preflight_corpu": 161, "prepar": 103, "prerequisit": 165, "present": [85, 119, 120], "presum": 138, "pretti": 131, "prevent": 161, "previou": [2, 5, 8, 40, 101, 110, 122, 124], "primarili": [38, 109, 112], "print": [126, 161, 167], "print_refer": 161, "printout": [2, 5, 8, 44, 122, 124, 126, 161], "prior": [124, 168, 169], "process": [17, 19, 20, 40, 42, 47, 48, 49, 50, 57, 58, 60, 61, 62, 63, 65, 75, 76, 77, 78, 79, 80, 81, 103, 105, 163, 167], "process_counties_with_openai": [163, 167], "process_using_futur": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "processpoolexecutor": [54, 60, 61], "processpoolservic": 60, "program": [69, 167], "prompt": [40, 44, 110, 116, 124, 126, 161], "prompt_token": 67, "propag": [142, 143], "properli": 63, "properti": [2, 3, 5, 8, 40, 42, 44, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 83, 85, 97, 98, 119, 120, 122, 124, 126, 147, 148, 149, 161], "provid": [42, 47, 48, 49, 50, 65, 85, 120, 124, 151, 161, 167, 168], "psql": 148, "public": [120, 168], "publish": 168, "pull": [54, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 151, 155, 159], "purpos": [167, 168], "put": 167, "put_nowait": 103, "pw_launch_kwarg": [14, 52, 53, 151, 155], "py": [166, 167], "pypdf2": 166, "pytesseract": [54, 63, 135, 167], "pytesseract_exe_fp": [54, 167], "python": [83, 102, 122, 166], "q": 151, "qualiti": 54, "queri": [2, 5, 8, 22, 40, 42, 47, 48, 49, 50, 65, 70, 110, 113, 114, 115, 116, 117, 122, 124, 153, 157, 161, 167], "query_embed": 161, "question": [2, 5, 8, 40, 83, 110, 122, 124, 161, 166], "queu": [58, 102, 103], "queue": [3, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 83, 103, 106], "queued_log": 53, "queuehandl": 103, "quit": 124, "quot": [124, 157], "r": 138, "rais": [3, 85, 119, 120, 142, 143], "raiseexcept": 103, "random": [142, 143], "rang": 119, "rank": 161, "rank_str": 161, "rate": [2, 3, 5, 8, 54, 57, 65, 85, 122, 124, 142, 143, 161], "rate_limit": [2, 3, 
5, 8, 57, 65, 122, 124, 161], "rate_track": [57, 65], "ratelimitedservic": 65, "ratelimiterror": [142, 143], "raw": [2, 5, 8, 115, 122, 124, 147, 148, 149, 161], "raw_pag": [22, 147, 148, 149], "re": [2, 3, 5, 8, 19, 122, 124, 138, 161], "reach": [5, 19], "read": [62, 63, 134, 135, 151], "readm": 135, "recent": 17, "recognit": 135, "recommend": [54, 167], "record": [44, 103, 105, 106, 107, 119, 120, 126], "recurs": [124, 168], "recursivecharactertextsplitt": 54, "reduc": [2, 5, 8, 122, 124, 153, 161], "redund": [124, 161], "ref_col": 161, "ref_list": 161, "refer": [5, 40, 66, 83, 110, 142, 143, 151, 161, 167], "regul": [42, 113], "relat": [65, 85, 111, 161], "related": 161, "releas": 103, "release_resourc": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "relev": [75, 76, 77, 78, 88, 161], "reliabl": [2, 5, 8, 122, 124, 161], "remain": 81, "remov": [17, 73, 85, 101, 103, 106, 119, 120, 122, 129, 136, 138, 157, 167], "removefilt": 103, "removehandl": 106, "render": 154, "renew": 168, "repeat": [72, 73, 139, 140, 142, 143], "replac": [81, 103, 131, 137, 139, 140], "repo": [166, 168, 169], "report": [166, 168], "repres": [2, 5, 8, 14, 39, 40, 57, 66, 97, 110, 122, 124, 161, 167, 168], "represent": [98, 100, 147, 158], "reproduc": 168, "request": [40, 67, 90, 110, 119, 151, 161], "request_json": [2, 3, 5, 8, 122, 124, 161], "requir": [122, 161, 163, 166, 167], "research": [2, 5, 8, 122, 123, 124, 161, 168], "resembl": 40, "reset": 161, "resourc": [57, 58, 65, 83, 103], "respons": [2, 3, 5, 8, 26, 27, 28, 40, 44, 48, 49, 57, 58, 60, 61, 65, 67, 75, 76, 77, 78, 79, 80, 81, 85, 100, 110, 119, 120, 122, 124, 126, 161], "response_pars": 85, "response_token": 67, "rest": 151, "restrict": 39, "result": [2, 5, 8, 14, 17, 18, 19, 20, 52, 53, 54, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 105, 122, 124, 153, 157, 161, 167], "retain": 168, "retri": [3, 65], "retriev": [40, 50, 52, 53, 54, 71, 110, 118, 119, 153, 166, 167], "retrieve_doc": 166, "return": [2, 3, 5, 8, 14, 
17, 18, 19, 20, 22, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 40, 42, 44, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 62, 63, 65, 66, 67, 71, 72, 75, 76, 77, 78, 79, 80, 81, 85, 88, 89, 90, 100, 101, 103, 105, 107, 110, 111, 113, 114, 115, 116, 117, 119, 120, 122, 124, 126, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 148, 151, 153, 155, 157, 158, 159, 161], "return_chat_obj": 161, "revers": 119, "rex": 126, "road": 42, "role": [2, 3, 5, 8, 122, 124, 161], "routin": 167, "row": [139, 140, 167], "run": [2, 3, 5, 8, 18, 19, 20, 44, 54, 58, 61, 69, 80, 83, 122, 124, 126, 151, 161, 165, 167, 168, 169], "run_app": 166, "run_async": [2, 5, 8, 122, 124, 161], "runtim": 124, "runtimeerror": [11, 94], "safeti": [2, 3, 5, 8, 122, 124, 161], "sai": 126, "same": [2, 3, 5, 8, 40, 72, 110, 122, 124, 161], "sampl": 167, "satisfi": [44, 126], "scan": [135, 151, 167], "scientist": 124, "scope": 113, "score": [115, 161], "score_thresh": 115, "scrape": [42, 54, 145, 152, 154, 156, 167, 168, 169], "script": 166, "search": [14, 37, 44, 52, 53, 54, 119, 126, 152, 153, 157, 161], "second": [83, 113, 114, 117, 142, 143, 151], "see": [85, 119, 142, 143, 167, 168, 169], "seem": 124, "self": [2, 5, 8, 119, 122, 124, 161], "semant": 161, "semaphor": [14, 52, 53, 151, 155], "send": [103, 161], "sens": 167, "sent": 106, "sentenc": [36, 37], "separ": [122, 124, 133, 166], "serial": [8, 103, 122], "servic": [42, 47, 48, 49, 50, 54, 142, 143], "service_nam": [71, 72, 73], "set": [17, 53, 54, 57, 63, 66, 85, 103, 106, 110, 113, 114, 117, 119, 120, 122, 142, 143, 158, 166, 167], "set_result": [57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81], "setback": [29, 30, 31, 33, 34, 42], "setdefault": [85, 120], "setformatt": 103, "setlevel": 103, "seto": 168, "setup": [25, 29, 30, 31, 32, 33, 34, 126, 165], "sever": [115, 124], "shadow": 42, "shallow": [119, 120], "share": [122, 129], "short": [113, 114], "should": [14, 17, 18, 19, 39, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 
65, 75, 76, 77, 78, 79, 80, 81, 90, 103, 110, 115, 126, 148, 151, 159, 163, 167], "show": 163, "shutdown": [60, 61, 75, 76, 77, 78, 79, 80, 81], "side": [2, 3, 5, 8, 122, 124, 161], "signal": [142, 143], "silent": 103, "similar": [122, 129], "simpl": [49, 126], "simultan": [142, 143], "singl": [2, 5, 8, 14, 17, 19, 20, 44, 52, 53, 103, 113, 115, 120, 122, 124, 126, 130, 161, 167], "singleton": 70, "size": [32, 54], "skip": 151, "slightli": [142, 143], "slow_mo": 153, "smaller": [161, 167], "snippet": 17, "so": [19, 73, 104, 142, 143, 154, 157, 167, 168], "softwar": [122, 168], "solar": 168, "some": [50, 84, 85, 117, 122, 157, 161, 167], "someth": [48, 49], "sometim": 110, "somewhat": [124, 161], "sort": [119, 161], "sourc": [2, 3, 5, 8, 10, 11, 14, 17, 18, 19, 20, 22, 24, 26, 27, 28, 29, 30, 31, 32, 33, 34, 36, 37, 39, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 62, 63, 65, 66, 67, 69, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 88, 89, 90, 92, 93, 94, 95, 97, 98, 100, 101, 103, 104, 105, 106, 107, 108, 110, 111, 113, 114, 115, 116, 117, 119, 120, 122, 124, 126, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 142, 143, 147, 148, 149, 151, 153, 155, 157, 158, 159, 161, 165, 168, 169], "space": [122, 133], "specif": [2, 5, 8, 54, 96, 105, 122, 124, 161, 167, 168, 169], "specifi": [54, 85, 103, 107, 120, 151, 167], "split": [5, 36, 122, 129, 133], "split_on": [5, 122, 129, 149], "split_text": [14, 17, 18, 19, 52, 53, 148], "splitter": [14, 17, 18, 19, 52, 53, 148], "ssl": 151, "stabl": 119, "stackoverflow": 83, "start": [26, 44, 119, 122, 126, 129, 166, 167], "state": [54, 88, 90, 97, 113, 114, 115, 117, 167], "statement": [44, 126, 167], "static": [2, 5, 8, 116, 120, 122, 124, 161], "statist": 67, "still": [5, 19, 54], "stop": 119, "storag": 78, "store": [2, 3, 5, 8, 17, 18, 19, 20, 48, 49, 50, 52, 53, 54, 75, 76, 77, 78, 84, 85, 122, 124, 159], "storefileondisk": [75, 76, 77], "str": [2, 3, 5, 8, 14, 17, 18, 19, 26, 27, 28, 36, 37, 
39, 40, 42, 44, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 65, 66, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 85, 97, 98, 100, 101, 103, 104, 105, 106, 110, 111, 113, 114, 115, 116, 117, 119, 120, 122, 124, 126, 129, 130, 131, 132, 133, 137, 138, 139, 140, 147, 148, 149, 151, 155, 157, 158, 159, 161], "stream": 161, "streamlit": 165, "strictli": 111, "string": [2, 3, 5, 8, 26, 27, 28, 39, 40, 48, 49, 60, 65, 66, 79, 100, 103, 104, 105, 106, 107, 110, 113, 114, 119, 122, 124, 130, 133, 136, 147, 148, 149, 151, 157, 158, 159, 161], "strip": [111, 149], "strip_nested_bracket": 120, "structur": [12, 22, 40, 41, 42, 50, 54, 110, 113, 114, 115, 116, 117], "structured_llm_cal": [22, 40, 110, 113, 114, 115, 116, 117], "structuredllmcal": [22, 40, 110, 113, 114, 115, 116, 117], "sub": [5, 37, 85], "sub_label": 85, "subclass": 103, "subdivis": 113, "submiss": [3, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 142, 143], "submit": [3, 142, 143], "submit_job": 3, "subsequ": [142, 143, 161], "subset": [3, 167], "subtract": [83, 161], "success": [19, 151], "successor": [44, 126], "succinct": 39, "summar": [123, 124, 168], "summari": [2, 5, 8, 122, 161], "superset": 37, "suppli": 131, "support": [42, 47, 48, 50, 167], "sure": [5, 8, 117, 167, 168, 169], "sustain": 168, "symbol": 167, "synchron": 143, "sys_msg": [49, 50], "system": [2, 3, 5, 8, 38, 40, 48, 49, 50, 103, 109, 110, 113, 114, 116, 117, 122, 124, 161], "system_messag": [48, 113, 114, 116, 117], "t": [40, 167], "tab": 135, "tabl": [8, 131, 161], "tablefmt": 148, "tag": [5, 131, 158], "take": [2, 5, 8, 14, 17, 18, 19, 40, 52, 53, 85, 110, 122, 124, 148, 157, 161], "taken": 19, "task": [54, 103, 107, 113, 114, 116, 117], "td_kwarg": [54, 79], "technic": 166, "technologi": [38, 109, 165, 168], "temp": [60, 61, 75, 76, 77, 78, 79, 80, 81], "temperatur": [2, 3, 5, 8, 122, 124, 161], "tempfil": [54, 79], "templat": [151, 167], "temporari": 54, "temporarydirectori": [54, 79], "termin": [163, 167], "tesseract_cmd": 63, "test": [5, 
37], "text": [2, 4, 5, 7, 8, 14, 15, 17, 18, 19, 20, 29, 31, 32, 35, 36, 37, 39, 40, 42, 44, 50, 52, 53, 54, 60, 62, 63, 75, 77, 79, 101, 110, 111, 113, 114, 120, 121, 122, 124, 126, 129, 130, 131, 132, 133, 134, 136, 137, 138, 139, 140, 147, 148, 149, 151, 154, 157, 159, 161, 166, 168], "text_chunk": [39, 40, 101, 110, 124], "text_splitt": [14, 17, 18, 19, 52, 53, 54, 148], "text_splitter_chunk_overlap": 54, "text_splitter_chunk_s": 54, "text_summari": 124, "than": [40, 83, 110, 111, 113, 122, 133], "thei": [5, 103, 113, 135, 167], "them": 119, "therefor": [54, 142, 143], "thi": [2, 3, 5, 8, 14, 17, 18, 19, 20, 35, 37, 40, 42, 44, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 63, 65, 66, 67, 71, 72, 73, 75, 76, 77, 78, 79, 80, 81, 83, 85, 88, 97, 100, 102, 103, 104, 105, 110, 111, 119, 120, 122, 124, 126, 129, 131, 134, 135, 136, 142, 143, 147, 148, 149, 151, 153, 157, 161, 163, 166, 167, 168], "thing": 167, "third": [113, 114], "those": [22, 83], "thread": [54, 60, 61, 103, 151], "threadedservic": [78, 79, 81], "threadpoolexecutor": [54, 75, 76, 77, 78, 79, 80, 81], "three": [67, 113, 114, 139, 140], "threshold": [19, 115], "through": [107, 148, 161, 166, 167], "throw": 65, "tidi": 103, "tiktoken": 66, "time": [3, 57, 65, 83, 84, 105, 142, 143, 161], "timeboundedusagetrack": [57, 65], "timeout": [54, 103], "tinyurl": 135, "titl": 120, "to_markdown": 131, "togeth": [107, 167], "token": [2, 3, 5, 8, 42, 47, 48, 49, 50, 54, 65, 66, 122, 124, 161, 167], "token_budget": 161, "tokens_per_chunk": 5, "too": 54, "tool": [15, 157], "top": [85, 153, 161], "top_n": 161, "total": [57, 66, 83, 85, 149], "township": [113, 114, 117], "tpe_kwarg": [54, 75, 76, 77, 78, 79, 81], "track": [57, 65, 67, 81, 82, 83, 85], "tracker": [42, 47, 48, 49, 50, 57, 65, 67, 81, 85], "transit": [44, 126], "travers": [44, 126], "tree": [25, 29, 30, 31, 32, 33, 34, 167, 168], "tri": [2, 3, 5, 8, 122, 124, 161], "trigger": [2, 5, 8, 122, 124, 161], "troubl": 167, "true": [2, 3, 5, 8, 17, 26, 27, 28, 
37, 40, 60, 75, 76, 77, 78, 79, 81, 84, 107, 110, 111, 113, 114, 115, 116, 117, 122, 124, 132, 133, 134, 135, 142, 143, 147, 148, 149, 151, 158, 161], "try": 134, "tupl": [17, 22, 36, 85, 88, 120, 122, 129, 142, 143, 161], "turbin": 32, "turbo": [2, 3, 5, 8, 122, 124, 126, 161], "two": [117, 119, 139, 167], "txt": 8, "type": [2, 3, 5, 8, 40, 44, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 83, 97, 98, 122, 124, 147, 148, 149, 161], "typic": [2, 3, 5, 8, 30, 44, 50, 60, 65, 79, 119, 120, 122, 124, 126, 153, 159, 161], "u": [114, 117, 151, 168], "ultim": 65, "unchang": 81, "under": [48, 49, 50, 54, 57, 65, 81, 85, 168], "underli": [42, 47, 48, 49, 50, 57, 58, 60, 61, 65, 75, 76, 77, 78, 79, 80, 81, 103, 167], "understand": 167, "unincorpor": 113, "uniqu": [14, 52, 53, 54, 60, 79, 104, 158, 159], "unless": [158, 167], "unpickl": 103, "untest": 54, "until": [5, 19], "up": [4, 5, 8, 35, 57, 58, 63, 65, 103, 122, 124, 166, 167, 168], "updat": [65, 67, 81, 85, 120, 129], "update_from_model": 85, "upgrad": 151, "url": [2, 3, 5, 8, 14, 52, 53, 54, 60, 79, 88, 115, 117, 119, 120, 122, 124, 151, 155, 158, 159, 161], "us": [2, 3, 5, 8, 14, 17, 18, 19, 20, 22, 29, 30, 31, 32, 33, 34, 38, 39, 40, 42, 47, 48, 49, 50, 52, 53, 54, 57, 58, 60, 61, 63, 65, 66, 75, 76, 77, 78, 79, 80, 81, 85, 88, 103, 109, 110, 112, 113, 114, 115, 116, 117, 122, 124, 135, 149, 151, 154, 155, 159, 161, 166, 167, 168], "usag": [2, 5, 8, 42, 47, 48, 49, 50, 54, 57, 65, 67, 81, 122, 124, 161, 168, 169], "usage_fp": 81, "usage_from_respons": 85, "usage_sub_label": [42, 47, 48, 49, 50, 65], "usage_track": [42, 47, 48, 49, 50, 65], "usagetrack": [42, 47, 48, 49, 50, 65, 81], "used_index": 161, "user": [2, 3, 5, 8, 103, 122, 124, 135, 151, 161], "userdict": 85, "util": [4, 14, 24, 42, 49, 52, 53, 57, 64, 82, 118, 122, 166], "uuid": [60, 79, 158, 159], "v": [34, 85, 120, 122, 161, 163, 167], "v1": [2, 3, 5, 8, 119, 122, 124, 161], "v9xr4vrj": 135, "valid": [19, 22, 35, 38, 40, 44, 104, 126, 151, 158, 161, 
167], "validate_clean": 122, "validationwithmemori": 40, "valu": [14, 17, 19, 20, 29, 30, 31, 32, 33, 34, 42, 52, 53, 54, 57, 60, 61, 62, 63, 75, 76, 77, 78, 79, 80, 81, 83, 84, 85, 103, 111, 119, 120, 148, 149, 151, 153, 155, 161], "valueerror": [95, 119], "var": [124, 161], "variabl": [2, 5, 8, 54, 122, 124, 161], "vector": 168, "verbos": [134, 135, 163, 167], "veri": 5, "verify_ssl": 151, "version": [54, 103, 163, 167], "vertic": [122, 133], "veto": 103, "via": 54, "view": [85, 120, 168], "visual": 153, "vote": 115, "wa": [14, 17, 19, 40, 50, 52, 53, 71, 75, 76, 77, 78, 85, 103, 122, 134, 135, 151, 168], "wai": [117, 135], "wait": 3, "waiting_on": 3, "want": [103, 161, 167], "we": [2, 3, 5, 8, 39, 122, 124, 154, 161, 166, 167], "web": [14, 17, 18, 19, 20, 22, 52, 53, 54, 60, 62, 63, 75, 76, 77, 78, 79, 115, 132, 167], "webp": 151, "websit": [88, 89, 90], "wec": [40, 110], "well": [5, 17, 40, 42, 54, 110, 122, 124, 167], "went": [48, 49], "were": [37, 90], "wether": [19, 58, 61, 80, 97, 113, 114, 117], "weto": 168, "what": 103, "when": [3, 54, 85, 103, 115, 132, 147, 151, 166, 167], "where": [2, 5, 8, 36, 40, 44, 66, 75, 76, 77, 78, 81, 88, 106, 110, 122, 124, 126, 147, 148, 149, 153, 159, 161], "whether": 103, "which": [8, 19, 36, 39, 40, 42, 44, 54, 60, 67, 79, 81, 85, 90, 101, 103, 107, 110, 113, 114, 117, 119, 120, 122, 126, 133, 148, 149, 159, 166, 167], "while": 103, "why": 167, "win": 111, "wind": [38, 40, 109, 111, 168], "wind_db": 54, "window": [111, 135], "windshield": 111, "wish": [103, 167], "within": [30, 103, 113, 129, 131], "without": [2, 5, 8, 44, 107, 122, 124, 126, 134, 142, 143, 161], "wizard": 165, "word": [19, 36, 37, 111, 124], "work": [5, 14, 17, 18, 19, 52, 53, 63, 103, 122, 124, 142, 143, 148, 166, 167, 168, 169], "workflow": 168, "worldwid": 168, "wrap": [83, 103], "write": [54, 60, 75, 77, 79, 104, 106, 147, 159, 161], "write_kwarg": 147, "written": [81, 151], "wrong": [48, 49], "wrong_counti": 114, "wrong_stat": 114, "www": [102, 119, 
151], "x": [113, 126], "x0c": 138, "xhtml": 151, "xml": 151, "xx": 107, "y": 113, "ye": 28, "year": [22, 120], "you": [2, 3, 5, 8, 18, 19, 20, 103, 113, 114, 117, 122, 124, 126, 135, 153, 161, 166, 167, 168, 169], "younger": 83, "your": [48, 49, 50, 65, 113, 114, 117, 142, 143, 166, 167], "zero": 103, "zopatista": 102}, "titles": ["elm", "elm.base", "elm.base.ApiBase", "elm.base.ApiQueue", "elm.chunk", "elm.chunk.Chunker", "elm.cli", "elm.embed", "elm.embed.ChunkAndEmbed", "elm.exceptions", "elm.exceptions.ELMError", "elm.exceptions.ELMRuntimeError", "elm.ords", "elm.ords.download", "elm.ords.download.download_county_ordinance", "elm.ords.extraction", "elm.ords.extraction.apply", "elm.ords.extraction.apply.check_for_ordinance_info", "elm.ords.extraction.apply.extract_ordinance_text_with_llm", "elm.ords.extraction.apply.extract_ordinance_text_with_ngram_validation", "elm.ords.extraction.apply.extract_ordinance_values", "elm.ords.extraction.date", "elm.ords.extraction.date.DateExtractor", "elm.ords.extraction.features", "elm.ords.extraction.features.SetbackFeatures", "elm.ords.extraction.graphs", "elm.ords.extraction.graphs.llm_response_does_not_start_with_no", "elm.ords.extraction.graphs.llm_response_starts_with_no", "elm.ords.extraction.graphs.llm_response_starts_with_yes", "elm.ords.extraction.graphs.setup_base_graph", "elm.ords.extraction.graphs.setup_conditional", "elm.ords.extraction.graphs.setup_graph_extra_restriction", "elm.ords.extraction.graphs.setup_graph_wes_types", "elm.ords.extraction.graphs.setup_multiplier", "elm.ords.extraction.graphs.setup_participating_owner", "elm.ords.extraction.ngrams", "elm.ords.extraction.ngrams.convert_text_to_sentence_ngrams", "elm.ords.extraction.ngrams.sentence_ngram_containment", "elm.ords.extraction.ordinance", "elm.ords.extraction.ordinance.OrdinanceExtractor", "elm.ords.extraction.ordinance.OrdinanceValidator", "elm.ords.extraction.parse", "elm.ords.extraction.parse.StructuredOrdinanceParser", 
"elm.ords.extraction.tree", "elm.ords.extraction.tree.AsyncDecisionTree", "elm.ords.llm", "elm.ords.llm.calling", "elm.ords.llm.calling.BaseLLMCaller", "elm.ords.llm.calling.ChatLLMCaller", "elm.ords.llm.calling.LLMCaller", "elm.ords.llm.calling.StructuredLLMCaller", "elm.ords.process", "elm.ords.process.download_doc_for_county", "elm.ords.process.download_docs_for_county_with_logging", "elm.ords.process.process_counties_with_openai", "elm.ords.services", "elm.ords.services.base", "elm.ords.services.base.RateLimitedService", "elm.ords.services.base.Service", "elm.ords.services.cpu", "elm.ords.services.cpu.PDFLoader", "elm.ords.services.cpu.ProcessPoolService", "elm.ords.services.cpu.read_pdf_doc", "elm.ords.services.cpu.read_pdf_doc_ocr", "elm.ords.services.openai", "elm.ords.services.openai.OpenAIService", "elm.ords.services.openai.count_tokens", "elm.ords.services.openai.usage_from_response", "elm.ords.services.provider", "elm.ords.services.provider.RunningAsyncServices", "elm.ords.services.queues", "elm.ords.services.queues.get_service_queue", "elm.ords.services.queues.initialize_service_queue", "elm.ords.services.queues.tear_down_service_queue", "elm.ords.services.threaded", "elm.ords.services.threaded.CleanedFileWriter", "elm.ords.services.threaded.FileMover", "elm.ords.services.threaded.OrdDBFileWriter", "elm.ords.services.threaded.StoreFileOnDisk", "elm.ords.services.threaded.TempFileCache", "elm.ords.services.threaded.ThreadedService", "elm.ords.services.threaded.UsageUpdater", "elm.ords.services.usage", "elm.ords.services.usage.TimeBoundedUsageTracker", "elm.ords.services.usage.TimedEntry", "elm.ords.services.usage.UsageTracker", "elm.ords.utilities", "elm.ords.utilities.counties", "elm.ords.utilities.counties.county_websites", "elm.ords.utilities.counties.load_all_county_info", "elm.ords.utilities.counties.load_counties_from_fp", "elm.ords.utilities.exceptions", "elm.ords.utilities.exceptions.ELMOrdsError", 
"elm.ords.utilities.exceptions.ELMOrdsNotInitializedError", "elm.ords.utilities.exceptions.ELMOrdsRuntimeError", "elm.ords.utilities.exceptions.ELMOrdsValueError", "elm.ords.utilities.location", "elm.ords.utilities.location.County", "elm.ords.utilities.location.Location", "elm.ords.utilities.parsing", "elm.ords.utilities.parsing.llm_response_as_json", "elm.ords.utilities.parsing.merge_overlapping_texts", "elm.ords.utilities.queued_logging", "elm.ords.utilities.queued_logging.LocalProcessQueueHandler", "elm.ords.utilities.queued_logging.LocationFileLog", "elm.ords.utilities.queued_logging.LocationFilter", "elm.ords.utilities.queued_logging.LogListener", "elm.ords.utilities.queued_logging.NoLocationFilter", "elm.ords.validation", "elm.ords.validation.content", "elm.ords.validation.content.ValidationWithMemory", "elm.ords.validation.content.possibly_mentions_wind", "elm.ords.validation.location", "elm.ords.validation.location.CountyJurisdictionValidator", "elm.ords.validation.location.CountyNameValidator", "elm.ords.validation.location.CountyValidator", "elm.ords.validation.location.FixedMessageValidator", "elm.ords.validation.location.URLValidator", "elm.osti", "elm.osti.OstiList", "elm.osti.OstiRecord", "elm.pdf", "elm.pdf.PDFtoTXT", "elm.summary", "elm.summary.Summary", "elm.tree", "elm.tree.DecisionTree", "elm.utilities", "elm.utilities.parse", "elm.utilities.parse.clean_headers", "elm.utilities.parse.combine_pages", "elm.utilities.parse.format_html_tables", "elm.utilities.parse.html_to_text", "elm.utilities.parse.is_multi_col", "elm.utilities.parse.read_pdf", "elm.utilities.parse.read_pdf_ocr", "elm.utilities.parse.remove_blank_pages", "elm.utilities.parse.remove_empty_lines_or_page_footers", "elm.utilities.parse.replace_common_pdf_conversion_chars", "elm.utilities.parse.replace_excessive_newlines", "elm.utilities.parse.replace_multi_dot_lines", "elm.utilities.retry", "elm.utilities.retry.async_retry_with_exponential_backoff", 
"elm.utilities.retry.retry_with_exponential_backoff", "elm.version", "elm.web", "elm.web.document", "elm.web.document.BaseDocument", "elm.web.document.HTMLDocument", "elm.web.document.PDFDocument", "elm.web.file_loader", "elm.web.file_loader.AsyncFileLoader", "elm.web.google_search", "elm.web.google_search.PlaywrightGoogleLinkSearch", "elm.web.html_pw", "elm.web.html_pw.load_html_with_pw", "elm.web.utilities", "elm.web.utilities.clean_search_query", "elm.web.utilities.compute_fn_from_url", "elm.web.utilities.write_url_doc_to_file", "elm.wizard", "elm.wizard.EnergyWizard", "Command Line Interfaces (CLIs)", "elm", "<no title>", "Examples", "The Energy Wizard", "Ordinance GPT", "Energy Language Model (ELM)", "Installation"], "titleterms": {"The": 166, "acknowledg": 168, "apibas": 2, "apiqueu": 3, "app": 166, "appli": [16, 17, 18, 19, 20], "async_retry_with_exponential_backoff": 142, "asyncdecisiontre": 44, "asyncfileload": 151, "base": [1, 2, 3, 56, 57, 58], "basedocu": 147, "basellmcal": 47, "call": [46, 47, 48, 49, 50], "chatllmcal": 48, "check_for_ordinance_info": 17, "chunk": [4, 5], "chunkandemb": 8, "chunker": 5, "clean_head": 129, "clean_search_queri": 157, "cleanedfilewrit": 75, "cli": [6, 162], "combine_pag": 130, "command": 162, "compute_fn_from_url": 158, "content": [109, 110, 111], "convert_text_to_sentence_ngram": 36, "count_token": 66, "counti": [87, 88, 89, 90, 97], "county_websit": 88, "countyjurisdictionvalid": 113, "countynamevalid": 114, "countyvalid": 115, "cpu": [59, 60, 61, 62, 63], "date": [21, 22], "dateextractor": 22, "debug": 167, "decisiontre": 126, "document": [146, 147, 148, 149, 167], "download": [13, 14, 166], "download_county_ordin": 14, "download_doc_for_counti": 52, "download_docs_for_county_with_log": 53, "elm": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 
58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 163, 168], "elmerror": 10, "elmordserror": 92, "elmordsnotinitializederror": 93, "elmordsruntimeerror": 94, "elmordsvalueerror": 95, "elmruntimeerror": 11, "emb": [7, 8], "embed": 166, "energi": [166, 168], "energywizard": 161, "exampl": 165, "except": [9, 10, 11, 91, 92, 93, 94, 95], "execut": 167, "extens": 167, "extract": [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44], "extract_ordinance_text_with_llm": 18, "extract_ordinance_text_with_ngram_valid": 19, "extract_ordinance_valu": 20, "featur": [23, 24], "file_load": [150, 151], "filemov": 76, "fixedmessagevalid": 116, "format_html_t": 131, "get_service_queu": 71, "google_search": [152, 153], "gpt": 167, "graph": [25, 26, 27, 28, 29, 30, 31, 32, 33, 34], "html_pw": [154, 155], "html_to_text": 132, "htmldocument": 148, "initialize_service_queu": 72, "instal": [168, 169], "interfac": 162, "is_multi_col": 133, "languag": 168, "line": 162, "llm": [45, 46, 47, 48, 49, 50], "llm_response_as_json": 100, "llm_response_does_not_start_with_no": 26, "llm_response_starts_with_no": 27, "llm_response_starts_with_y": 28, "llmcaller": 49, "load_all_county_info": 89, "load_counties_from_fp": 90, "load_html_with_pw": 155, "localprocessqueuehandl": 103, "locat": [96, 97, 98, 112, 113, 114, 115, 116, 117], "locationfilelog": 104, "locationfilt": 105, "loglisten": 106, "merge_overlapping_text": 101, "model": 168, "ngram": [35, 36, 37], "nolocationfilt": 107, "openai": [64, 65, 66, 67], 
"openaiservic": 65, "ord": [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 163], "orddbfilewrit": 77, "ordin": [38, 39, 40, 167], "ordinanceextractor": 39, "ordinancevalid": 40, "osti": [118, 119, 120], "ostilist": 119, "ostirecord": 120, "other": 167, "pars": [41, 42, 99, 100, 101, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140], "pdf": [121, 122, 166], "pdfdocument": 149, "pdfloader": 60, "pdftotxt": 122, "playwrightgooglelinksearch": 153, "possibly_mentions_wind": 111, "prerequisit": 167, "process": [51, 52, 53, 54], "process_counties_with_openai": 54, "processpoolservic": 61, "provid": [68, 69], "queue": [70, 71, 72, 73], "queued_log": [102, 103, 104, 105, 106, 107], "ratelimitedservic": 57, "read_pdf": 134, "read_pdf_doc": 62, "read_pdf_doc_ocr": 63, "read_pdf_ocr": 135, "remove_blank_pag": 136, "remove_empty_lines_or_page_foot": 137, "replace_common_pdf_conversion_char": 138, "replace_excessive_newlin": 139, "replace_multi_dot_lin": 140, "retri": [141, 142, 143], "retry_with_exponential_backoff": 143, "run": 166, "runningasyncservic": 69, "sentence_ngram_contain": 37, "servic": [55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85], "setbackfeatur": 24, "setup": 167, "setup_base_graph": 29, "setup_condit": 30, "setup_graph_extra_restrict": 31, "setup_graph_wes_typ": 32, "setup_multipli": 33, "setup_participating_own": 34, "sourc": 167, "storefileondisk": 78, "streamlit": 166, "structuredllmcal": 50, "structuredordinancepars": 42, "summari": [123, 124], "tear_down_service_queu": 73, 
"technologi": 167, "tempfilecach": 79, "thread": [74, 75, 76, 77, 78, 79, 80, 81], "threadedservic": 80, "timeboundedusagetrack": 83, "timedentri": 84, "tree": [43, 44, 125, 126], "urlvalid": 117, "usag": [82, 83, 84, 85], "usage_from_respons": 67, "usagetrack": 85, "usageupdat": 81, "util": [86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 156, 157, 158, 159], "valid": [108, 109, 110, 111, 112, 113, 114, 115, 116, 117], "validationwithmemori": 110, "version": 144, "web": [145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159], "wizard": [160, 161, 166], "write_url_doc_to_fil": 159}}) \ No newline at end of file