Skip to content

Commit

Permalink
Merge pull request bazingagin#40 from EliahKagan/strings
Browse files (browse the repository at this point in the history)
Improve code style for strings
  • Loading branch information
bazingagin authored Aug 3, 2023
2 parents d6824bb + 3a06b8c commit 6558e07
Show file tree
Hide file tree
Showing 7 changed files with 56 additions and 43 deletions.
6 changes: 3 additions & 3 deletions examples/ag_news.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,17 +70,17 @@ def fit_model(


def main() -> None:
print(f"Fetching data...")
print("Fetching data...")
((train_text, train_labels), (test_text, test_labels)) = get_data()

print(f"Fitting model...")
print("Fitting model...")
model = fit_model(train_text, train_labels)
random_indicies = np.random.choice(test_text.shape[0], 1000, replace=False)

sample_test_text = test_text[random_indicies]
sample_test_labels = test_labels[random_indicies]

print(f"Generating predictions...")
print("Generating predictions...")
top_k = 1

# Here we use the `sampling_percentage` to save time
Expand Down
6 changes: 3 additions & 3 deletions examples/imdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,10 +70,10 @@ def fit_model(


def main() -> None:
print(f"Fetching data...")
print("Fetching data...")
((train_text, train_labels), (test_text, test_labels)) = get_data()

print(f"Fitting model...")
print("Fitting model...")
model = fit_model(train_text, train_labels)

# Randomly sampling from the test set.
Expand All @@ -87,7 +87,7 @@ def main() -> None:
sample_test_text = test_text[random_indicies]
sample_test_labels = test_labels[random_indicies]

print(f"Generating predictions...")
print("Generating predictions...")
top_k = 1

# Here we use the `sampling_percentage` to save time
Expand Down
40 changes: 20 additions & 20 deletions npc_gzip/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ class InvalidCompressorException(Exception):

def __init__(self, compression_library: str) -> None:
self.message = f"""
Compression Library ({compression_library})
Compression Library ({compression_library})
is not currently supported.
"""
super().__init__(self.message)
Expand All @@ -25,10 +25,10 @@ class MissingDependencyException(Exception):

def __init__(self, compression_library: str) -> None:
self.message = f"""
Compression Library ({compression_library})
is missing an underlying dependency. Try
installing those missing dependencies and
load this again.
Compression Library ({compression_library})
is missing an underlying dependency. Try
installing those missing dependencies and
load this again.
Common missing dependencies for:
Expand All @@ -50,7 +50,7 @@ def __init__(
self.message = f"""
Unable to aggregate ({stringa}) and ({stringb}).
One or both of the two strings are too short to concatenate.
"""

if function_name is not None:
Expand All @@ -66,11 +66,11 @@ def __init__(
compressed_value_b: Optional[float] = None,
function_name: Optional[str] = None,
) -> None:
self.message = f"""
The combination of compressed values passed equal zero.
self.message = """
The combination of compressed values passed equal zero.
This will result in a divide by zero error.
"""

if function_name is not None:
Expand All @@ -91,7 +91,7 @@ def __init__(
arg1: {type(a)}
arg2: {type(b)}
arg3: {type(c)}
"""

if function_name is not None:
Expand All @@ -112,7 +112,7 @@ def __init__(
arg1: {array_a.shape}
arg2: {array_b.shape}
arg3: {array_c.shape}
"""

if function_name is not None:
Expand All @@ -128,11 +128,11 @@ def __init__(
function_name: Optional[str] = None,
) -> None:
self.message = f"""
The `distance_metric` ({distance_metric}) provided is not
The `distance_metric` ({distance_metric}) provided is not
currently supported. Please submit an Issue and/or
Pull Request here to add support:
https://github.com/bazingagin/npc_gzip
"""

if supported_distance_metrics is not None:
Expand All @@ -153,9 +153,9 @@ def __init__(
function_name: Optional[str] = None,
) -> None:
self.message = f"""
The type passed ({passed_type}) provided is not
currently supported.
The type passed ({passed_type}) provided is not
currently supported.
"""

if supported_types is not None:
Expand All @@ -174,13 +174,13 @@ def __init__(
function_name: Optional[str] = None,
) -> None:
self.message = f"""
If training labels are passed, the number
of training data samples must equal the
If training labels are passed, the number
of training data samples must equal the
number of training label samples
training_samples: {training_samples}
label_samples: {label_samples}
"""

if function_name is not None:
Expand Down
7 changes: 6 additions & 1 deletion npc_gzip/knn_classifier.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,12 @@ class KnnClassifier:
>>> training_labels = [random.randint(0, 1) for _ in range(len(training_data))]
>>> assert len(training_data) == len(training_labels)
>>> model = KnnClassifier(compressor=GZipCompressor(), training_inputs=training_data, training_labels=training_labels, distance_metric="ncd")
>>> model = KnnClassifier(
... compressor=GZipCompressor(),
... training_inputs=training_data,
... training_labels=training_labels,
... distance_metric="ncd",
... )
>>> test = np.array(["hey", "you are a real pain in my ass", "go away please"])
Expand Down
4 changes: 2 additions & 2 deletions npc_gzip/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ def generate_sentence(number_of_words: int = 10) -> str:
str: Sentence of random numbers and letters.
"""

assert number_of_words > 0, f"`number_of_words` must be greater than zero."
assert number_of_words > 0, "`number_of_words` must be greater than zero."

words = []
for word in range(number_of_words):
Expand Down Expand Up @@ -58,7 +58,7 @@ def generate_dataset(number_of_sentences: int) -> list:
list: List of sentences (str).
"""

assert number_of_sentences > 0, f"`number_of_sentences` must be greater than zero."
assert number_of_sentences > 0, "`number_of_sentences` must be greater than zero."

dataset = []
for sentence in range(number_of_sentences):
Expand Down
23 changes: 14 additions & 9 deletions original_codebase/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,13 +25,14 @@ def calc_dis(
self, data: list, train_data: Optional[list] = None, fast: bool = False
) -> None:
"""
Calculates the distance between either `data` and itself or `data` and `train_data`
and appends the distance to `self.distance_matrix`.
Calculates the distance between either `data` and itself or `data` and
`train_data` and appends the distance to `self.distance_matrix`.
Arguments:
data (list): Data to compute distance between.
train_data (list): [Optional] Training data to compute distance from `data`.
fast (bool): [Optional] Uses the _fast compression length function of `self.compressor`.
fast (bool): [Optional] Uses the _fast compression length function
of `self.compressor`.
Returns:
None: None
Expand Down Expand Up @@ -68,13 +69,14 @@ def calc_dis_with_single_compressed_given(
self, data: list, data_len: list = None, train_data: Optional[list] = None
) -> None:
"""
Calculates the distance between either `data`, `data_len`, or `train_data`
and appends the distance to `self.distance_matrix`.
Calculates the distance between either `data`, `data_len`, or
`train_data` and appends the distance to `self.distance_matrix`.
Arguments:
data (list): Data to compute distance between.
train_data (list): [Optional] Training data to compute distance from `data`.
fast (bool): [Optional] Uses the _fast compression length function of `self.compressor`.
fast (bool): [Optional] Uses the _fast compression length function
of `self.compressor`.
Returns:
None: None
Expand Down Expand Up @@ -186,7 +188,8 @@ def calc_acc(
k (int?): TODO
label (list): Predicted Labels.
train_label (list): Correct Labels.
provided_distance_matrix (list): Calculated Distance Matrix to use instead of `self.distance_matrix`.
provided_distance_matrix (list): Calculated Distance Matrix to use
instead of `self.distance_matrix`.
rand (bool): TODO
Returns:
Expand Down Expand Up @@ -245,7 +248,8 @@ def combine_dis_acc(
train_label: Optional[list] = None,
) -> tuple:
"""
Calculates the distance and the accuracy of the algorithm for data with training.
Calculates the distance and the accuracy of the algorithm for data with
training.
Arguments:
k (int?): TODO
Expand Down Expand Up @@ -304,7 +308,8 @@ def combine_dis_acc_single(
label: Any, # int, as used in this application
) -> tuple:
"""
Calculates the distance and the accuracy of the algorithm for a single datum with training.
Calculates the distance and the accuracy of the algorithm for a single
datum with training.
Arguments:
k (int?): TODO
Expand Down
13 changes: 8 additions & 5 deletions original_codebase/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ def NCD(c1: float, c2: float, c12: float) -> float:
Arguments:
c1 (float): The compressed length of the first object.
c2 (float): The compressed length of the second object.
c12 (float): The compressed length of the concatenation of the first and second objects.
c12 (float): The compressed length of the concatenation of the first
and second objects.
Returns:
float: The Normalized Compression Distance between c1 and c2.
Expand Down Expand Up @@ -51,7 +52,8 @@ def CDM(c1: float, c2: float, c12: float) -> float:
Arguments:
c1 (float): The compressed length of the first object.
c2 (float): The compressed length of the second object.
c12 (float): The compressed length of the concatenation of the first and second objects.
c12 (float): The compressed length of the concatenation of the first
and second objects.
Returns:
float: The Compound Dissimilarity Measure value between c1 and c2.
Expand All @@ -72,7 +74,8 @@ def MSE(v1: np.ndarray, v2: np.ndarray) -> float:
v2 (np.ndarray): The second array.
Returns:
float: The Mean Squared Error value, representing the average squared difference between v1 and v2.
float: The Mean Squared Error value, representing the average squared
difference between v1 and v2.
Formula:
MSE(v1, v2) = Σ((v1 - v2) ** 2) / len(v1)
Expand Down Expand Up @@ -200,8 +203,8 @@ def agg_by_min_or_max(
Arguments:
i1 (torch.Tensor): First series of numbers.
i2 (torch.Tensor): Second series of numbers.
aggregate_by_minimum (bool): True if you want to take the minimum of the two series.
False if you want to take the maximum instead.
aggregate_by_minimum (bool): True to take the minimum of the two series.
False to take the maximum instead.
Returns:
torch.Tensor: Average of the two series.
Expand Down

0 comments on commit 6558e07

Please sign in to comment.