diff --git a/.DS_Store b/.DS_Store index 70e1230..f36febd 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/SynRD/.DS_Store b/SynRD/.DS_Store index 0b27edd..04efd20 100644 Binary files a/SynRD/.DS_Store and b/SynRD/.DS_Store differ diff --git a/SynRD/synthesizers/.DS_Store b/SynRD/synthesizers/.DS_Store new file mode 100644 index 0000000..d5041ef Binary files /dev/null and b/SynRD/synthesizers/.DS_Store differ diff --git a/SynRD/synthesizers/synthesizer.py b/SynRD/synthesizers/synthesizer.py index f0f84f2..84ae7dd 100644 --- a/SynRD/synthesizers/synthesizer.py +++ b/SynRD/synthesizers/synthesizer.py @@ -136,23 +136,20 @@ class MSTSynthesizer(Synthesizer): """ MST: Maximum Spanning Tree synthesizer. - ---------- - Parameters - epsilon : float + Parameters: + epsilon (float): Privacy budget for the synthesizer - ----------- - Optional keyword arguments: - slide_range : bool = False + slide_range (bool = False): Specifies if the slide range transformation should be applied, this will make the minimal value of each column 0 before fitting. 
- thresh : float = 0.05 + thresh (float = 0.05): Specifies what the ratio of unique values to the column length should be for the column to be threated as cathegorical - preprocess_factor : float = 0.05 + preprocess_factor (float = 0.05): Amount of budget to be used for the data preprocessing - delta : float = 1e-09 + delta (float = 1e-09): Privacy parameter, should be small, in the range of 1/(n * sqrt(n)) - verbose: bool = False + verbose (bool = False): Print diagnostic information during processing """ def __init__( @@ -223,60 +220,57 @@ class PATECTGAN(Synthesizer): """ Conditional tabular GAN using Private Aggregation of Teacher Ensembles - ---------- - Parameters - epsilon : float + Parameters: + epsilon (float): float Privacy budget for the synthesizer - ----------- - Optional keyword arguments: - slide_range : bool = False + slide_range (bool = False): Specifies if the slide range transformation should be applied, this will make the minimal value of each column 0 before fitting. 
- thresh : float = 0.05 + thresh (float = 0.05): float = 0.05 Specifies what the ratio of unique values to the column length should be for the column to be threated as cathegorical - preprocess_factor : float = 0.05 + preprocess_factor (float = 0.05): Amount of budget to be used for the data preprocessing - embedding_dim : int = 128 + embedding_dim (int = 128): Dimension of the embeding - generator_dim : tuple = (256, 256) + generator_dim (tuple = (256, 256)): Dimension of the generator - discriminator_dim : tuple = (256, 256) + discriminator_dim (tuple = (256, 256)): Dimension of the discriminstor - generator_lr : float = 0.0002 + generator_lr (float = 0.0002): Generator's learning rate - generator_decay : float = 1e-06 + generator_decay (float = 1e-06): Generator's decay - discriminator_lr : float = 0.002 + discriminator_lr (float = 0.002): Discriminator's learning rate - discriminator_decay : float = 1e-06 + discriminator_decay (float = 1e-06): Discriminator's decay - batch_size : int = 500 + batch_size (int = 500): Number of samples in one batch (for one training step) - verbose : bool = True + verbose (bool = True): Specifies if training information should be printed or not - epochs : int = 300 + epochs (int = 300): Number of training epochs - pac : int = 1 + pac (int = 1): Number of pacs to use (useful for dealing with mode collapse) - cuda : bool|string = True + cuda (bool|string = True): Specifies if cuda should be used for computation. Providing a string will result in using the specific device. - regularization : string = None + regularization (string = None): Which regularization to use. At the moment only dragan is possible. 
- loss : string = 'cross_entropy' + loss (string = 'cross_entropy'): Type of loss to be used - teacher_iters : int = 5 + teacher_iters (int = 5): Number of iterations for training the teachers - student_iters : int = 5 + student_iters (int = 5): Number of iterations for training the students - delta : float = None + delta (float = None): privacy parameter, should be small, in the range of 1/(n * sqrt(n)) - sample_per_teacher : int = 1000 + sample_per_teacher (int = 1000): Number of samples for training one teacher - noise_multiplier : float = 0.001 + noise_multiplier (float = 0.001): Multiplier for Laplace noise - moments_order : int = 100 + moments_order (int = 100): Number of moments to be used in moments accountant method """ @@ -419,24 +413,22 @@ class PrivBayes(Synthesizer): Synthesizer which uses bayesian approach. ---------- - Parameters - epsilon : float + Parameters: + epsilon (float): Privacy budget for the synthesizer - ----------- - Optional keyword arguments: - slide_range : bool = False + slide_range (bool = False): Specifies if the slide range transformation should be applied, this will make the minimal value of each column 0 before fitting. 
- thresh : float = 0.05 + thresh (float = 0.05): Specifies what the ratio of unique values to the column length should be for the column to be threated as cathegorical - privbayes_limit : int = 20 + privbayes_limit (int = 20): If number of unique values in the column exceeds this limit, it will be binned - privbayes_bins : int = 10 + privbayes_bins (int = 10): Number of bins (if binning is happening) - temp_files_dir : str = 'temp' + temp_files_dir (str = 'temp'): Directory used to save the file produced by the data describer - seed : int = 0 + seed (int = 0): Random seed to be used """ @@ -568,31 +560,28 @@ class AIMTSynthesizer(Synthesizer): """ Synthesizer which uses AIM: An Adaptive and Iterative Mechanism - ---------- - Parameters - epsilon : float + Parameters: + epsilon (float): Privacy budget for the synthesizer - ----------- - Optional keyword arguments: - slide_range : bool = False + slide_range (bool = False): Specifies if the slide range transformation should be applied, this will make the minimal value of each column 0 before fitting. - thresh : float = 0.05 + thresh (float = 0.05): Specifies what the ratio of unique values to the column length should be for the column to be threated as cathegorical - delta : float = 1e-9 + delta (float = 1e-9): Privacy parameter. 
Should be small, in the range of 1/(n * sqrt(n)) - max_model_size : int = 80 + max_model_size (int = 80): Maximum size of the model - degree : int = 2 + degree (int = 2): Number of data columns used in the workload - num_marginals : int = None + num_marginals (int = None): Number of elements in the workload - max_cells : int = 10000 + max_cells (int = 10000): Maximum number of cells in a domain that can be used for the synthesizer - rounds : int = None + rounds (int = None): Number of rounds to run the algorithm for - verbose : bool = False + verbose (bool = False): Specifies if additional information should be printed or not """ @@ -688,34 +677,31 @@ class AIMSynthesizer(Synthesizer): Synthesizer which uses AIM: An Adaptive and Iterative Mechanism with adjustable `rounds_factor` parameter to influence the number of rounds to run the mechanism. - ---------- - Parameters - epsilon : float + Parameters: + epsilon (float): float Privacy budget for the synthesizer - ----------- - Optional keyword arguments: - rounds_factor : float = 0.1 + rounds_factor (float = 0.1): The factor to determine the number of rounds to run the AIM mechanism before generating the synthetic dataset. - slide_range : bool = False + slide_range (bool = False): Specifies if the slide range transformation should be applied, this will make the minimal value of each column 0 before fitting. - thresh : float = 0.05 + thresh (float = 0.05): Specifies what the ratio of unique values to the column length should be for the column to be threated as cathegorical - delta : float = 1e-9 + delta (float = 1e-9): Privacy parameter. 
Should be small, in the range of 1/(n * sqrt(n)) - max_model_size : int = 80 + max_model_size (int = 80): Maximum size of the model - degree : int = 2 + degree (int = 2): Number of data columns used in the workload - num_marginals : int = None + num_marginals (int = None): Number of elements in the workload - max_cells : int = 10000 + max_cells (int = 10000): Maximum number of cells in a domain that can be used for the synthesizer - rounds : int = None + rounds (int = None): Number of rounds to run the algorithm for - verbose : bool = False + verbose (bool = False): Specifies if additional information should be printed or not """ diff --git a/docs/.DS_Store b/docs/.DS_Store new file mode 100644 index 0000000..808bfcb Binary files /dev/null and b/docs/.DS_Store differ diff --git a/docs/api.md b/docs/api.md new file mode 100644 index 0000000..e1ccdde --- /dev/null +++ b/docs/api.md @@ -0,0 +1,15 @@ +Welcome to the documentation for SynRD's synthesizers! Below, you'll find information on how to configure and use the synthesizers in Python. + +## Example notebooks + +### Synthesizers' Configuration Notebook + +Synthesizer's Configuration Notebook is a file that provides a comprehensive guide and examples on how to configure and initialize synthesizers within the SynRD library. It serves as a reference for users who want to customize the behavior of synthesizers based on their specific requirements. 
+ +- [Link to Configuration Notebook](config_notebook.ipynb) + +--- + +## Data Synthesizer Classes + +::: SynRD.synthesizers.synthesizer diff --git a/docs/blog/index.md b/docs/blog/index.md new file mode 100644 index 0000000..c58f16c --- /dev/null +++ b/docs/blog/index.md @@ -0,0 +1,2 @@ +# Blog + diff --git a/docs/config_notebook.ipynb b/docs/config_notebook.ipynb new file mode 100644 index 0000000..6117f20 --- /dev/null +++ b/docs/config_notebook.ipynb @@ -0,0 +1,731 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Imports" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Importing MSTSynthesizer class to see an example of performance on (new) configuration initialization (other Synthesizers work well too:))" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from synthesizer import MSTSynthesizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Synthesizer configuration and how it works \n", + "\n", + "We can initialize the synthesizer in three ways:\n", + "\n", + "- using the regular parameters;\n", + "- by passing a configuration dictionary, with key - the name of the parameter, value - the value of the specified parameter;\n", + "- a combination of both methods;\n", + "\n", + "Let's take as an example MSTSynthesizer, as the same configuration options are available for other types. 
MSTSynthesizer has the following user-configurable parameters:\n", + "\n", + "**Required** parameters for MSTSynthesizer:\n", + "- epsilon : float\n", + "\n", + "\n", + "**Optional** parameters for MSTSynthesizer (with their default values if the user does not provide a \"customized\" one):\n", + "- slide_range : bool = False\n", + "- thresh : float = 0.05\n", + "- preprocess_factor : float = 0.05\n", + "- delta : float = 1e-09\n", + "- verbose : bool = False\n", + "\n", + "Let's see how it works on a few examples below:)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Epsilon as a required param to be passed" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Epsilon is a mandatory parameter and has no default value. Whenever the user does not provide an epsilon value, a ValueError is raised stating that epsilon is a required parameter; all other parameters are optional."
+ ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Epsilon is a required parameter for Synthesizer.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/4058363018.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m }\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m**\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Epsilon:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmst_synth\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 8\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Slide range:\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmst_synth\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mslide_range\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msynth_kwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m ) -> None:\n\u001b[0;32m--> 168\u001b[0;31m 
\u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslide_range\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0mallowed_additional_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"preprocess_factor\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"delta\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"verbose\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, **kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mepsilon_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlocals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"epsilon\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mepsilon_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Epsilon is a required parameter for Synthesizer.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon_value\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m raise TypeError(\n", + "\u001b[0;31mValueError\u001b[0m: Epsilon is a required parameter for Synthesizer." + ] + } + ], + "source": [ + "config = {\n", + " \"thresh\": 0.1,\n", + " \"delta\": 1e-05\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(**config)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range)\n", + "print(\"Thresh:\", mst_synth.thresh)\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor)\n", + "print(\"Delta:\", mst_synth.delta)\n", + "print(\"Verbose:\", mst_synth.verbose)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization through config dictionary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Let's initialize the basic configuration where we pass **only** one required parameter - epsilon, and see what will be the values of MSTSynthesizer params:" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 0.08\n", + "Slide range: False\n", + "Thresh: 0.05\n", + "Preprocess factor: 0.05\n", + "Delta: 1e-09\n", + "Verbose: False\n" + ] + } + ], + "source": [ + "config = {\n", + " \"epsilon\": 0.08\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(**config)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range) # default False\n", + "print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor) # default 0.05\n", + "print(\"Delta:\", mst_synth.delta) # default 1e-09\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "From the above result, you can see that all parameters were set with 
default values, while epsilon was set with a value provided by the user (as a required parameter)." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We also initialize the synthesizer inside the class, which is done as follows:\n", + "\n", + "`self.synthesizer = SmartnoiseMSTSynthesizer(\n", + " epsilon=self.epsilon, delta=self.delta, verbose=self.verbose, **synth_kwargs\n", + ")`\n", + "\n", + "Thus, we see that we can pass our customized parameters there as well (but only those that **can** be customized, e.g. epsilon, delta, and verbose)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSTSynthesizer Epsilon: 0.08\n", + "MSTSynthesizer Delta: 1e-09\n", + "MSTSynthesizer Verbose: False\n" + ] + } + ], + "source": [ + "print(\"MSTSynthesizer Epsilon:\", mst_synth.synthesizer.epsilon)\n", + "print(\"MSTSynthesizer Delta:\", mst_synth.synthesizer.delta)\n", + "print(\"MSTSynthesizer Verbose:\", mst_synth.synthesizer.verbose)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "As we can see, the epsilon we provided during class initialization is also passed to the synthesizer, while other params used their default values (as we did not configure them)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "We can configure more parameters and make sure that they are all set correctly (with custom values where they are passed and default values where they are not):" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 0.08\n", + "Slide range: True\n", + "Thresh: 0.05\n", + "Preprocess factor: 0.05\n", + "Delta: 0.25\n", + "Verbose: True\n" + ] + } + ], + "source": [ + "config = {\n", + " \"epsilon\": 0.08,\n", + " \"slide_range\": True,\n", + " \"verbose\": True,\n", + " 
\"delta\": 0.25 \n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(**config)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range) # default False\n", + "print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor) # default 0.05\n", + "print(\"Delta:\", mst_synth.delta) # default 1e-09\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the values of self.synthesizer are also changed through this:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSTSynthesizer Epsilon: 0.08\n", + "MSTSynthesizer Delta: 0.25\n", + "MSTSynthesizer Verbose: True\n" + ] + } + ], + "source": [ + "print(\"MSTSynthesizer Epsilon:\", mst_synth.synthesizer.epsilon)\n", + "print(\"MSTSynthesizer Delta:\", mst_synth.synthesizer.delta) # default 1e-09\n", + "print(\"MSTSynthesizer Verbose:\", mst_synth.synthesizer.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization through passing values as regular parameters" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "The user can also customize the synthesizer using the regular parameter names, which can be done as follows:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 0.1\n", + "Slide range: False\n", + "Thresh: 0.05\n", + "Preprocess factor: 1.2\n", + "Delta: 1e-09\n", + "Verbose: False\n" + ] + } + ], + "source": [ + "mst_synth = MSTSynthesizer(epsilon=0.10, slide_range=False, preprocess_factor=1.2)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range) # default False\n", + 
"print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor) # default 0.05\n", + "print(\"Delta:\", mst_synth.delta) # default 1e-09\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And (again) the values of self.synthesizer are also changed through this:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSTSynthesizer Epsilon: 0.1\n", + "MSTSynthesizer Delta: 1e-09\n", + "MSTSynthesizer Verbose: False\n" + ] + } + ], + "source": [ + "print(\"MSTSynthesizer Epsilon:\", mst_synth.synthesizer.epsilon)\n", + "print(\"MSTSynthesizer Delta:\", mst_synth.synthesizer.delta) # default 1e-09\n", + "print(\"MSTSynthesizer Verbose:\", mst_synth.synthesizer.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And, of course, you should remember that the epsilon is a mandatory parameter for transmission, so if you do not want an error to occur, please specify it (otherwise, we will notify you about it)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Epsilon is a required parameter for Synthesizer.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/1886790669.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mslide_range\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m 
\u001b[0mpreprocess_factor\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.2\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msynth_kwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m ) -> None:\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslide_range\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0mallowed_additional_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"preprocess_factor\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"delta\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"verbose\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, **kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mepsilon_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlocals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"epsilon\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m 
\u001b[0;32mif\u001b[0m \u001b[0mepsilon_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Epsilon is a required parameter for Synthesizer.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon_value\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m raise TypeError(\n", + "\u001b[0;31mValueError\u001b[0m: Epsilon is a required parameter for Synthesizer." + ] + } + ], + "source": [ + "mst_synth = MSTSynthesizer(slide_range=False, preprocess_factor=1.2)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Initialization using both ways: regular params + config dictionary" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In addition, the user can use both variants of parameter configurations, and the order in which they are passed does not matter, since both will work." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 0.3\n", + "Slide range: False\n", + "Thresh: 0.4\n", + "Preprocess factor: 0.05\n", + "Delta: 1e-09\n", + "Verbose: True\n" + ] + } + ], + "source": [ + "config = {\n", + " \"epsilon\": 0.3,\n", + " \"thresh\": 0.4\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(verbose=True, **config)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range) # default False\n", + "print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor) # default 0.05\n", + "print(\"Delta:\", mst_synth.delta) # default 1e-09\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And the other way around:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 0.3333\n", + "Slide range: False\n", + "Thresh: 0.4444\n", + "Preprocess factor: 0.5555\n", + "Delta: 1e-09\n", + "Verbose: False\n" + ] + } + ], + "source": [ + "config = {\n", + " \"epsilon\": 0.3333,\n", + " \"thresh\": 0.4444\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(**config, preprocess_factor=0.5555)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range) # default False\n", + "print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor) # default 0.05\n", + "print(\"Delta:\", mst_synth.delta) # default 1e-09\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And (again) epsilon is a mandatory argument to be passed:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + 
"metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Epsilon is a required parameter for Synthesizer.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/3251956413.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m }\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msynth_kwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m ) -> None:\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslide_range\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0mallowed_additional_params\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0;34m{\u001b[0m\u001b[0;34m\"preprocess_factor\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"delta\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"verbose\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, **kwargs)\u001b[0m\n\u001b[1;32m 45\u001b[0m \u001b[0mepsilon_value\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlocals\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"epsilon\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 46\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mepsilon_value\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 47\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Epsilon is a required parameter for Synthesizer.\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 48\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon_value\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m raise TypeError(\n", + "\u001b[0;31mValueError\u001b[0m: Epsilon is a required parameter for Synthesizer." 
+ ] + } + ], + "source": [ + "config = {\n", + " \"thresh\": 0.4,\n", + " \"delta\": 0.121\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(verbose=True, **config)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Checking argument types" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Another great thing is that we don't allow the user to pass arguments with unexpected types. For example: the user passes the argument `verbose = 5` (int type), while verbose should be of type bool. If such a situation occurs, a TypeError message is raised with information about what types are expected for the erroneous argument." + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "Epsilon must be of type int or float, got bool.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/2752234616.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 4\u001b[0m }\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# The epsilon value can only be of type int or float\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 
166\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msynth_kwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m ) -> None:\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslide_range\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0mallowed_additional_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"preprocess_factor\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"delta\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"verbose\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, **kwargs)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon_value\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mfloat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mint\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 49\u001b[0m raise TypeError(\n\u001b[0;32m---> 50\u001b[0;31m \u001b[0;34mf\"Epsilon must be of type int or float, got {type(epsilon_value).__name__}.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 51\u001b[0m )\n\u001b[1;32m 52\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mepsilon\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mTypeError\u001b[0m: Epsilon must be of type int or float, got bool." + ] + } + ], + "source": [ + "config = {\n", + " \"thresh\": 0.4,\n", + " \"delta\": 0.121\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(epsilon=True, **config) # The epsilon value can only be of type int or float" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "ename": "TypeError", + "evalue": "thresh must be of type float, got str.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/3447988808.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1.0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"0.4\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdelta\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;31m# The thresh value can only be of type int or float\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0msynth_kwargs\u001b[0m\u001b[0;34m:\u001b[0m 
\u001b[0mdict\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 167\u001b[0m ) -> None:\n\u001b[0;32m--> 168\u001b[0;31m \u001b[0msuper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__init__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mslide_range\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mthresh\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 169\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 170\u001b[0m \u001b[0mallowed_additional_params\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m\"preprocess_factor\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"delta\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m\"verbose\"\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, **kwargs)\u001b[0m\n\u001b[1;32m 69\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam_value\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mparam_type\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 70\u001b[0m raise TypeError(\n\u001b[0;32m---> 71\u001b[0;31m \u001b[0;34mf\"{param} must be of type {param_type.__name__}, got {type(param_value).__name__}.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 72\u001b[0m )\n\u001b[1;32m 73\u001b[0m \u001b[0msetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparam\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparam_value\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;31mTypeError\u001b[0m: thresh must be of type float, got str." + ] + } + ], + "source": [ + "mst_synth = MSTSynthesizer(epsilon=1.0, thresh=\"0.4\", delta=True) # The thresh value can only be of type int or float" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "So, from the above examples, we can see that passing the correct type of arguments is an essential step, and if the user makes a mistake, we inform them. However, we **allow the user to pass int values for float arguments** because we can easily convert the former to the latter. See examples below:" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 1.0\n", + "Slide range: False\n", + "Thresh: 2.0\n", + "Preprocess factor: 0.05\n", + "Delta: 5.0\n", + "Verbose: True\n" + ] + } + ], + "source": [ + "mst_synth = MSTSynthesizer(epsilon=1, thresh=2, verbose=True, delta=5)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Slide range:\", mst_synth.slide_range) # default False\n", + "print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Preprocess factor:\", mst_synth.preprocess_factor) # default 0.05\n", + "print(\"Delta:\", mst_synth.delta) # default 1e-09\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "And there won't be any issues for self.synthesizer attributes too:" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSTSynthesizer Epsilon: 1.0\n", + "MSTSynthesizer Delta: 5.0\n", + "MSTSynthesizer Verbose: True\n" + ] + } + ], + "source": [ + "print(\"MSTSynthesizer Epsilon:\", mst_synth.synthesizer.epsilon)\n", + "print(\"MSTSynthesizer Delta:\", mst_synth.synthesizer.delta) # default 1e-09\n", + "print(\"MSTSynthesizer Verbose:\", 
mst_synth.synthesizer.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Moreover, if the user provides None as the value for an argument, we will set the default value for this argument (if there is one). " + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Epsilon: 1.1\n", + "Thresh: 0.001\n", + "Verbose: False\n" + ] + } + ], + "source": [ + "config = {\n", + " \"epsilon\": 1.1,\n", + " \"verbose\": None,\n", + " \"thresh\": 0.001\n", + "}\n", + "\n", + "mst_synth = MSTSynthesizer(**config)\n", + "print(\"Epsilon:\", mst_synth.epsilon)\n", + "print(\"Thresh:\", mst_synth.thresh) # default 0.05\n", + "print(\"Verbose:\", mst_synth.verbose) # default False" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Non-existing arguments passed for the synthesizer" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Of course, we don't want the user to pass in non-existent parameters for a particular synth class, because that makes no sense. Another situation might be that the user is simply using the wrong synthesizer type, even if they are passing an \"existing\" parameter. 
In this case, we will inform the user that the provided arguments are not supported for the specified synthesizer type:" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Parameter 'hello' is not available for this type of synthesizer.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/564626642.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.99\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mhello\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"world\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mparam\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mallowed_additional_params\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m raise ValueError(\n\u001b[0;32m--> 174\u001b[0;31m \u001b[0;34mf\"Parameter '{param}' is not available for this type of synthesizer.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 175\u001b[0m )\n\u001b[1;32m 176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;31mValueError\u001b[0m: Parameter 'hello' is not available for this type of synthesizer." + ] + } + ], + "source": [ + "mst_synth = MSTSynthesizer(epsilon=0.99, verbose=True, hello=\"world\")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "ename": "ValueError", + "evalue": "Parameter 'batch_size' is not available for this type of synthesizer.", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m/var/folders/wp/3b9l300x6jd499pj0h74v8500000gn/T/ipykernel_97092/2738562190.py\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mmst_synth\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mMSTSynthesizer\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mepsilon\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0.99\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mverbose\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mTrue\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_size\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;32m~/Desktop/RAI research program/project/SynRD/SynRD/synthesizers/synthesizer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, epsilon, slide_range, thresh, preprocess_factor, delta, verbose, **synth_kwargs)\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mparam\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mallowed_additional_params\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 173\u001b[0m raise ValueError(\n\u001b[0;32m--> 174\u001b[0;31m \u001b[0;34mf\"Parameter '{param}' is not available for this type of synthesizer.\"\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 175\u001b[0m 
)\n\u001b[1;32m 176\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Parameter 'batch_size' is not available for this type of synthesizer." + ] + } + ], + "source": [ + "mst_synth = MSTSynthesizer(epsilon=0.99, verbose=True, batch_size=5)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "synrd", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.16" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/docs/findings.md b/docs/findings.md new file mode 100644 index 0000000..b6fc4c6 --- /dev/null +++ b/docs/findings.md @@ -0,0 +1 @@ +hello \ No newline at end of file diff --git a/docs/img/.DS_Store b/docs/img/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/docs/img/.DS_Store differ diff --git a/docs/img/logo.png b/docs/img/logo.png new file mode 100644 index 0000000..ed5d5d6 Binary files /dev/null and b/docs/img/logo.png differ diff --git a/docs/img/taxonomy_synrd.png b/docs/img/taxonomy_synrd.png new file mode 100644 index 0000000..bd543d8 Binary files /dev/null and b/docs/img/taxonomy_synrd.png differ diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 0000000..91d231a --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,126 @@ +[![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) [![Python](https://img.shields.io/badge/python-3.7-blue)](https://www.python.org/) + +# SynRD Package +A Differentially Private (DP) Synthetic Data benchmarking package, posing the question: "Can a DP Synthesizer produce private (tabular) data that preserves scientific findings?" In other words, do DP Synthesizers satisfy *Epistemic Parity*? 
+ +Citation: `Rosenblatt, L., Holovenko, A., Rumezhak, T., Stadnik, A., Herman, B., Stoyanovich, J., & Howe, B. (2022). Epistemic Parity: Reproducibility as an Evaluation Metric for Differential Privacy. arXiv preprint arXiv:2208.12700.` (under review) + +## Installing the benchmark +The benchmark is currently in `beta-0.1`. Still, you can install the development version by running the following commands: + +1. Create your preferred package management environment with `python=3.7` (for example, `conda create -n "synrd" python=3.7`) +2. `git clone https://github.com/DataResponsibly/SynRD.git` +3. `cd SynRD` +4. `pip install git+https://github.com/ryan112358/private-pgm.git` +5. `pip install .` + +Step (4) installs a non-PyPI dependency (the excellent [private-pgm] package for DP synthesizers). + +*Note: This package is under heavy development - if functionality doesn't work/is missing, feel free to open an issue or submit a PR to fix it!* + +### Note on using GEMSynthesizer +If you would like to use the GEMSynthesizer, you must follow an alternative installation process for SynRD: + +1. Create your preferred package management environment with `python=3.7` (for example, `conda create -n "synrd" python=3.7`) +2. Git clone the SynRD repo: `git clone https://github.com/DataResponsibly/SynRD` +3. `cd SynRD/synthesizers` +4. Git clone the dp-query-release repo: `git clone https://github.com/terranceliu/dp-query-release.git` +5. Move the `src/` folder out of `dp-query-release/` and into `SynRD/synthesizers/` +6. From the top level of the SynRD clone, run `pip install .` + +## Further dependency notes +If you would like to benchmark with the paper `Fruiht2018Naturally`, please follow one of the following `rpy2` installation options to configure your R-Python interface package. + +### Install Option 1 for R + +If you have a Mac with an M1 chip, you may have success installing rpy2 via the following: + +- Uninstall existing R versions on your machine. 
+- Install `R-4.2.2-arm64.pkg` from [cran.r-project.org]. +- `conda install -n base conda-forge::mamba` +- `mamba install -c conda-forge rpy2` + +### Install Option 2 for R + +To run analysis for papers using R, you must ensure that R is downloaded and your R_HOME environment variable is set to the path of the R executable. + +For installing with Anaconda, you may use `conda install r-base r-essentials`. + +For confirming rpy2 is working as expected, try the following in Python: +```python +import rpy2.robjects + +rpy2.robjects.r['pi'] # Returns R object with the number pi +``` + +## Notes on structure of package + +- Each "paper" in the benchmark is named according to BibTeX convention (authorYEARfirstword). + +_________________ + +# Notes on benchmark construction, reasoning, etc. + +## Taxonomy of findings +![Taxonomy of findings](img/taxonomy_synrd.png "Taxonomy of findings") + + +## How to add a new paper +Brief details on how to add a new paper. + +1. Create a new folder named after the paper (authorYEARfirstword). +2. Create a `process.ipynb` notebook as your data playground. Use this to investigate data cleaning/processing/results generation. +3. In parallel with (2), create an `authorYEARfirstword.py` file, and extend the `Publication()` metaclass with `AuthorYEARFirstword(Publication)`. Add the relevant details (see `meta_classes.py` for notes on what this means). Then, begin to move over `findings` from `process.ipynb` into replicable lambdas in `AuthorYEARFirstword(Publication)`. +4. Ensure that `AuthorYEARFirstword(Publication)` has a `FINDINGS` list class attribute. This should consist of `Finding` objects that wrap each `finding_i(self)` lambda in the proper `Finding, VisualFinding or FigureFinding` metaclass, each of which is added to the list. +5. See `Saw2018Cross` for an example of a cleanly implemented `Publication` class. + +### Addendum on finding lambdas +`Finding` lambdas should have a particular structure that should be strictly adhered to. 
Consider the following example, and note particularly the return values: +```Python +def finding_i_j(self): # there can be kwargs + """ + (Text from paper, usually 2 or 3 sentences) + """ + # often can use a table finding directly or + # as a starting point to quickly recreate + # finding + results = self.table() + + # (pandas stuff happens here to generate + # the findings) + + return ([values], + soft_finding, + [hard_findings]) +``` +The finding lambdas can essentially perform any computation necessary, but must return a tuple of: + +1. A list of values (a non-exhaustive set of values relevant to the soft finding) + + #### For example: + + ```Python + [interest_stem_ninth,interest_stem_eleventh] + ``` + +2. A soft_finding boolean (this is simply a boolean that reflects the primary inequality/contrast presented in the original paper for this finding) + + #### For example: + + ```Python + soft_finding = interest_stem_ninth > interest_stem_eleventh + ``` + +3. A list of hard findings, i.e. values (this could be the difference or set of differences that affected the soft_finding inequality).
+ + #### For example: + + ```Python + hard_finding = interest_stem_ninth - interest_stem_eleventh + hard_findings = [hard_finding] + ``` + + +[lunr.js]: https://lunrjs.com/ +[private-pgm]: https://github.com/ryan112358/private-pgm +[cran.r-project.org]: https://cran.r-project.org/bin/macosx/ \ No newline at end of file diff --git a/imgs/.DS_Store b/imgs/.DS_Store new file mode 100644 index 0000000..5008ddf Binary files /dev/null and b/imgs/.DS_Store differ diff --git a/mkdocs.yml b/mkdocs.yml new file mode 100644 index 0000000..fa48913 --- /dev/null +++ b/mkdocs.yml @@ -0,0 +1,115 @@ +site_name: SynRD +#site_url: https://squidfunk.github.io/mkdocs-material/ +repo_name: DataResponsibly/SynRD +repo_url: https://github.com/DataResponsibly/SynRD + +theme: + name: "material" + logo: 'img/logo.png' + features: + - toc.integrate + - toc.follow + - navigation.top + - navigation.footer + - navigation.path + - navigation.sections + - navigation.expand + - navigation.tabs + - navigation.indexes + - search.suggest + - search.highlight + - search.share + - content.tabs.link + - content.code.annotation + - content.code.copy + language: en + palette: + - scheme: default + toggle: + icon: material/brightness-7 + name: Switch to dark mode + primary: teal + accent: purple + - scheme: slate + toggle: + icon: material/brightness-4 + name: Switch to light mode + primary: teal + accent: lime + font: + text: Rubik Regular + code: Roboto Mono + icon: + repo: fontawesome/brands/github + +nav: + - Quickstart: quickstart.md + - Python API: api.md + - Findings: findings.md + +plugins: + - mkdocstrings: + handlers: + python: + options: + docstring_section_style: list + docstring_style: google + - search: + pipeline: + - stemmer + - stopWordFilter + - trimmer + - minify: + minify_html: true + - ezlinks: + wikilinks: true + - mkdocs-jupyter: + execute: false + include_requirejs: true + +markdown_extensions: + - abbr + - admonition + - attr_list + - def_list + - footnotes + - md_in_html + - 
sane_lists + - def_list + - pymdownx.tasklist: + custom_checkbox: true + - toc: + permalink: true + - pymdownx.arithmatex: + generic: true + - pymdownx.betterem: + smart_enable: all + - pymdownx.magiclink: + normalize_issue_symbols: true + repo_url_shorthand: true + user: vihtoriaaa + repo: DataResponsibly/SynRD + - pymdownx.snippets: + auto_append: + - includes/mkdocs.md + - pymdownx.superfences: + custom_fences: + - name: mermaid + class: mermaid + format: !!python/name:pymdownx.superfences.fence_code_format + - pymdownx.tabbed: + alternate_style: true + combine_header_slug: true + slugify: !!python/object/apply:pymdownx.slugs.slugify + kwds: + case: lower + - pymdownx.tasklist: + custom_checkbox: true + - pymdownx.tilde + +copyright: Copyright © 2023 + +extra: + social: + - icon: fontawesome/brands/github + link: https://github.com/DataResponsibly/SynRD diff --git a/web/.DS_Store b/web/.DS_Store new file mode 100644 index 0000000..55a5da0 Binary files /dev/null and b/web/.DS_Store differ diff --git a/web/preprocess.ipynb b/web/preprocess.ipynb index 8f21de6..aec25d8 100644 --- a/web/preprocess.ipynb +++ b/web/preprocess.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -40,10 +40,242 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
paperfinding_numberfinding_typeparitystdfinding typesynthesizerepsilon
0iverson22football0NaN0.000.000000DESCRIPTIVE_STATISTICSprivbayes0.37
1iverson22football1NaN1.000.000000DESCRIPTIVE_STATISTICSprivbayes0.37
2iverson22football2NaN1.000.000000DESCRIPTIVE_STATISTICSprivbayes0.37
3iverson22football3NaN1.000.000000TEMPORAL_FIXED_CLASSprivbayes0.37
4iverson22football4NaN1.000.000000TEMPORAL_FIXED_CLASSprivbayes0.37
5iverson22football5NaN0.000.000000TEMPORAL_FIXED_CLASSprivbayes0.37
6iverson22football6NaN0.680.466476TEMPORAL_FIXED_CLASSprivbayes0.37
12iverson22football0NaN1.000.000000DESCRIPTIVE_STATISTICSprivbayes1.00
13iverson22football1NaN1.000.000000DESCRIPTIVE_STATISTICSprivbayes1.00
14iverson22football2NaN1.000.000000DESCRIPTIVE_STATISTICSprivbayes1.00
15iverson22football3NaN0.640.480000TEMPORAL_FIXED_CLASSprivbayes1.00
16iverson22football4NaN0.600.489898TEMPORAL_FIXED_CLASSprivbayes1.00
17iverson22football5NaN0.600.489898TEMPORAL_FIXED_CLASSprivbayes1.00
18iverson22football6NaN0.640.480000TEMPORAL_FIXED_CLASSprivbayes1.00
\n", + "
" + ], + "text/plain": [ + " paper finding_number finding_type parity std \\\n", + "0 iverson22football 0 NaN 0.00 0.000000 \n", + "1 iverson22football 1 NaN 1.00 0.000000 \n", + "2 iverson22football 2 NaN 1.00 0.000000 \n", + "3 iverson22football 3 NaN 1.00 0.000000 \n", + "4 iverson22football 4 NaN 1.00 0.000000 \n", + "5 iverson22football 5 NaN 0.00 0.000000 \n", + "6 iverson22football 6 NaN 0.68 0.466476 \n", + "12 iverson22football 0 NaN 1.00 0.000000 \n", + "13 iverson22football 1 NaN 1.00 0.000000 \n", + "14 iverson22football 2 NaN 1.00 0.000000 \n", + "15 iverson22football 3 NaN 0.64 0.480000 \n", + "16 iverson22football 4 NaN 0.60 0.489898 \n", + "17 iverson22football 5 NaN 0.60 0.489898 \n", + "18 iverson22football 6 NaN 0.64 0.480000 \n", + "\n", + " finding type synthesizer epsilon \n", + "0 DESCRIPTIVE_STATISTICS privbayes 0.37 \n", + "1 DESCRIPTIVE_STATISTICS privbayes 0.37 \n", + "2 DESCRIPTIVE_STATISTICS privbayes 0.37 \n", + "3 TEMPORAL_FIXED_CLASS privbayes 0.37 \n", + "4 TEMPORAL_FIXED_CLASS privbayes 0.37 \n", + "5 TEMPORAL_FIXED_CLASS privbayes 0.37 \n", + "6 TEMPORAL_FIXED_CLASS privbayes 0.37 \n", + "12 DESCRIPTIVE_STATISTICS privbayes 1.00 \n", + "13 DESCRIPTIVE_STATISTICS privbayes 1.00 \n", + "14 DESCRIPTIVE_STATISTICS privbayes 1.00 \n", + "15 TEMPORAL_FIXED_CLASS privbayes 1.00 \n", + "16 TEMPORAL_FIXED_CLASS privbayes 1.00 \n", + "17 TEMPORAL_FIXED_CLASS privbayes 1.00 \n", + "18 TEMPORAL_FIXED_CLASS privbayes 1.00 " + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df = pd.read_csv('data.csv')\n", + "df[df[\"paper\"]==\"iverson22football\"]" + ] } ], "metadata": { @@ -62,7 +294,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.12" + "version": "3.7.16" }, "orig_nbformat": 4, "vscode": {