Skip to content

Commit

Permalink
Merge pull request #47 from hpi-epic/add_normalization_after_sample
Browse files Browse the repository at this point in the history
Add normalization after sampling
  • Loading branch information
JohannesHuegle authored Jul 26, 2022
2 parents a9407d7 + c2e75ac commit 0bedbf5
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 2 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ python3 -m twine upload dist/*
| beta_lower_limit | (0, Inf) | 0.5 | Lower limit for beta values for influence of continuous parents. Betas are sampled uniformly from the union of [-upper,-lower] and [lower,upper]. For the upper limit, see beta_upper_limit below. |
| beta_upper_limit | (0, Inf) | 1 | Upper limit for beta values for influence of continuous parents. Betas are sampled uniformly from the union of [-upper,-lower] and [lower,upper]. For the lower limit, see beta_lower_limit above. |
| graph_structure_file | | None | Defines a path to a .gml file for a fixed DAG structure (ignoring node and edge characteristics) used during manm_cs graph building. Note graph_structure_file is mutually exclusive to num_nodes and edge_density. |
| normalize | 0 or 1 | 0 | Defines whether the generated samples of continuous variables are normalized once all samples are generated. |

## License

Expand Down
4 changes: 4 additions & 0 deletions manm_cs/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,8 @@ def parse_args():
parser.add_argument('--output_samples_file', type=str, required=False, default=SAMPLES_FILE,
help='Output file (path) for the generated samples csv. Relative to the directory from which the library is executed.'
'Specify without file extension.')
parser.add_argument('--normalize', type=to_bool, required=False, default=False,
help='Normalize the continuous variables in the dataset once all samples are generated.')
args = parser.parse_args()

assert args.min_discrete_value_classes <= args.max_discrete_value_classes, \
Expand Down Expand Up @@ -171,6 +173,8 @@ def graph_from_args(args) -> Graph:
graph = graph_from_args(args)

dfs = graph.sample(num_observations=args.num_samples, num_processes=args.num_processes)
if args.normalize:
dfs = graph.normalize_continous_columns(dataframes=dfs)
write_single_csv(dataframes=dfs, target_path=f"{args.output_samples_file}.csv")

nx_graph = graph.to_networkx_graph()
Expand Down
13 changes: 12 additions & 1 deletion manm_cs/graph/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
import numpy as np
from pathos.multiprocessing import ProcessingPool

from manm_cs.variables.variable import Variable
from manm_cs.variables.variable import Variable, VariableType


class Graph:
Expand Down Expand Up @@ -50,3 +50,14 @@ def to_networkx_graph(self) -> nx.DiGraph:
for parent in var.parents:
nx_graph.add_edge(parent.idx, var.idx)
return nx_graph

def normalize_continous_columns(self, dataframes: List[pd.DataFrame]) -> List[pd.DataFrame]:
    """Standardize the columns of all continuous variables.

    The given dataframes are concatenated column-wise into one dataframe.
    For every variable in ``self.variables`` whose type is
    ``VariableType.CONTINUOUS``, the column labelled ``variable.idx`` is
    replaced by ``(column - mean) / std``. Columns of other variables are
    left untouched.

    A column whose standard deviation is zero (constant column) or NaN
    (e.g. fewer than two observations) cannot be scaled; it is only
    centered, so it does not turn into NaN/inf values.

    Args:
        dataframes: Dataframes to merge along the column axis.
            Presumably the per-variable sample frames produced by
            ``sample`` -- verify against the caller.

    Returns:
        A single-element list holding the merged, normalized dataframe.
        The input dataframes are not modified.
    """
    # pd.concat builds a new frame, so the caller's dataframes stay intact.
    merged_df = pd.concat(dataframes, axis=1)

    for variable in self.variables:
        if variable.type != VariableType.CONTINUOUS:
            continue

        column = merged_df[variable.idx]
        centered = column - column.mean()
        std = column.std()

        # Only scale when the spread is usable; dividing by 0 or NaN would
        # wipe out the whole column.
        if pd.notna(std) and std != 0:
            centered = centered / std

        merged_df[variable.idx] = centered

    return [merged_df]
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[metadata]
name = manm_cs
version = 0.1.1
version = 0.1.2
author = Johannes Huegle, Christopher Hagedorn, Lukas Boehme, Mats Poerschke, Jonas Umland
author_email = [email protected]
description = Data generation for causal structure learning based on mixed additive noise model (MANM)
Expand Down

0 comments on commit 0bedbf5

Please sign in to comment.