Skip to content

Commit

Permalink
Merge pull request #79 from SWIFTSIM/log_file_reader
Browse files Browse the repository at this point in the history
Log file reader
  • Loading branch information
JBorrow authored Oct 7, 2020
2 parents 23efffb + b086caa commit 7dd59d5
Show file tree
Hide file tree
Showing 8 changed files with 267 additions and 3 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
SWIFTsimIO Changelog
====================

v 4.2.0
-------

Added a log file reader.

+ Added the `swiftsimio.statistics.SWIFTStatisticsFile` functionality
to read statistics log files.

v 4.1.0
-------

Expand Down
1 change: 1 addition & 0 deletions docs/source/index.rst
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ snapshots to enable partial reading.
visualisation/index
velociraptor/index
creating_initial_conditions/index
statistics/index
command_line/index

modules/index
Expand Down
32 changes: 32 additions & 0 deletions docs/source/statistics/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
Statistics Files
================

:mod:`swiftsimio` includes routines to load log files, such as
``SFR.txt`` and ``energy.txt``. These are available through the
:obj:`swiftsimio.statistics.SWIFTStatisticsFile` object, or through
the main ``load_statistics`` function.

Example
-------

.. code-block:: python

   from swiftsimio import load_statistics

   data = load_statistics("energy.txt")

   print(data)
   print(data.total_mass.name)

Will output:

.. code-block:: bash

   Statistics file: energy.txt, containing fields: #, step, time, a, z, total_mass,
   gas_mass, dm_mass, sink_mass, star_mass, bh_mass, gas_z_mass, star_z_mass,
   bh_z_mass, kin_energy, int_energy, pot_energy, rad_energy, gas_entropy, com_x,
   com_y, com_z, mom_x, mom_y, mom_z, ang_mom_x, ang_mom_y, ang_mom_z
   'Total mass in the simulation'
17 changes: 16 additions & 1 deletion swiftsimio/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from .reader import *
from .writer import SWIFTWriterDataset
from .masks import SWIFTMask
from .statistics import SWIFTStatisticsFile
from .__version__ import __version__
from .__cite__ import __cite__

Expand All @@ -10,6 +11,7 @@
import swiftsimio.visualisation as visualisation
import swiftsimio.units as units
import swiftsimio.subset_writer as subset_writer
import swiftsimio.statistics as statistics

# Package name, exposed as a module-level attribute.
name = "swiftsimio"

Expand Down Expand Up @@ -85,13 +87,26 @@ def load(filename, mask=None) -> SWIFTDataset:
Parameters
----------
filename : str
file to containing SWIFT dataset to read
SWIFT snapshot file to read
mask : SWIFTMask, optional
mask to apply when reading dataset
"""

return SWIFTDataset(filename, mask=mask)

def load_statistics(filename) -> SWIFTStatisticsFile:
    """
    Reads a SWIFT statistics log (e.g. ``SFR.txt`` or ``energy.txt``).

    Parameters
    ----------
    filename : str
        Path to the SWIFT statistics file to read.

    Returns
    -------
    SWIFTStatisticsFile
        Reader object constructed from ``filename``.
    """

    statistics_file = SWIFTStatisticsFile(filename=filename)

    return statistics_file


# Expose the writer dataset class under the shorter alias ``Writer``.
Writer = SWIFTWriterDataset
2 changes: 1 addition & 1 deletion swiftsimio/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "4.1.0"
__version__ = "4.2.0"
50 changes: 49 additions & 1 deletion swiftsimio/accelerated.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from h5py._hl.dataset import Dataset

from typing import Tuple
from typing import Tuple, Union, List

try:
from numba import jit, prange
Expand Down Expand Up @@ -495,3 +495,51 @@ def read_ranges_from_file(
)

return read_ranges(handle, ranges, output_shape, output_type, columns)


def list_of_strings_to_arrays(lines: List[str]) -> List[np.ndarray]:
    """
    Converts a list of whitespace-delimited rows of numbers to column arrays.

    Parameters
    ----------
    lines: List[str]
        List of strings containing numbers separated by whitespace. Every
        line is expected to hold the same number of values as the first.

    Returns
    -------
    arrays: List[np.ndarray]
        List of numpy arrays, one per column, each with ``len(lines)``
        entries. An empty list is returned when ``lines`` is empty.

    Raises
    ------
    ValueError
        If a value cannot be converted to the dtype chosen for its column.

    Notes
    -----
    The dtype of each column is chosen from the first line only: values that
    parse as a Python integer give an ``np.int64`` column, anything else
    (``1.5``, ``1e-3``, ``1E3``, ``nan``, ``inf``) gives ``np.float64``.

    Currently not suitable for ``numba`` acceleration due to mixed datatype
    usage.
    """

    # Nothing to inspect for dtypes, so there are no columns to return.
    if not lines:
        return []

    number_of_lines = len(lines)

    # Calculate types from the first row. Trying the integer parse (rather
    # than searching for "." or "e") also classifies upper-case exponents
    # and nan/inf as floating point.
    dtypes = []

    for item in lines[0].split():
        try:
            int(item)
        except ValueError:
            dtypes.append(np.float64)
        else:
            dtypes.append(np.int64)

    arrays = [np.zeros(number_of_lines, dtype=dtype) for dtype in dtypes]

    # Fill the arrays row by row; zip stops at the shortest of the three, so
    # surplus values on a line are ignored and missing ones stay at zero.
    for index, line in enumerate(lines):
        for array, dtype, value in zip(arrays, dtypes, line.split()):
            array[index] = dtype(value)

    return arrays

133 changes: 133 additions & 0 deletions swiftsimio/statistics.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
"""
Reader for the statistics file.
"""

import unyt
import regex as re

from typing import List, Dict

from swiftsimio.accelerated import list_of_strings_to_arrays


class SWIFTStatisticsFile(object):
    """
    SWIFT statistics files (e.g. SFR.txt, energy.txt) reader.

    On construction the file is read in full: the leading ``#`` header lines
    are parsed for field names and units, and the remaining lines are
    converted to one array per column. Each column is attached to the
    instance as a ``unyt.unyt_array`` attribute named after the matching
    snake_case header name.
    """

    # Names from the header.
    header_names: List[str]
    # Units (unyt-based) from the header
    header_units: Dict[str, unyt.unyt_quantity]
    # snake_case names from the header
    header_snake_case_names: List[str]
    # Non-blank data lines as strings, read from the file.
    raw_lines: List[str]

    def __init__(self, filename: str):
        """
        Parameters
        ----------
        filename: str
            File name for the statistics file.
        """

        self.filename = filename

        self._read_file()
        self._process_raw_lines()

    def _read_file(self):
        """
        Reads the file, parsing the header (names and units) and storing the
        remaining data lines in ``raw_lines`` for later processing.
        """

        # Read the header and use custom regex parsing.

        with open(self.filename, "r") as handle:
            lines = handle.readlines()

        current_line = 0
        number_of_lines = len(lines)

        header_names = []
        header_units = {}
        current_name = None

        # Regex for matching
        regex_name = re.compile(r"# \(([0-9]*)\) +([^\.\n]*)")
        regex_unit = re.compile(r"# *Unit = ([^\s]+) ?(.*)")

        # The bound check keeps empty and header-only files from running
        # off the end of the list.
        while current_line < number_of_lines and lines[current_line].startswith(
            "#"
        ):
            # Regex match each line to see if it is a unit
            # or a name

            current_string = lines[current_line]
            current_line += 1

            name_match = regex_name.match(current_string)

            if name_match:
                current_name = name_match.group(2)
                # Fields are dimensionless until a unit line says otherwise.
                header_units[current_name] = unyt.dimensionless
                header_names.append(current_name)

                continue

            unit_match = regex_unit.match(current_string)

            if unit_match:
                # A unit line applies to the most recently seen name.
                if unit_match.group(1) != "dimensionless":
                    header_units[current_name] = unyt.unyt_quantity(
                        float(unit_match.group(1)), unit_match.group(2)
                    )
                else:
                    header_units[current_name] = unyt.dimensionless

                continue

        # The last header line will be the names, so extract those here.
        # Without any header line there is nothing to extract (and indexing
        # with -1 would wrongly pick up the last data line).
        if current_line > 0:
            header_snake_case_names = [
                x.replace(".", "").replace(" ", "_").replace("\n", "").lower()
                for x in re.split(r"\s{2,}", lines[current_line - 1][1:])
                if x != ""
            ]
        else:
            header_snake_case_names = []

        self.header_names = header_names
        self.header_units = header_units
        self.header_snake_case_names = header_snake_case_names

        # Blank lines carry no values; keeping them would leave spurious
        # zero-filled rows in the output arrays.
        self.raw_lines = [line for line in lines[current_line:] if line.strip()]

    def _process_raw_lines(self):
        """
        Converts the raw data lines to arrays and attaches one
        ``unyt.unyt_array`` per field to this object, using the snake_case
        header names as attribute names.
        """

        if self.raw_lines:
            arrays = list_of_strings_to_arrays(lines=self.raw_lines)
        else:
            # No data rows (yet): still expose every field, as an empty array.
            arrays = [[] for _ in self.header_names]

        for array, header_name, header_snake_case_name in zip(
            arrays, self.header_names, self.header_snake_case_names
        ):
            setattr(
                self,
                header_snake_case_name,
                unyt.unyt_array(
                    array, units=self.header_units[header_name], name=header_name
                ),
            )

    def __str__(self):
        return (
            f"Statistics file: {self.filename}, containing fields: "
            f"{', '.join(self.header_snake_case_names)}"
        )

    def __repr__(self):
        return str(self)
27 changes: 27 additions & 0 deletions tests/test_accelerated.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
ranges_from_array,
read_ranges_from_file,
index_dataset,
list_of_strings_to_arrays,
)

import numpy as np
Expand Down Expand Up @@ -93,3 +94,29 @@ def test_index_dataset_h5py():
dataset = file.create_dataset("Test", data=data)

assert (index_dataset(dataset, mask) == data[mask]).all()


def test_list_of_strings_to_arrays():
    """
    Tests list_of_strings_to_arrays.

    Checks that two rows of mixed integer / floating point columns are
    converted to one array per column with the expected dtype and values.
    """

    lines = [
        " 0 0.0000 1.0e-3 14.0",
        " 7 3.0000 1.0e-3 14.0",
    ]

    expected_output = [
        np.array([0, 7], dtype=np.int64),
        np.array([0, 3], dtype=np.float64),
        np.array([1e-3, 1e-3], dtype=np.float64),
        np.array([14, 14], dtype=np.float64),
    ]

    output = list_of_strings_to_arrays(lines)

    # zip() below stops at the shorter sequence, so check the column count
    # explicitly; otherwise missing columns would go unnoticed.
    assert len(output) == len(expected_output)

    for expected, real in zip(expected_output, output):
        assert expected.dtype == real.dtype
        assert (expected == real).all()

0 comments on commit 7dd59d5

Please sign in to comment.