Bug fix in LabelTensor, Dataset and DataLoader and codacy warning cor…

…rection
mathLab · Nov 6, 2024 · 968c1a1 · 968c1a1
1 parent dbb5476
commit 968c1a1
Show file tree

Hide file tree

Showing 19 changed files with 339 additions and 270 deletions.
diff --git a/pina/condition/data_condition.py b/pina/condition/data_condition.py
@@ -15,6 +15,7 @@ class DataConditionInterface(ConditionInterface):
     """
 
     __slots__ = ["input_points", "conditional_variables"]
+    condition_type = ['unsupervised']
 
     def __init__(self, input_points, conditional_variables=None):
         """
@@ -23,7 +24,6 @@ def __init__(self, input_points, conditional_variables=None):
         super().__init__()
         self.input_points = input_points
         self.conditional_variables = conditional_variables
-        self._condition_type = 'unsupervised'
 
     def __setattr__(self, key, value):
         if (key == 'input_points') or (key == 'conditional_variables'):

diff --git a/pina/condition/domain_equation_condition.py b/pina/condition/domain_equation_condition.py
@@ -13,15 +13,14 @@ class DomainEquationCondition(ConditionInterface):
     """
 
     __slots__ = ["domain", "equation"]
-
+    condition_type = ['physics']
     def __init__(self, domain, equation):
         """
         TODO
         """
         super().__init__()
         self.domain = domain
         self.equation = equation
-        self._condition_type = 'physics'
 
     def __setattr__(self, key, value):
         if key == 'domain':

diff --git a/pina/condition/input_equation_condition.py b/pina/condition/input_equation_condition.py
@@ -14,15 +14,14 @@ class InputPointsEquationCondition(ConditionInterface):
     """
 
     __slots__ = ["input_points", "equation"]
-
+    condition_type = ['physics']
     def __init__(self, input_points, equation):
         """
         TODO
         """
         super().__init__()
         self.input_points = input_points
         self.equation = equation
-        self._condition_type = 'physics'
 
     def __setattr__(self, key, value):
         if key == 'input_points':

diff --git a/pina/condition/input_output_condition.py b/pina/condition/input_output_condition.py
@@ -13,15 +13,14 @@ class InputOutputPointsCondition(ConditionInterface):
     """
 
     __slots__ = ["input_points", "output_points"]
-
+    condition_type = ['supervised']
     def __init__(self, input_points, output_points):
         """
         TODO
         """
         super().__init__()
         self.input_points = input_points
         self.output_points = output_points
-        self._condition_type = ['supervised', 'physics']
 
     def __setattr__(self, key, value):
         if (key == 'input_points') or (key == 'output_points'):

diff --git a/pina/data/base_dataset.py b/pina/data/base_dataset.py
@@ -5,7 +5,6 @@
 import logging
 
 from torch.utils.data import Dataset
-
 from ..label_tensor import LabelTensor
 
 
@@ -109,14 +108,14 @@ def initialize(self):
         already filled
         """
         logging.debug(f'Initialize dataset {self.__class__.__name__}')
-
         if self.num_el_per_condition:
             self.condition_indices = torch.cat([
-                torch.tensor([i] * self.num_el_per_condition[i],
-                             dtype=torch.uint8)
+                torch.tensor(
+                    [self.conditions_idx[i]] * self.num_el_per_condition[i],
+                    dtype=torch.uint8)
                 for i in range(len(self.num_el_per_condition))
             ],
-                                               dim=0)
+                dim=0)
             for slot in self.__slots__:
                 current_attribute = getattr(self, slot)
                 if all(isinstance(a, LabelTensor) for a in current_attribute):

diff --git a/pina/data/data_module.py b/pina/data/data_module.py
@@ -23,8 +23,8 @@ def __init__(self,
                  problem,
                  device,
                  train_size=.7,
-                 test_size=.1,
-                 val_size=.2,
+                 test_size=.2,
+                 val_size=.1,
                  predict_size=0.,
                  batch_size=None,
                  shuffle=True,
@@ -61,28 +61,30 @@ def __init__(self,
         if train_size > 0:
             self.split_names.append('train')
             self.split_length.append(train_size)
-            self.loader_functions['train_dataloader'] = lambda: PinaDataLoader(
-                self.splits['train'], self.batch_size, self.condition_names)
+            self.loader_functions['train_dataloader'] = lambda \
+                x: PinaDataLoader(self.splits['train'], self.batch_size,
+                                  self.condition_names)
         if test_size > 0:
             self.split_length.append(test_size)
             self.split_names.append('test')
-            self.loader_functions['test_dataloader'] = lambda: PinaDataLoader(
+            self.loader_functions['test_dataloader'] = lambda x: PinaDataLoader(
                 self.splits['test'], self.batch_size, self.condition_names)
         if val_size > 0:
             self.split_length.append(val_size)
             self.split_names.append('val')
-            self.loader_functions['val_dataloader'] = lambda: PinaDataLoader(
+            self.loader_functions['val_dataloader'] = lambda x: PinaDataLoader(
                 self.splits['val'], self.batch_size, self.condition_names)
         if predict_size > 0:
             self.split_length.append(predict_size)
             self.split_names.append('predict')
-            self.loader_functions['predict_dataloader'] = lambda: PinaDataLoader(
+            self.loader_functions[
+                'predict_dataloader'] = lambda x: PinaDataLoader(
                 self.splits['predict'], self.batch_size, self.condition_names)
         self.splits = {k: {} for k in self.split_names}
         self.shuffle = shuffle
 
         for k, v in self.loader_functions.items():
-            setattr(self, k, v)
+            setattr(self, k, v.__get__(self, PinaDataModule))
 
     def prepare_data(self):
         if self.datasets is None:
@@ -140,12 +142,11 @@ def dataset_split(dataset, lengths, seed=None, shuffle=True):
                 indices = torch.randperm(sum(lengths))
             dataset.apply_shuffle(indices)
 
-        indices = torch.arange(0, sum(lengths), 1, dtype=torch.uint8).tolist()
         offsets = [
             sum(lengths[:i]) if i > 0 else 0 for i in range(len(lengths))
         ]
         return [
-            PinaSubset(dataset, indices[offset:offset + length])
+            PinaSubset(dataset, slice(offset, offset + length))
             for offset, length in zip(offsets, lengths)
         ]
 

diff --git a/pina/data/pina_batch.py b/pina/data/pina_batch.py
@@ -27,21 +27,27 @@ def __len__(self):
         :rtype: int
         """
         length = 0
-        for dataset in dir(self):
+        for dataset in self.attributes:
             attribute = getattr(self, dataset)
-            if isinstance(attribute, list):
-                length += len(getattr(self, dataset))
+            length += len(attribute)
         return length
 
     def __getattribute__(self, item):
         if item in super().__getattribute__('attributes'):
             dataset = super().__getattribute__(item)
             index = super().__getattribute__(item + '_idx')
-            return PinaSubset(dataset.dataset, dataset.indices[index])
+            if isinstance(dataset, PinaSubset):
+                dataset_index = dataset.indices
+                if isinstance(dataset_index, slice):
+                    index = slice(dataset_index.start + index.start,
+                                  min(dataset_index.start + index.stop,
+                                      dataset_index.stop))
+            return PinaSubset(dataset.dataset, index,
+                              require_grad=self.require_grad)
         return super().__getattribute__(item)
 
     def __getattr__(self, item):
         if item == 'data' and len(self.attributes) == 1:
             item = self.attributes[0]
-            return super().__getattribute__(item)
+            return self.__getattribute__(item)
         raise AttributeError(f"'Batch' object has no attribute '{item}'")
diff --git a/pina/data/pina_dataloader.py b/pina/data/pina_dataloader.py
@@ -2,6 +2,7 @@
 This module is used to create an iterable object used during training
 """
 import math
+
 from .pina_batch import Batch
 
 
@@ -26,6 +27,7 @@ def __init__(self, dataset_dict, batch_size, condition_names) -> None:
         """
         self.condition_names = condition_names
         self.dataset_dict = dataset_dict
+        self.batch_size = batch_size
         self._init_batches(batch_size)
 
     def _init_batches(self, batch_size=None):
@@ -36,20 +38,46 @@ def _init_batches(self, batch_size=None):
         n_elements = sum(len(v) for v in self.dataset_dict.values())
         if batch_size is None:
             batch_size = n_elements
-        indexes_dict = {}
+            self.batch_size = n_elements
         n_batches = int(math.ceil(n_elements / batch_size))
-        for k, v in self.dataset_dict.items():
-            if n_batches != 1:
-                indexes_dict[k] = math.floor(len(v) / (n_batches - 1))
-            else:
-                indexes_dict[k] = len(v)
-        for i in range(n_batches):
-            temp_dict = {}
-            for k, v in indexes_dict.items():
-                if i != n_batches - 1:
-                    temp_dict[k] = slice(i * v, (i + 1) * v)
+        indexes_dict = {
+            k: math.floor(len(v) / n_batches) if n_batches != 1 else len(v) for
+            k, v in self.dataset_dict.items()}
+
+        dataset_names = list(self.dataset_dict.keys())
+        num_el_per_batch = [{i: indexes_dict[i] for i in dataset_names} for _
+                            in range(n_batches - 1)] + [
+                               {i: 0 for i in dataset_names}]
+        reminders = {
+            i: len(self.dataset_dict[i]) - indexes_dict[i] * (n_batches - 1) for
+            i in dataset_names}
+        dataset_names = iter(dataset_names)
+        name = next(dataset_names, None)
+        for batch in num_el_per_batch:
+            tot_num_el = sum(batch.values())
+            batch_reminder = batch_size - tot_num_el
+            for _ in range(batch_reminder):
+                if name is None:
+                    break
+                if reminders[name] > 0:
+                    batch[name] += 1
+                    reminders[name] -= 1
                 else:
-                    temp_dict[k] = slice(i * v, len(self.dataset_dict[k]))
+                    name = next(dataset_names, None)
+                    if name is None:
+                        break
+                    batch[name] += 1
+                    reminders[name] -= 1
+
+        reminders, dataset_names, indexes_dict = None, None, None  # free memory
+        actual_indices = {k: 0 for k in self.dataset_dict.keys()}
+        for batch in num_el_per_batch:
+            temp_dict = {}
+            total_length = 0
+            for k, v in batch.items():
+                temp_dict[k] = slice(actual_indices[k], actual_indices[k] + v)
+                actual_indices[k] = actual_indices[k] + v
+                total_length += v
             self.batches.append(
                 Batch(idx_dict=temp_dict, dataset_dict=self.dataset_dict))
 

diff --git a/pina/data/pina_subset.py b/pina/data/pina_subset.py
@@ -11,7 +11,7 @@ class PinaSubset:
     """
     __slots__ = ['dataset', 'indices', 'require_grad']
 
-    def __init__(self, dataset, indices, require_grad=True):
+    def __init__(self, dataset, indices, require_grad=False):
         """
         TODO
         """
@@ -23,14 +23,27 @@ def __len__(self):
         """
         TODO
         """
+        if isinstance(self.indices, slice):
+            return self.indices.stop - self.indices.start
         return len(self.indices)
 
     def __getattr__(self, name):
         tensor = self.dataset.__getattribute__(name)
         if isinstance(tensor, (LabelTensor, Tensor)):
-            tensor = tensor[[self.indices]].to(self.dataset.device)
+            if isinstance(self.indices, slice):
+                tensor = tensor[self.indices]
+                if (tensor.device != self.dataset.device
+                        and tensor.dtype == float32):
+                    tensor = tensor.to(self.dataset.device)
+            elif isinstance(self.indices, list):
+                tensor = tensor[[self.indices]].to(self.dataset.device)
+            else:
+                raise ValueError(f"Indices type {type(self.indices)} not "
+                                 f"supported")
             return tensor.requires_grad_(
                 self.require_grad) if tensor.dtype == float32 else tensor
         if isinstance(tensor, list):
-            return [tensor[i] for i in self.indices]
+            if isinstance(self.indices, list):
+                return [tensor[i] for i in self.indices]
+            return tensor[self.indices]
         raise AttributeError(f"No attribute named {name}")