comments and docstring improvements

GFZ · Nov 27, 2024 · 5d7a20e · 5d7a20e
1 parent f1d9647
commit 5d7a20e
Show file tree

Hide file tree

Showing 4 changed files with 29 additions and 26 deletions.
diff --git a/egsim/api/data/client/snippets/get_egsim_predictions.py b/egsim/api/data/client/snippets/get_egsim_predictions.py
@@ -36,11 +36,12 @@ def get_egsim_predictions(
 
     A [pandas DataFrame](https://pandas.pydata.org/docs/user_guide/dsintro.html#dataframe)
     
-    Each row denotes a scenario (i.e., a combination of a configured Rupture and Site
-    properties), the first column a unique scenario identifier (incremental and
-    starting from 0) and each remaining column denotes:
+    Each row denotes a scenario (i.e., a combination of a given Rupture and Site) and
+    each column one of the scenario's configured or computed properties. More
+    specifically, except for the first column (denoting a unique scenario identifier,
+    incremental and starting from 0), each column denotes:
 
-    - a computed prediction if the first chunk is an intensity measure type
+    - a computed prediction if the first chunk of its name is an intensity measure type
       (e.g. "PGA median BindiEtAl2014Rjb"): in this case, the second chunk is the metric 
       type (e.g. "median") and the third the predicting model ("BindiEtAl2014Rjb")
     

diff --git a/egsim/api/data/client/snippets/get_egsim_residuals.py b/egsim/api/data/client/snippets/get_egsim_residuals.py
@@ -51,17 +51,18 @@ def get_egsim_residuals(
 
     A [pandas DataFrame](https://pandas.pydata.org/docs/user_guide/dsintro.html#dataframe)
     
-    If ranking is True, then each row denotes a model (reported in the 1st column)
-    and each column a measure of fit (reported in the 1st row).
+    If ranking is True, then each row denotes a model (reported in the first column)
+    and each column a measure of fit (reported in the first row).
     
-    If ranking is False, then each row denotes a flatfile record, the first column
-    the record id (the position in the original flatfile, starting from 0) and each
-    remaining column denotes:
+    If ranking is False, then each row denotes a flatfile record and each column one
+    of the record's configured or computed properties. More specifically, except for
+    the first column (denoting the record id, i.e. the position in the original
+    flatfile, starting from 0), each column denotes:
     
-    - a computed residual or prediction, if the first chunk is an intensity measure type
-      (e.g. "PGA total_residual BindiEtAl2014Rjb"): in this case, the second chunk is 
-      the metric type ("total_residual") and the third the predicting model
-      ("BindiEtAl2014Rjb")
+    - a computed residual or prediction, if the first chunk of its name is an intensity
+      measure type (e.g. "PGA total_residual BindiEtAl2014Rjb"): in this case, the
+      second chunk is the metric type ("total_residual") and the third the predicting
+      model ("BindiEtAl2014Rjb")
     
     - the flatfile data relative to the computed prediction if the first chunk
       is the text "input"  (e.g., "input distance_measure rrup"): in this case, the

diff --git a/egsim/smtk/flatfile.py b/egsim/smtk/flatfile.py
@@ -453,10 +453,9 @@ def cast_to_dtype(
 
 
 class FlatfileError(InputError):
-    """General flatfile column(s) error. Inherits from smtk.validators.InputError.
-    Note that the str representation equals the init arguments comma-separated:
-    FlatfileError(arg1, arg2, ...) -> f"{str(arg1)}, {str(arg2)}, ..."
-    See subclasses for details
+    """Subclass of :class:`smtk.validators.InputError` for describing flatfile
+    errors (specifically, column errors). See subclasses for details. Remember
+    that `str(FlatfileError(arg1, arg2, ...)) = str(arg1) + ", " + str(arg2) + ...`
     """
     pass
 
@@ -484,8 +483,8 @@ class FlatfileMetadata:
 
     @staticmethod
     def has(column: str) -> bool:
-        """Return whether the given column name is a registered flatfile column
-        (column aliases included)
+        """Return whether the given argument is a registered flatfile column name
+        (including aliases)
         """
         return bool(FlatfileMetadata._props_of(column))
 
@@ -533,12 +532,12 @@ def get_help(column: str) -> str:
 
     @staticmethod
     def get_dtype(column: str) -> Union[ColumnDtype, None]:
-        """Return the data type of the given column name, as item of the
-        `ColumnDtype` Enum item, or None (=no data type set for the column).
-        If the column dtype is categorical, this method returns the (unique) dtype
-        of all categories. This means that `ColumnDtype.category` is never returned,
-        and to check that the column dtype is categorical, check that
-        `get_categories(column)` returns a non-empty list
+        """Return the data type of the given column name, as `ColumnDtype` Enum item,
+        or None if the column has no known data type.
+        If the column dtype is categorical, this method will *not* return
+        `ColumnDtype.category`, but the (unique) dtype of all categories (e.g.,
+        `ColumnDtype.str`). Hence, if you want to know whether the column dtype is
+        categorical, check that `get_categories(column)` returns a non-empty list
         """
         dtype = FlatfileMetadata._get_dtype(column)
         if isinstance(dtype, pd.CategoricalDtype):

diff --git a/egsim/smtk/validators.py b/egsim/smtk/validators.py
@@ -142,7 +142,9 @@ def validate_imt_sa_limits(model: GMPE, imts: dict[str, IMT]) -> dict[str, IMT]:
 
 
 class InputError(ValueError):
-    """Base **abstract** exception for any input error (model, imt, flatfile)"""
+    """Base **abstract** exception for any input error (model, imt, flatfile).
+    Note that `str(InputError(arg1, arg2, ...)) = str(arg1) + ", " + str(arg2) + ...`
+    """
 
     def __str__(self):
         """Reformat ``str(self)``"""