Spatio-Temporal Interpolation

Spatio-temporal interpolation module.

Support functions to perform interpolation in various ways.

interpolate(location_in=None, time_in=None, values_in=None, location_out=None, time_out=None, **kwargs)

Interpolates data based on input.

Interpolation using the scipy.interpolate.griddata function, which in turn uses linear barycentric interpolation.

It is assumed that the shapes of location_in, time_in and values_in are consistent.

When time_out has the same size as the number of rows of location_out, it is assumed these are aligned and will be treated as consistent, hence the output will be a column vector. If this is not the case, an interpolation will be performed for all combinations of rows in location_out with times of time_out, and the output will be shaped as [nof_location_values x nof_time_values].

If location_out == None, we only perform temporal (1D) interpolation. If time_out == None, we only perform spatial interpolation.

If linear interpolation is not possible for spatial or spatio-temporal interpolation, we use nearest neighbor interpolation and a warning will be displayed.
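
For instance, a purely temporal (1D) interpolation could be sketched as follows. This is an illustrative example rather than part of the module; it assumes the package is installed and that the import path matches the source file location shown further below.

```python
import numpy as np

from pyelq.support_functions.spatio_temporal_interpolation import interpolate

# Three observations at times 0, 1 and 2 seconds
time_in = np.array([[0.0], [1.0], [2.0]])
values_in = np.array([[0.0], [10.0], [20.0]])

# Times to interpolate to; location_out is left as None, so only temporal interpolation is performed
time_out = np.array([[0.5], [1.5]])

result = interpolate(time_in=time_in, values_in=values_in, time_out=time_out)
print(result)  # linear interpolation gives values of approximately 5.0 and 15.0
```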

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| location_in | ndarray | Array of size [nof_values x dimension] with locations to interpolate from | None |
| time_in | Union[ndarray, DatetimeArray] | Array of size [nof_values x 1] with timestamps or some form of time values (seconds) to interpolate from | None |
| values_in | ndarray | Array of size [nof_values x 1] with values to interpolate from | None |
| location_out | ndarray | Array of size [nof_location_values x dimension] with locations to interpolate to | None |
| time_out | Union[ndarray, DatetimeArray] | Array of size [nof_time_values x 1] with timestamps or some form of time values (seconds) to interpolate to | None |
| **kwargs | dict | Other keyword arguments which get passed into the griddata interpolation function | {} |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| result | ndarray | Array of size [nof_location_values x nof_time_values] with interpolated values |

Source code in src/pyelq/support_functions/spatio_temporal_interpolation.py
def interpolate(
    location_in: np.ndarray = None,
    time_in: Union[np.ndarray, pd.arrays.DatetimeArray] = None,
    values_in: np.ndarray = None,
    location_out: np.ndarray = None,
    time_out: Union[np.ndarray, pd.arrays.DatetimeArray] = None,
    **kwargs,
) -> np.ndarray:
    """Interpolates data based on input.

    Interpolation using the scipy.interpolate.griddata function, which in turn uses linear barycentric interpolation.

    It is assumed that the shapes of location_in, time_in and values_in are consistent

    When time_out has the same size as the number of rows of location_out, it is assumed these are aligned and will be
    treated as consistent, hence the output will be a column vector.
    If this is not the case, an interpolation will be performed for all combinations of rows in location_out with times
    of time_out and the output will be shaped as [nof_location_values x nof_time_values]

    If location_out == None, we only perform temporal (1D) interpolation.
    If time_out == None, we only perform spatial interpolation

    If linear interpolation is not possible for spatial or spatio-temporal interpolation, we use nearest neighbor
    interpolation and a warning will be displayed

    Args:
        location_in (np.ndarray): Array of size [nof_values x dimension] with locations to interpolate from
        time_in (Union[np.ndarray, pd.arrays.DatetimeArray]): Array of size [nof_values x 1] with timestamps or some
            form of time values (seconds) to interpolate from
        values_in (np.ndarray): Array of size [nof_values x 1] with values to interpolate from
        location_out (np.ndarray): Array of size [nof_location_values x dimension] with locations to interpolate to
        time_out (Union[np.ndarray, pd.arrays.DatetimeArray]): Array of size [nof_time_values x 1] with
            timestamps or some form of time values (seconds) to interpolate to
        **kwargs (dict): Other keyword arguments which get passed into the griddata interpolation function

    Returns:
        result (np.ndarray): Array of size [nof_location_values x nof_time_values] with interpolated values

    """
    _sense_check_interpolate_inputs(
        location_in=location_in, time_in=time_in, values_in=values_in, location_out=location_out, time_out=time_out
    )

    if (
        time_out is not None
        and isinstance(time_out, pd.arrays.DatetimeArray)
        and isinstance(time_in, pd.arrays.DatetimeArray)
    ):
        min_time_out = np.amin(time_out)
        time_out = (time_out - min_time_out).total_seconds()
        time_in = (time_in - min_time_out).total_seconds()

    if location_out is None:
        return _griddata(points_in=time_in, values=values_in, points_out=time_out, **kwargs)

    if time_out is None:
        return _griddata(points_in=location_in, values=values_in, points_out=location_out, **kwargs)

    if location_in.shape[0] != time_in.size:
        raise ValueError("Location and time are do not have consistent sizes")

    if location_out.shape[0] != time_out.size:
        location_temp = np.tile(location_out, (time_out.size, 1))
        time_temp = np.repeat(time_out.squeeze(), location_out.shape[0])
        out_array = np.column_stack((location_temp, time_temp))
    else:
        out_array = np.column_stack((location_out, time_out))

    in_array = np.column_stack((location_in, time_in))

    result = _griddata(points_in=in_array, values=values_in, points_out=out_array, **kwargs)

    if location_out.shape[0] != time_out.size:
        result = result.reshape((location_out.shape[0], time_out.size), order="C")

    return result
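
The following is a minimal spatio-temporal usage sketch with made-up values, assuming the import path shown above. The output locations and times have different sizes, so the result is computed for every combination and returned as a [nof_location_values x nof_time_values] array.

```python
import numpy as np

from pyelq.support_functions.spatio_temporal_interpolation import interpolate

# A 3 x 3 grid of locations observed at 3 times, flattened to 27 rows
xx, yy, tt = np.meshgrid(np.linspace(0, 1, 3), np.linspace(0, 1, 3), np.linspace(0, 2, 3))
location_in = np.column_stack((xx.ravel(), yy.ravel()))
time_in = tt.ravel().reshape(-1, 1)
values_in = (xx.ravel() + yy.ravel() + tt.ravel()).reshape(-1, 1)

# 2 output locations and 3 output times: the sizes differ, so interpolation
# is performed for all combinations of output locations and output times
location_out = np.array([[0.5, 0.5], [0.25, 0.75]])
time_out = np.array([[0.5], [1.0], [1.5]])

result = interpolate(
    location_in=location_in,
    time_in=time_in,
    values_in=values_in,
    location_out=location_out,
    time_out=time_out,
)
print(result.shape)  # (2, 3)
```

If time_out instead contained one time per row of location_out (equal sizes), the two would be treated as aligned and the result would be a single column of values.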

temporal_resampling(time_in, values_in, time_bin_edges, aggregate_function='mean', side='center')

Resamples data into a set of time bins.

Checks which values of time_in are within 2 consecutive values of time_bin_edges and performs the aggregate function on the corresponding values from values_in. time_in values outside the time_bin_edges are ignored. Empty bins will be assigned a 'NaN' value.

When 'time_in' is a sequence of time stamps, a DatetimeArray should be used. Otherwise, a np.ndarray should be used.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| time_in | Union[ndarray, DatetimeArray] | A vector of times which correspond to values_in. | required |
| values_in | ndarray | A vector of the values to be resampled. | required |
| time_bin_edges | Union[ndarray, DatetimeArray] | A vector of times which define the edges of the bins into which the data will be resampled. | required |
| aggregate_function | str | The function which is used to aggregate the data after it has been sorted into bins. Defaults to mean. | 'mean' |
| side | str | Which side of the time bins should be used to generate times_out. Possible values are: 'left', 'center', and 'right'. Defaults to 'center'. | 'center' |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| time_out | Union[ndarray, DatetimeArray] | Vector-like object containing the times of the resampled values, consistent with the time_in dtype and the side input argument. |
| values_out | ndarray | A vector of resampled values, according to the time bins and the aggregate function. |

Raises:

| Type | Description |
| --- | --- |
| ValueError | If any of the input arguments are not of the correct type or shape, this error is raised. |

Source code in src/pyelq/support_functions/spatio_temporal_interpolation.py
def temporal_resampling(
    time_in: Union[np.ndarray, pd.arrays.DatetimeArray],
    values_in: np.ndarray,
    time_bin_edges: Union[np.ndarray, pd.arrays.DatetimeArray],
    aggregate_function: str = "mean",
    side: str = "center",
) -> Tuple[Union[np.ndarray, pd.arrays.DatetimeArray], np.ndarray]:
    """Resamples data into a set of time bins.

    Checks which values of time_in are within 2 consecutive values of time_bin_edges and performs the aggregate
    function on the corresponding values from values_in. time_in values outside the time_bin_edges are ignored.
    Empty bins will be assigned a 'NaN' value.

    When 'time_in' is a sequence of time stamps, a DatetimeArray should be used. Otherwise, a np.ndarray should be used.

    Args:
        time_in (Union[np.ndarray, pd.arrays.DatetimeArray]): A vector of times which correspond to values_in.
        values_in (np.ndarray): A vector of the values to be resampled.
        time_bin_edges (Union[np.ndarray, pd.arrays.DatetimeArray]): A vector of times which define the edges of the
                                                                     bins into which the data will be resampled.
        aggregate_function (str, optional): The function which is used to aggregate the data after it has been
                                            sorted into bins. Defaults to mean.
        side (str, optional): Which side of the time bins should be used to generate times_out. Possible values are:
                              'left', 'center', and 'right'. Defaults to 'center'.

    Returns:
        time_out (Union[np.ndarray, pd.arrays.DatetimeArray]): Vector-like object containing the times of the resampled
                                                               values consistent with time_in dtype and side input
                                                               argument.
        values_out (np.ndarray): A vector of resampled values, according to the time bins and the aggregate function.

    Raises:
        ValueError: If any of the input arguments are not of the correct type or shape, this error is raised.

    """
    if not isinstance(time_bin_edges, type(time_in)) or values_in.size != time_in.size:
        raise ValueError("Arguments 'time_in', 'time_bin_edges' and/or 'values_in' are not of consistent type or size.")

    if not isinstance(aggregate_function, str):
        raise ValueError("The supplied 'aggregate_function' is not a string.")

    if side == "center":
        time_out = np.diff(time_bin_edges) / 2 + time_bin_edges[:-1]
    elif side == "left":
        time_out = time_bin_edges[:-1]
    elif side == "right":
        time_out = time_bin_edges[1:]
    else:
        raise ValueError(f"The 'side' argument must be 'left', 'center', or 'right', but received '{side}'.")

    zero_value = 0
    if isinstance(time_bin_edges, pd.arrays.DatetimeArray):
        zero_value = np.array(0).astype("<m8[ns]")

    if not np.all(np.diff(time_bin_edges) > zero_value):
        raise ValueError("Argument 'time_bin_edges' does not monotonically increase.")

    if np.any(time_in < time_bin_edges[0]) or np.any(time_in > time_bin_edges[-1]):
        warnings.warn("Values in time_in are outside of range of time_bin_edges. These values will be ignored.")

    index = np.searchsorted(time_bin_edges, time_in, side="left")
    grouped_vals = pd.Series(values_in).groupby(index).agg(aggregate_function)
    grouped_vals = grouped_vals.drop(index=[0, time_bin_edges.size], errors="ignore").sort_index()

    values_out = np.full(time_out.shape, np.nan)
    values_out[grouped_vals.index - 1] = grouped_vals.to_numpy()

    return time_out, values_out
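
A minimal usage sketch with made-up values, assuming the import path shown above: a numeric time series is resampled into two one-second bins using the default mean aggregation and the default 'center' side.

```python
import numpy as np

from pyelq.support_functions.spatio_temporal_interpolation import temporal_resampling

time_in = np.array([0.1, 0.4, 0.9, 1.2, 1.8])
values_in = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
time_bin_edges = np.array([0.0, 1.0, 2.0])

time_out, values_out = temporal_resampling(
    time_in, values_in, time_bin_edges, aggregate_function="mean", side="center"
)
print(time_out)    # [0.5 1.5], the bin centers
print(values_out)  # [2.  4.5], the mean of the values falling in each bin
```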