datagenerator.histogram_equalizer

Functions for applying historgram equalization to array to fit standard normal distribution (including kurtosis)

  1"""
  2Functions for applying historgram equalization to array
  3to fit standard normal distribution (including kurtosis)
  4"""
  5
  6import sys
  7from math import sqrt
  8
  9import numpy as np
 10from scipy.stats import kurtosis
 11
 12
 13def _derive_standard_normal(im, nbr_bins=256, verbose=False):
 14    ##############################################################
 15    ##    The function derives the equalization array to scale
 16    ##    the values in input array 'im' based on
 17    ##    a converson of its histogram into a uniform distribution.
 18    ##    When 'pcteq' is 100%, the output will have a uniform
 19    ##    distribution. When 'pcteq' is 0%, the function returns the
 20    ##    input array unchanged. 'pcteq' = 50% returns an array
 21    ##    with half the impact of conversion to a uniform distribution.
 22    ##
 23    ##    does not apply transform, just returns curve
 24    ##############################################################
 25
 26    from numpy import histogram
 27    from scipy.interpolate import interp1d
 28
 29    if verbose:
 30        print(" Deriving equalizing transform to standard normal shape")
 31
 32    # decimate to about 10000 traces. Perform calculations on decimated subset
 33    decimate = max(1, int(sqrt(im.flatten().shape[0] / 10000)))
 34    amin, amax = im.min(), im.max()
 35    _data = im.flatten()[::decimate]
 36    _data = np.hstack((amin, _data, amax))
 37    _data = np.hstack((_data, -_data))
 38
 39    # assume data is centered roughly at zero
 40    # find biggest abs value and make data range double that
 41    # (symmetrical about zero)
 42    # These are the bin boundaries
 43    histrange = _data.max() - _data.min()
 44    inputbins = np.arange(_data.min(), _data.max(), histrange / (nbr_bins - 1))
 45    if len(inputbins) == 0:
 46        return im
 47    imhist, bins = histogram(_data, bins=nbr_bins, density=True)
 48
 49    imhist[0] = 0.0
 50    imhist[-1] = 0.0
 51
 52    # calculate center of bins
 53    centerbins = np.linspace(_data.min(), _data.max(), nbr_bins)
 54    # centerbins  =np.empty(len(imhist),dtype=float)
 55    # for i in range(len(bins)-1):
 56    #    centerbins[i] = .5 * ( bins[i] + bins[i+1] )
 57
 58    # rescale to outsides of first and last bin
 59    # centerbins *= histrange / 2. / centerbins[-1]
 60
 61    # smooth input pdf using moving median filter to remove spike values
 62    imhistmean = np.empty(len(imhist), dtype=float)
 63    imhistmedian = np.empty(len(imhist), dtype=float)
 64    deltacdf = np.empty(len(imhist), dtype=float)
 65
 66    for i in range(len(imhist)):
 67        indexmin = max(0, i - 2)
 68        indexmax = min(i + 2, len(imhist))
 69        imhistmedian[i] = np.median(imhist[indexmin:indexmax])
 70
 71    if imhistmedian[imhistmedian != 0].shape[0] == 0:
 72        imhistmedian = imhist
 73
 74    imhistmedian[0] = 0.0
 75    cdf = imhistmedian.cumsum()  # cumulative distribution function
 76
 77    if cdf[-1] == 0:
 78        print(" ... in histeqDerive... bins = ", bins)
 79        print(" ... in histeqDerive... imhist = ", imhist)
 80
 81    # normalize cdf
 82    cdf /= cdf[-1]
 83
 84    ###
 85    ### create standard normal distribution to which to transform
 86    ###
 87    fit_vals = np.random.normal(loc=0.0, scale=1.0, size=_data.flatten().shape[0])
 88    fit_vals = np.hstack((fit_vals, -fit_vals))
 89
 90    # find biggest abs value and make data range double that
 91    # (symmetrical about zero)
 92    # These are the bin boundaries
 93    fit_histrange = fit_vals.max() - fit_vals.min()
 94    fit_inputbins = np.arange(
 95        fit_vals.min(), fit_vals.max(), fit_histrange / (nbr_bins - 1)
 96    )
 97    fit_imhist, fit_bins = histogram(fit_vals, bins=nbr_bins, density=True)
 98
 99    fit_imhist[0] = 0.0
100    fit_imhist[-1] = 0.0
101
102    # calculate center of bins
103    fit_centerbins = np.linspace(fit_vals.min(), fit_vals.max(), nbr_bins)
104    # fit_centerbins  = np.empty(len(fit_imhist), dtype=float)
105    # for i in range(len(fit_bins)-1):
106    #    fit_centerbins[i] = .5 * ( fit_bins[i] + fit_bins[i+1] )
107
108    # normalized cdf
109    fit_cdf = fit_imhist.cumsum()
110    fit_cdf /= fit_cdf[-1]
111
112    # compute deltacdf
113    f = interp1d(fit_cdf, cdf)
114    normed_centerbins = centerbins - centerbins.min()
115    normed_centerbins /= normed_centerbins.max()
116    deltacdf = f(normed_centerbins)
117    equality_line = np.linspace(0.0, 1.0, cdf.shape[0])
118    deltacdf = deltacdf - equality_line
119
120    # calculate target amplitude
121    # (force it to pass through zero and be symmetric)
122    target_centerbins = fit_centerbins + deltacdf * (
123        fit_centerbins.max() - fit_centerbins.min()
124    )
125    mirrored = -target_centerbins[::-1]
126    target_centerbins = (target_centerbins + mirrored) / 2.0
127
128    # compute output with value that approximately
129    # fit standard-normal distribution (including kurtosis)
130    f = interp1d(centerbins, target_centerbins)
131
132    # ensure stdev is 1.0
133    im_std = f(_data).std()
134
135    return centerbins, target_centerbins / im_std
136
137
138def _apply_standard_normal(im, centerbins, target_centerbins, verbose=False):
139    ##############################################################
140    ##    The function derives the equalization array to scale
141    ##    the values in input array 'im' based on
142    ##    a converson of its histogram into a uniform distribution.
143    ##    When 'pcteq' is 100%, the output will have a uniform
144    ##    distribution. When 'pcteq' is 0%, the function returns the
145    ##    input array unchanged. 'pcteq' = 50% returns an array
146    ##    with half the impact of conversion to a uniform distribution.
147    ##
148    ##    only applies transform curve from _derive_standard_normal
149    ##############################################################
150
151    from scipy.interpolate import interp1d
152
153    if verbose:
154        print(" Applying equalizing transform to standard normal shape")
155
156    # compute output with value that approximately fit
157    # standard-normal distribution (including kurtosis)
158    f = interp1d(centerbins, target_centerbins)
159
160    # output array
161    output_array = f(im)
162
163    return output_array
164
165
166def load_centerbins(centerbin_data, include_mean=False):
167    print("Loading centerbin data from: " + centerbin_data)
168    sys.stdout.flush()
169    data = np.load(centerbin_data)
170    centerbins = data["centerbins"]
171    target_centerbins = data["target_centerbins"]
172    if include_mean == True:
173        data_mean = data["mean"]
174        print("Done.")
175        sys.stdout.flush()
176        return centerbins, target_centerbins, data_mean
177    else:
178        print("Done.")
179        sys.stdout.flush()
180        return centerbins, target_centerbins
181
182
183def normalize_seismic(
184    seismic,
185    verbose=False,
186    centerbin_data=None,
187    save_file=None,
188    load_mean_and_cbs=False,
189    return_stats=False,
190    stats_in=None,
191):
192    """
193    Normalise a 4D volume numpy array
194    """
195
196    if stats_in != None:
197        centerbins, target_centerbins, seismic_mean = stats_in
198        seismic -= seismic_mean
199
200    elif load_mean_and_cbs == True:
201        centerbins, target_centerbins, seismic_mean = load_centerbins(
202            centerbin_data, include_mean=True
203        )
204        seismic -= seismic_mean
205
206    else:
207        seismic_mean = seismic.mean()
208        seismic -= seismic_mean
209        if centerbin_data is None:
210            centerbins, target_centerbins = _derive_standard_normal(seismic)
211        else:
212            centerbins, target_centerbins = load_centerbins(centerbin_data)
213
214    if verbose:
215        print(
216            "mean/std/kurtosis -- before: ",
217            seismic_mean,
218            seismic.std(),
219            kurtosis(seismic.flatten()),
220        )
221        sys.stdout.flush()
222
223    if save_file is not None:
224        print("Saving centerbin data to: " + save_file)
225        sys.stdout.flush()
226
227        np.savez(
228            save_file,
229            centerbins=centerbins,
230            target_centerbins=target_centerbins,
231            mean=seismic_mean,
232        )
233        print("Done.")
234        sys.stdout.flush()
235
236    seismic = _apply_standard_normal(seismic, centerbins, target_centerbins)
237    if verbose:
238        print(
239            "mean/std/kurtosis -- after:  ",
240            seismic.mean(),
241            seismic.std(),
242            kurtosis(seismic.flatten()),
243        )
244        sys.stdout.flush()
245    if return_stats == True:
246        return centerbins, target_centerbins, seismic_mean
247    else:
248        return seismic
def load_centerbins(centerbin_data, include_mean=False):
167def load_centerbins(centerbin_data, include_mean=False):
168    print("Loading centerbin data from: " + centerbin_data)
169    sys.stdout.flush()
170    data = np.load(centerbin_data)
171    centerbins = data["centerbins"]
172    target_centerbins = data["target_centerbins"]
173    if include_mean == True:
174        data_mean = data["mean"]
175        print("Done.")
176        sys.stdout.flush()
177        return centerbins, target_centerbins, data_mean
178    else:
179        print("Done.")
180        sys.stdout.flush()
181        return centerbins, target_centerbins
def normalize_seismic( seismic, verbose=False, centerbin_data=None, save_file=None, load_mean_and_cbs=False, return_stats=False, stats_in=None):
184def normalize_seismic(
185    seismic,
186    verbose=False,
187    centerbin_data=None,
188    save_file=None,
189    load_mean_and_cbs=False,
190    return_stats=False,
191    stats_in=None,
192):
193    """
194    Normalise a 4D volume numpy array
195    """
196
197    if stats_in != None:
198        centerbins, target_centerbins, seismic_mean = stats_in
199        seismic -= seismic_mean
200
201    elif load_mean_and_cbs == True:
202        centerbins, target_centerbins, seismic_mean = load_centerbins(
203            centerbin_data, include_mean=True
204        )
205        seismic -= seismic_mean
206
207    else:
208        seismic_mean = seismic.mean()
209        seismic -= seismic_mean
210        if centerbin_data is None:
211            centerbins, target_centerbins = _derive_standard_normal(seismic)
212        else:
213            centerbins, target_centerbins = load_centerbins(centerbin_data)
214
215    if verbose:
216        print(
217            "mean/std/kurtosis -- before: ",
218            seismic_mean,
219            seismic.std(),
220            kurtosis(seismic.flatten()),
221        )
222        sys.stdout.flush()
223
224    if save_file is not None:
225        print("Saving centerbin data to: " + save_file)
226        sys.stdout.flush()
227
228        np.savez(
229            save_file,
230            centerbins=centerbins,
231            target_centerbins=target_centerbins,
232            mean=seismic_mean,
233        )
234        print("Done.")
235        sys.stdout.flush()
236
237    seismic = _apply_standard_normal(seismic, centerbins, target_centerbins)
238    if verbose:
239        print(
240            "mean/std/kurtosis -- after:  ",
241            seismic.mean(),
242            seismic.std(),
243            kurtosis(seismic.flatten()),
244        )
245        sys.stdout.flush()
246    if return_stats == True:
247        return centerbins, target_centerbins, seismic_mean
248    else:
249        return seismic

Normalise a 4D volume numpy array