datagenerator.histogram_equalizer
Functions for applying historgram equalization to array to fit standard normal distribution (including kurtosis)
1""" 2Functions for applying historgram equalization to array 3to fit standard normal distribution (including kurtosis) 4""" 5 6import sys 7from math import sqrt 8 9import numpy as np 10from scipy.stats import kurtosis 11 12 13def _derive_standard_normal(im, nbr_bins=256, verbose=False): 14 ############################################################## 15 ## The function derives the equalization array to scale 16 ## the values in input array 'im' based on 17 ## a converson of its histogram into a uniform distribution. 18 ## When 'pcteq' is 100%, the output will have a uniform 19 ## distribution. When 'pcteq' is 0%, the function returns the 20 ## input array unchanged. 'pcteq' = 50% returns an array 21 ## with half the impact of conversion to a uniform distribution. 22 ## 23 ## does not apply transform, just returns curve 24 ############################################################## 25 26 from numpy import histogram 27 from scipy.interpolate import interp1d 28 29 if verbose: 30 print(" Deriving equalizing transform to standard normal shape") 31 32 # decimate to about 10000 traces. Perform calculations on decimated subset 33 decimate = max(1, int(sqrt(im.flatten().shape[0] / 10000))) 34 amin, amax = im.min(), im.max() 35 _data = im.flatten()[::decimate] 36 _data = np.hstack((amin, _data, amax)) 37 _data = np.hstack((_data, -_data)) 38 39 # assume data is centered roughly at zero 40 # find biggest abs value and make data range double that 41 # (symmetrical about zero) 42 # These are the bin boundaries 43 histrange = _data.max() - _data.min() 44 inputbins = np.arange(_data.min(), _data.max(), histrange / (nbr_bins - 1)) 45 if len(inputbins) == 0: 46 return im 47 imhist, bins = histogram(_data, bins=nbr_bins, density=True) 48 49 imhist[0] = 0.0 50 imhist[-1] = 0.0 51 52 # calculate center of bins 53 centerbins = np.linspace(_data.min(), _data.max(), nbr_bins) 54 # centerbins =np.empty(len(imhist),dtype=float) 55 # for i in range(len(bins)-1): 56 # centerbins[i] = .5 * ( bins[i] + bins[i+1] ) 57 58 # rescale to outsides of first and last bin 59 # centerbins *= histrange / 2. / centerbins[-1] 60 61 # smooth input pdf using moving median filter to remove spike values 62 imhistmean = np.empty(len(imhist), dtype=float) 63 imhistmedian = np.empty(len(imhist), dtype=float) 64 deltacdf = np.empty(len(imhist), dtype=float) 65 66 for i in range(len(imhist)): 67 indexmin = max(0, i - 2) 68 indexmax = min(i + 2, len(imhist)) 69 imhistmedian[i] = np.median(imhist[indexmin:indexmax]) 70 71 if imhistmedian[imhistmedian != 0].shape[0] == 0: 72 imhistmedian = imhist 73 74 imhistmedian[0] = 0.0 75 cdf = imhistmedian.cumsum() # cumulative distribution function 76 77 if cdf[-1] == 0: 78 print(" ... in histeqDerive... bins = ", bins) 79 print(" ... in histeqDerive... imhist = ", imhist) 80 81 # normalize cdf 82 cdf /= cdf[-1] 83 84 ### 85 ### create standard normal distribution to which to transform 86 ### 87 fit_vals = np.random.normal(loc=0.0, scale=1.0, size=_data.flatten().shape[0]) 88 fit_vals = np.hstack((fit_vals, -fit_vals)) 89 90 # find biggest abs value and make data range double that 91 # (symmetrical about zero) 92 # These are the bin boundaries 93 fit_histrange = fit_vals.max() - fit_vals.min() 94 fit_inputbins = np.arange( 95 fit_vals.min(), fit_vals.max(), fit_histrange / (nbr_bins - 1) 96 ) 97 fit_imhist, fit_bins = histogram(fit_vals, bins=nbr_bins, density=True) 98 99 fit_imhist[0] = 0.0 100 fit_imhist[-1] = 0.0 101 102 # calculate center of bins 103 fit_centerbins = np.linspace(fit_vals.min(), fit_vals.max(), nbr_bins) 104 # fit_centerbins = np.empty(len(fit_imhist), dtype=float) 105 # for i in range(len(fit_bins)-1): 106 # fit_centerbins[i] = .5 * ( fit_bins[i] + fit_bins[i+1] ) 107 108 # normalized cdf 109 fit_cdf = fit_imhist.cumsum() 110 fit_cdf /= fit_cdf[-1] 111 112 # compute deltacdf 113 f = interp1d(fit_cdf, cdf) 114 normed_centerbins = centerbins - centerbins.min() 115 normed_centerbins /= normed_centerbins.max() 116 deltacdf = f(normed_centerbins) 117 equality_line = np.linspace(0.0, 1.0, cdf.shape[0]) 118 deltacdf = deltacdf - equality_line 119 120 # calculate target amplitude 121 # (force it to pass through zero and be symmetric) 122 target_centerbins = fit_centerbins + deltacdf * ( 123 fit_centerbins.max() - fit_centerbins.min() 124 ) 125 mirrored = -target_centerbins[::-1] 126 target_centerbins = (target_centerbins + mirrored) / 2.0 127 128 # compute output with value that approximately 129 # fit standard-normal distribution (including kurtosis) 130 f = interp1d(centerbins, target_centerbins) 131 132 # ensure stdev is 1.0 133 im_std = f(_data).std() 134 135 return centerbins, target_centerbins / im_std 136 137 138def _apply_standard_normal(im, centerbins, target_centerbins, verbose=False): 139 ############################################################## 140 ## The function derives the equalization array to scale 141 ## the values in input array 'im' based on 142 ## a converson of its histogram into a uniform distribution. 143 ## When 'pcteq' is 100%, the output will have a uniform 144 ## distribution. When 'pcteq' is 0%, the function returns the 145 ## input array unchanged. 'pcteq' = 50% returns an array 146 ## with half the impact of conversion to a uniform distribution. 147 ## 148 ## only applies transform curve from _derive_standard_normal 149 ############################################################## 150 151 from scipy.interpolate import interp1d 152 153 if verbose: 154 print(" Applying equalizing transform to standard normal shape") 155 156 # compute output with value that approximately fit 157 # standard-normal distribution (including kurtosis) 158 f = interp1d(centerbins, target_centerbins) 159 160 # output array 161 output_array = f(im) 162 163 return output_array 164 165 166def load_centerbins(centerbin_data, include_mean=False): 167 print("Loading centerbin data from: " + centerbin_data) 168 sys.stdout.flush() 169 data = np.load(centerbin_data) 170 centerbins = data["centerbins"] 171 target_centerbins = data["target_centerbins"] 172 if include_mean == True: 173 data_mean = data["mean"] 174 print("Done.") 175 sys.stdout.flush() 176 return centerbins, target_centerbins, data_mean 177 else: 178 print("Done.") 179 sys.stdout.flush() 180 return centerbins, target_centerbins 181 182 183def normalize_seismic( 184 seismic, 185 verbose=False, 186 centerbin_data=None, 187 save_file=None, 188 load_mean_and_cbs=False, 189 return_stats=False, 190 stats_in=None, 191): 192 """ 193 Normalise a 4D volume numpy array 194 """ 195 196 if stats_in != None: 197 centerbins, target_centerbins, seismic_mean = stats_in 198 seismic -= seismic_mean 199 200 elif load_mean_and_cbs == True: 201 centerbins, target_centerbins, seismic_mean = load_centerbins( 202 centerbin_data, include_mean=True 203 ) 204 seismic -= seismic_mean 205 206 else: 207 seismic_mean = seismic.mean() 208 seismic -= seismic_mean 209 if centerbin_data is None: 210 centerbins, target_centerbins = _derive_standard_normal(seismic) 211 else: 212 centerbins, target_centerbins = load_centerbins(centerbin_data) 213 214 if verbose: 215 print( 216 "mean/std/kurtosis -- before: ", 217 seismic_mean, 218 seismic.std(), 219 kurtosis(seismic.flatten()), 220 ) 221 sys.stdout.flush() 222 223 if save_file is not None: 224 print("Saving centerbin data to: " + save_file) 225 sys.stdout.flush() 226 227 np.savez( 228 save_file, 229 centerbins=centerbins, 230 target_centerbins=target_centerbins, 231 mean=seismic_mean, 232 ) 233 print("Done.") 234 sys.stdout.flush() 235 236 seismic = _apply_standard_normal(seismic, centerbins, target_centerbins) 237 if verbose: 238 print( 239 "mean/std/kurtosis -- after: ", 240 seismic.mean(), 241 seismic.std(), 242 kurtosis(seismic.flatten()), 243 ) 244 sys.stdout.flush() 245 if return_stats == True: 246 return centerbins, target_centerbins, seismic_mean 247 else: 248 return seismic
def
load_centerbins(centerbin_data, include_mean=False):
167def load_centerbins(centerbin_data, include_mean=False): 168 print("Loading centerbin data from: " + centerbin_data) 169 sys.stdout.flush() 170 data = np.load(centerbin_data) 171 centerbins = data["centerbins"] 172 target_centerbins = data["target_centerbins"] 173 if include_mean == True: 174 data_mean = data["mean"] 175 print("Done.") 176 sys.stdout.flush() 177 return centerbins, target_centerbins, data_mean 178 else: 179 print("Done.") 180 sys.stdout.flush() 181 return centerbins, target_centerbins
def
normalize_seismic( seismic, verbose=False, centerbin_data=None, save_file=None, load_mean_and_cbs=False, return_stats=False, stats_in=None):
184def normalize_seismic( 185 seismic, 186 verbose=False, 187 centerbin_data=None, 188 save_file=None, 189 load_mean_and_cbs=False, 190 return_stats=False, 191 stats_in=None, 192): 193 """ 194 Normalise a 4D volume numpy array 195 """ 196 197 if stats_in != None: 198 centerbins, target_centerbins, seismic_mean = stats_in 199 seismic -= seismic_mean 200 201 elif load_mean_and_cbs == True: 202 centerbins, target_centerbins, seismic_mean = load_centerbins( 203 centerbin_data, include_mean=True 204 ) 205 seismic -= seismic_mean 206 207 else: 208 seismic_mean = seismic.mean() 209 seismic -= seismic_mean 210 if centerbin_data is None: 211 centerbins, target_centerbins = _derive_standard_normal(seismic) 212 else: 213 centerbins, target_centerbins = load_centerbins(centerbin_data) 214 215 if verbose: 216 print( 217 "mean/std/kurtosis -- before: ", 218 seismic_mean, 219 seismic.std(), 220 kurtosis(seismic.flatten()), 221 ) 222 sys.stdout.flush() 223 224 if save_file is not None: 225 print("Saving centerbin data to: " + save_file) 226 sys.stdout.flush() 227 228 np.savez( 229 save_file, 230 centerbins=centerbins, 231 target_centerbins=target_centerbins, 232 mean=seismic_mean, 233 ) 234 print("Done.") 235 sys.stdout.flush() 236 237 seismic = _apply_standard_normal(seismic, centerbins, target_centerbins) 238 if verbose: 239 print( 240 "mean/std/kurtosis -- after: ", 241 seismic.mean(), 242 seismic.std(), 243 kurtosis(seismic.flatten()), 244 ) 245 sys.stdout.flush() 246 if return_stats == True: 247 return centerbins, target_centerbins, seismic_mean 248 else: 249 return seismic
Normalise a 4D volume numpy array