datagenerator.Parameters
from collections import defaultdict
import datetime
import json
import multiprocessing as mp
import os
import pathlib
import sqlite3
import subprocess
from subprocess import CalledProcessError

import numpy as np
import tables

dir_name = os.path.dirname(__file__)
CONFIG_PATH = os.path.abspath(os.path.join(dir_name, "../config/config_ht.json"))


class _Borg:
    # Any objects using the _Borg base class share the same state (Alex Martelli's Borg pattern)
    _shared_state = {}

    def __init__(self):
        self.__dict__ = self._shared_state


class Parameters(_Borg):
    """
    Parameter object storing all model parameters.

    Attributes
    ----------
    model_dir_name : str
        The name that the model directory will be given, by default `seismic`
    parameter_file : str
        User parameters are read from the 'user_config' json file, and
        additional model parameters are set.
    test_mode : int
        If test_mode is set to an integer, the lateral size of the model is
        reduced, e.g. 100 makes a 100x100 model. This reduces the amount of
        time the program takes to generate data, which is useful when
        testing a model.

        **Warning: if the number is too small, the model may fail because
        there is not enough space to place faults etc.**

        The value should ideally be >= 50
    runid : str
        The string runid will be appended to the final model directory.
    rpm_scaling_factors : dict
        These are user-defined parameters. You can use the defaults
        provided, but results might be unrealistic.
        These might need to be tuned to get realistic synthetic data.
    sqldict : dict
        A dictionary structure that stores all the parameters of the model.
        This dictionary eventually gets written to a sqlite DB file.

    Methods
    -------
    setup_model(rpm_factors=None) -> None:
        Method to set up all the necessary parameters to start a new model.
    make_directories() -> None:
        Method that generates all necessary directory structures on disk.
    write_key_file():
        Method to generate a key file that describes coordinate systems,
        track, bin, digi (inline, xlines, ...).
    write_to_logfile(msg, mainkey=None, subkey=None, val=""):
        Method that writes to the logfile.
    """

    def __init__(self, user_config: str = CONFIG_PATH, test_mode=None, runid=None):
        """
        Initialize the Parameters object.
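
        A minimal usage sketch (hypothetical runid; assumes the default
        config file at CONFIG_PATH exists)::

            params = Parameters(runid="demo")
            params.setup_model()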

        Parameters
        ----------
        user_config : `str`, optional
            The path on disk that points to a `.json` file containing the
            configuration for each run, by default CONFIG_PATH
        test_mode : `int`, optional
            The parameter that sets the running mode, by default None
        runid : `str`, optional
            The runid of the run; this comes in handy when you have many
            runs with various permutations of parameters, by default None
        """
        # Reset the parameter dict in case we are building models within a loop and the shared_state dict is not empty
        self._shared_state = {}
        super().__init__()
        self.model_dir_name: str = "seismic"
        self.parameter_file = user_config
        self.test_mode = test_mode
        self.runid = runid
        self.rpm_scaling_factors = dict()
        self.sqldict = defaultdict(dict)

    def __repr__(self):
        """
        Representation method

        Parameters
        ----------
        self : `Parameters`
            The instance of the Parameters object
        """
        # Make a nice repr instead of a print method
        items = ("\t{} = {}".format(k, v) for k, v in self.__dict__.items())
        return "{}:\n{}".format(self.__class__.__name__, "\n".join(sorted(items)))

    def __getitem__(self, key: str):
        """__getitem__

        Enable retrieval of values as though the class instance is a dict

        Parameters
        ----------
        key : str
            The key desired to be accessed

        Returns
        -------
        any
            Value of the key
        """
        # __init__ rebinds __dict__ to a fresh shared-state dict, so the
        # instance __dict__ holds the live parameter values
        return self.__dict__[key]

    def setup_model(self, rpm_factors=None) -> None:
        """
        Setup Model
        -----------
        Sets up the creation of essential parameters and directories

        Parameters
        ----------
        rpm_factors : `dict`, optional
            The rock physics model factors for generating the synthetic cube.
            By default the rpm factors come from a default in the main.py file

        Returns
        -------
        None
        """
        # Set model parameters
        self._set_model_parameters(self.model_dir_name)
        self.make_directories()
        self.write_key_file()
        self._setup_rpm_scaling_factors(rpm_factors)

        # Write model parameters to logfile
        self._write_initial_model_parameters_to_logfile()

    def make_directories(self) -> None:
        """
        Make directories.
        -----------------

        Creates the directories on disk that are necessary
        for the model to run.

        Parameters
        ----------
        self : `Parameters`

        Returns
        -------
        None
        """
        print(f"\nModel folder: {self.work_subfolder}")
        self.sqldict["model_id"] = pathlib.Path(self.work_subfolder).name
        for folder in [self.project_folder, self.work_subfolder, self.temp_folder]:
            try:
                os.stat(folder)
            except OSError:
                print(f"Creating directory: {folder}")
                # Try making folders (can fail if multiple models are being built simultaneously in a new dir)
                try:
                    os.mkdir(folder)
                except OSError:
                    pass
        try:
            os.system(f"chmod -R 777 {self.work_subfolder}")
        except OSError:
            print(f"Could not chmod {self.work_subfolder}. Continuing...")

    def write_key_file(self) -> None:
        """
        Write key file
        --------------

        Writes a file that contains important parameters about the synthetic
        cube, such as coordinate transforms and sizes.
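
        The keyfile is plain text with one ``NAME TYPE VALUE`` entry per
        line (C = character, I = integer, R = real). An illustrative
        excerpt::

                                MESSAGE_FILE
            3D_NAME C synthetic data for training
            COORD_METHOD I 1
            N_BIN I 300
            REAL_DELTA_X R 100.000000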

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        # Set plausible key file values
        geom_expand = dict()
        geom_expand["3D_NAME"] = "synthetic data for training"
        geom_expand["COORD_METHOD"] = 1
        geom_expand["DATA_TYPE"] = "3D"
        geom_expand["DELTA_BIN_NUM"] = 1
        geom_expand["DELTA_TRACK_NUM"] = 1
        geom_expand["DIGITIZATION"] = 4
        geom_expand["EPSG_CRS"] = 32066
        geom_expand["FIRST_BIN"] = 1000
        geom_expand["FIRST_TRACK"] = 2000
        geom_expand["FORMAT"] = 1
        geom_expand["N_BIN"] = self.cube_shape[1]
        geom_expand["N_SAMP"] = self.cube_shape[2]
        geom_expand["N_TRACK"] = self.cube_shape[0]
        geom_expand["PROJECTION"] = 316
        geom_expand["REAL_DELTA_X"] = 100.0
        geom_expand["REAL_DELTA_Y"] = 100.0
        geom_expand["REAL_GEO_X"] = 1250000.0
        geom_expand["REAL_GEO_Y"] = 10500000.0
        geom_expand["SKEW_ANGLE"] = 0.0
        geom_expand["SUBPOINT_CODE"] = "TTTBBB"
        geom_expand["TIME_OR_DEPTH"] = "TIME"
        geom_expand["TRACK_DIR"] = "H"
        geom_expand["XFORM_TO_WGS84"] = 1241
        geom_expand["ZERO_TIME"] = 0

        # Write the keyfile
        outputkey = os.path.join(
            self.work_subfolder, "seismicCube_" + self.date_stamp + ".key"
        )
        with open(outputkey, "w") as key:
            key.write(
                "{}MESSAGE_FILE\n".format(20 * " ")
            )  # spaces are important here: exactly 20 are required
            key.write("3D_NAME C %s\n" % geom_expand["3D_NAME"])
            key.write("COORD_METHOD I %d\n" % int(geom_expand["COORD_METHOD"]))
            key.write("DATA_TYPE C %s\n" % geom_expand["DATA_TYPE"])
            key.write("DELTA_BIN_NUM I %d\n" % int(geom_expand["DELTA_BIN_NUM"]))
            key.write("DELTA_TRACK_NUM I %d\n" % int(geom_expand["DELTA_TRACK_NUM"]))
            key.write("DIGITIZATION I %d\n" % int(geom_expand["DIGITIZATION"]))
            key.write("EPSG_CRS I %d\n" % int(geom_expand["EPSG_CRS"]))
            key.write("FIRST_BIN I %d\n" % int(geom_expand["FIRST_BIN"]))
            key.write("FIRST_TRACK I %d\n" % int(geom_expand["FIRST_TRACK"]))
            key.write("FORMAT I %d\n" % int(geom_expand["FORMAT"]))
            key.write("N_BIN I %d\n" % int(geom_expand["N_BIN"]))
            key.write("N_SAMP I %d\n" % int(geom_expand["N_SAMP"]))
            key.write("N_TRACK I %d\n" % int(geom_expand["N_TRACK"]))
            key.write("PROJECTION I %d\n" % int(geom_expand["PROJECTION"]))
            key.write("REAL_DELTA_X R %f\n" % float(geom_expand["REAL_DELTA_X"]))
            key.write("REAL_DELTA_Y R %f\n" % float(geom_expand["REAL_DELTA_Y"]))
            key.write("REAL_GEO_X R %f\n" % float(geom_expand["REAL_GEO_X"]))
            key.write("REAL_GEO_Y R %f\n" % float(geom_expand["REAL_GEO_Y"]))
            key.write("SKEW_ANGLE R %f\n" % float(geom_expand["SKEW_ANGLE"]))
            key.write("SUBPOINT_CODE C %s\n" % geom_expand["SUBPOINT_CODE"])
            key.write("TIME_OR_DEPTH C %s\n" % geom_expand["TIME_OR_DEPTH"])
            key.write("TRACK_DIR C %s\n" % geom_expand["TRACK_DIR"])
            key.write("XFORM_TO_WGS84 I %d\n" % int(geom_expand["XFORM_TO_WGS84"]))
            key.write("ZERO_TIME I %d\n" % int(geom_expand["ZERO_TIME"]))
        print(f"\nKeyfile created at {outputkey}")

    def write_to_logfile(self, msg, mainkey=None, subkey=None, val="") -> None:
        """
        write_to_logfile

        Method to write msg to the model_parameter file
        (includes newline)

        Parameters
        ----------
        msg : `string`
            Required string object that will be written to the model parameter file.
        mainkey : `string`
            String of the key to be written into the sql dictionary.
        subkey : `string`
            String of the subkey to be written into the sql dictionary.
        val : `string`
            String of the value that should be written into the sql dictionary.

        Returns
        -------
        None
        """
        if msg is not None:
            with open(self.logfile, "a") as f:
                f.write(f"{msg}\n")
        if mainkey is not None:
            self.sqldict[mainkey][subkey] = val

    def write_sqldict_to_logfile(self, logfile=None) -> None:
        """
        write_sqldict_to_logfile

        Write the sql dictionary to the logfile

        Parameters
        ----------
        logfile : `string`
            The path to the log file. By default None

        Returns
        -------
        None
        """
        if logfile is None:
            logfile = self.logfile
        with open(logfile, "a") as f:
            for k, nested in self.sqldict.items():
                print(k, file=f)
                if k == "model_id":
                    print(f"\t{nested}", file=f)
                else:
                    for subkey, value in nested.items():
                        print(f"\t{subkey}: {value}", file=f)
                print(file=f)

    def write_sqldict_to_db(self) -> None:
        """
        write_sqldict_to_db

        Method to write the sqldict to a sqlite database file

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        model_id = pathlib.Path(self.work_subfolder).name
        model_parameters = self.sqldict["model_parameters"]
        fault_keys = [k for k in self.sqldict.keys() if "fault" in k]
        closure_keys = [k for k in self.sqldict.keys() if "closure" in k]

        conn = sqlite3.connect(os.path.join(self.work_subfolder, "parameters.db"))
        # Create the model_parameters table and insert its single row
        sql = f"CREATE TABLE model_parameters (model_id string primary key, {','.join(model_parameters.keys())})"
        conn.execute(sql)
        columns = "model_id, " + ", ".join(model_parameters.keys())
        placeholders = ", ".join("?" * (len(model_parameters) + 1))
        sql = f"INSERT INTO model_parameters ({columns}) VALUES ({placeholders})"
        values = tuple([model_id] + [str(x) for x in model_parameters.values()])
        conn.execute(sql, values)
        conn.commit()

        # Fault parameters: one row per fault key
        if len(fault_keys) > 0:
            fault_columns = tuple(self.sqldict[fault_keys[0]].keys())
            sql = f"CREATE TABLE fault_parameters ({','.join(fault_columns)})"
            conn.execute(sql)
            columns = ", ".join(fault_columns)
            placeholders = ", ".join("?" * len(fault_columns))
            sql = f"INSERT INTO fault_parameters ({columns}) VALUES ({placeholders})"
            for k in fault_keys:
                conn.execute(sql, tuple(self.sqldict[k].values()))
            conn.commit()

        # Closure parameters: one row per closure key
        if len(closure_keys) > 0:
            closure_columns = tuple(self.sqldict[closure_keys[0]].keys())
            sql = f"CREATE TABLE closure_parameters ({','.join(closure_columns)})"
            conn.execute(sql)
            columns = ", ".join(closure_columns)
            placeholders = ", ".join("?" * len(closure_columns))
            sql = f"INSERT INTO closure_parameters ({columns}) VALUES ({placeholders})"
            for k in closure_keys:
                conn.execute(sql, tuple(self.sqldict[k].values()))
            conn.commit()

    def _setup_rpm_scaling_factors(self, rpm_factors: dict) -> None:
        """
        Setup Rock Physics Model scaling factors
        ----------------------------------------

        Method to initialize all the rock physics model
        scaling factors and write the values to
        the model_parameters log file.
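
        The expected layout of ``rpm_factors`` mirrors the defaults built
        below (illustrative values)::

            {
                "layershiftsamples": 75,
                "RPshiftsamples": 11,
                "shalerho_factor": 1.0,
                "sandvp_factor": 1.0,
                "nearfactor": 1.0,
                ...
            }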

        Parameters
        ----------
        rpm_factors : `dict`
            Dictionary containing the scaling factors for the RPM.
            If no RPM factors are provided, the default values are used.

        Returns
        -------
        None
        """
        # TODO: remove the default in main.py, or keep a single source of truth
        if rpm_factors and not self.test_mode:
            self.rpm_scaling_factors = rpm_factors
        else:
            # Use defaults for RPM Z-shifts and scaling factors
            self.rpm_scaling_factors = dict()
            self.rpm_scaling_factors["layershiftsamples"] = int(
                np.random.triangular(35, 75, 125)
            )
            self.rpm_scaling_factors["RPshiftsamples"] = int(
                np.random.triangular(5, 11, 20)
            )
            self.rpm_scaling_factors["shalerho_factor"] = 1.0
            self.rpm_scaling_factors["shalevp_factor"] = 1.0
            self.rpm_scaling_factors["shalevs_factor"] = 1.0
            self.rpm_scaling_factors["sandrho_factor"] = 1.0
            self.rpm_scaling_factors["sandvp_factor"] = 1.0
            self.rpm_scaling_factors["sandvs_factor"] = 1.0
            self.rpm_scaling_factors["nearfactor"] = 1.0
            self.rpm_scaling_factors["midfactor"] = 1.0
            self.rpm_scaling_factors["farfactor"] = 1.0
        # Write factors to logfile
        for k, v in self.rpm_scaling_factors.items():
            self.write_to_logfile(
                msg=f"{k}: {v}", mainkey="model_parameters", subkey=k, val=v
            )

    def _set_model_parameters(self, dname: str) -> None:
        """
        Set Model Parameters
        --------------------

        Method that sets model parameters from the user-provided
        config.json file

        Parameters
        ----------
        dname : `str`
            Directory name specified in the configuration file,
            or the default is used

        Returns
        -------
        None
        """
        self.current_dir = os.getcwd()
        self.start_time = datetime.datetime.now()
        self.date_stamp = self.year_plus_fraction()

        # Read from input json
        self.parameters_json = self._read_json()
        self._read_user_params()

        # Directories
        model_dir = f"{dname}__{self.date_stamp}"
        temp_dir = f"temp_folder__{self.date_stamp}"
        self.work_subfolder = os.path.abspath(
            os.path.join(self.project_folder, model_dir)
        )
        self.temp_folder = os.path.abspath(os.path.join(self.work_folder, temp_dir))
        if self.runid:
            self.work_subfolder = f"{self.work_subfolder}_{self.runid}"
            self.temp_folder = f"{self.temp_folder}_{self.runid}"

        # Various model parameters, not in config
        self.num_lyr_lut = self.cube_shape[2] * 2 * self.infill_factor
        # 2500 voxels = 25x25x4m voxel size, 25% porosity and closures > ~40,000 bbl
        # Use the minimum voxel count as the initial closure size filter
        self.closure_min_voxels = min(
            self.closure_min_voxels_simple,
            self.closure_min_voxels_faulted,
            self.closure_min_voxels_onlap,
        )
        self.order = self.bandwidth_ord

        if self.test_mode:
            self._set_test_mode(self.test_mode, self.test_mode)

        # Random choices are separated into this method
        self._randomly_chosen_model_parameters()
        # Fault choices
        self._fault_settings()

        # Logfile
        self.logfile = os.path.join(
            self.work_subfolder, f"model_parameters_{self.date_stamp}.txt"
        )

        # HDF file to store various model data
        self.hdf_master = os.path.join(
            self.work_subfolder, f"seismicCube__{self.date_stamp}.hdf"
        )

    def _calculate_snr_after_lateral_filter(self, sn_db: float) -> float:
        """
        Calculate Signal:Noise Ratio after lateral filter
        -------------------------------------------------

        Method that computes the signal-to-noise ratio after
        the lateral filter is applied.

        Parameters
        ----------
        sn_db : `float`
            Value of the signal-to-noise ratio from the database

        Returns
        -------
        pre_smear_snr : `float`
            Signal-to-noise ratio after the lateral filter is applied
        """
        snr_of_lateral_filter = 10 * np.log10(self.lateral_filter_size ** 2)
        pre_smear_snr = sn_db - snr_of_lateral_filter
        return pre_smear_snr

    def _randomly_chosen_model_parameters(self) -> None:
        """
        Randomly Chosen Model Parameters
        --------------------------------

        Method that sets all randomly chosen model parameters

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        # Initial layer standard deviation
        self.initial_layer_stdev = (
            np.random.uniform(self.lyr_stdev[0], high=self.lyr_stdev[1])
            * self.infill_factor
        )

        # Lateral filter size: either 1x1, 3x3 or 5x5
        self.lateral_filter_size = int(np.random.uniform(0, 2) + 0.5) * 2 + 1

        # Signal to noise in decibels
        sn_db = triangle_distribution_fix(
            left=self.snr_db[0], mode=self.snr_db[1], right=self.snr_db[2]
        )
        # sn_db = np.random.triangular(left=self.snr_db[0], mode=self.snr_db[1], right=self.snr_db[2])
        # self.sn_db = self._calculate_snr_after_lateral_filter(sn_db)
        self.sn_db = sn_db

        # Percentage of layers that are sand
        self.sand_layer_pct = np.random.uniform(
            low=self.sand_layer_pct_min, high=self.sand_layer_pct_max
        )

        # Minimum shallowest depth of seabed
        if len(self.seabed_min_depth) > 1:
            # If a low/high pair is provided, select a value between them
            self.seabed_min_depth = np.random.randint(
                low=self.seabed_min_depth[0], high=self.seabed_min_depth[1]
            )

        # Low/high bandwidth to be used
        self.lowfreq = np.random.uniform(self.bandwidth_low[0], self.bandwidth_low[1])
        self.highfreq = np.random.uniform(
            self.bandwidth_high[0], self.bandwidth_high[1]
        )

        # Choose whether to add coherent noise
        self.add_noise = np.random.choice((0, 1))
        if self.add_noise == 1:
            self.smiley_or_frowny = np.random.choice((0, 1))
            if self.smiley_or_frowny == 1:
                self.fnoise = "random_coherent_frowns"
                print("Coherent frowns will be inserted")
            else:
                self.fnoise = "random_coherent_smiles"
                print("Coherent smiles will be inserted")
        else:
            self.fnoise = "random"
            print("No coherent noise will be inserted")

        # Salt inclusion
        # self.include_salt = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
        self.noise_stretch_factor = np.random.uniform(1.15, 1.35)
        if self.include_salt:
            print(
                "Salt will be inserted. noise_stretch_factor = {}".format(
                    np.around(self.noise_stretch_factor, 2)
                )
            )
        else:
            print("Salt will NOT be inserted.")

    def _read_json(self) -> dict:
        # TODO: move this to a separate function in utils?
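        # A typical config file provides at least the following keys
        # (illustrative values; see config/config_ht.json for the template):
        #   {"project": "...", "project_folder": "...", "work_folder": "/tmp",
        #    "cube_shape": [300, 300, 1250], "incident_angles": [9.0, 21.0, 33.0], ...}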
        """
        Read JSON file
        --------------

        Reads a json file on disk and loads it as a
        dictionary

        Parameters
        ----------
        None

        Returns
        -------
        config : `dict`
            Dictionary with the configuration options
        """
        with open(self.parameter_file) as f:
            config: dict = json.load(f)
        return config

    def _read_user_params(self) -> None:
        """
        Read User Params
        ----------------

        Takes the dictionary read from the JSON configuration
        and inserts each parameter into the class attributes.

        Finally, it prints a summary of the parameters
        to the console.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        d = self._read_json()
        self.project = d["project"]
        self.project_folder = d["project_folder"]
        wfolder = d["work_folder"]
        if not os.path.exists(wfolder):
            wfolder = "/tmp"  # In case work_folder does not exist, use /tmp
        self.work_folder = wfolder
        # Read parameters into Parameters class attributes
        self.cube_shape = tuple(d["cube_shape"])
        self.incident_angles = tuple(d["incident_angles"])
        self.digi = d["digi"]
        self.infill_factor = d["infill_factor"]
        self.lyr_stdev = d["initial_layer_stdev"]
        self.thickness_min = d["thickness_min"]
        self.thickness_max = d["thickness_max"]
        self.seabed_min_depth = d["seabed_min_depth"]
        self.snr_db = d["signal_to_noise_ratio_db"]
        # self.random_depth_perturb = d['random_depth_perturb_range']
        self.bandwidth_low = d["bandwidth_low"]
        self.bandwidth_high = d["bandwidth_high"]
        self.bandwidth_ord = d["bandwidth_ord"]
        self.dip_factor_max = d["dip_factor_max"]
        self.min_number_faults = d["min_number_faults"]
        self.max_number_faults = d["max_number_faults"]
        self.basin_floor_fans = d["basin_floor_fans"]
        self.pad_samples = d["pad_samples"]
        self.qc_plots = d["extra_qc_plots"]
        self.verbose = d["verbose"]
        self.include_channels = d["include_channels"]
        self.include_salt = d["include_salt"]
        self.max_column_height = d["max_column_height"]
        self.closure_types = d["closure_types"]
        self.closure_min_voxels_simple = d["min_closure_voxels_simple"]
        self.closure_min_voxels_faulted = d["min_closure_voxels_faulted"]
        self.closure_min_voxels_onlap = d["min_closure_voxels_onlap"]
        self.partial_voxels = d["partial_voxels"]
        self.variable_shale_ng = d["variable_shale_ng"]
        self.sand_layer_thickness = d["sand_layer_thickness"]
        self.sand_layer_pct_min = d["sand_layer_fraction"]["min"]
        self.sand_layer_pct_max = d["sand_layer_fraction"]["max"]
        self.hdf_store = d["write_to_hdf"]
        self.broadband_qc_volume = d["broadband_qc_volume"]
        self.model_qc_volumes = d["model_qc_volumes"]
        self.multiprocess_bp = d["multiprocess_bp"]

        # Print the parameter summary
        print(self)

    def _set_test_mode(self, size_x: int = 50, size_y: int = 50) -> None:
        """
        Set test mode
        -------------

        Sets the parameters for test mode. If no size integer
        is provided, it defaults to 50.

        This value is a good minimum because it allows the 3D model
        to contain faults and other objects inside.
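
        For example (illustrative)::

            params = Parameters(test_mode=60)
            params.setup_model()
            # cube_shape becomes (60, 60, <original z samples>) and all
            # output folders are relocated under work_folder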

        Parameters
        ----------
        size_x : `int`
            The parameter that sets the size of the model in the x direction
        size_y : `int`
            The parameter that sets the size of the model in the y direction

        Returns
        -------
        None
        """
        # Set the output model folder in the work_folder location but with the same directory name as project_folder
        normpath = (
            os.path.normpath(self.project_folder) + "_test_mode_"
        )  # strip trailing / if added
        new_project_folder = os.path.join(self.work_folder, os.path.basename(normpath))
        # Put all folders inside the project folder for easy deleting
        self.work_folder = new_project_folder
        self.project_folder = new_project_folder
        self.work_subfolder = os.path.join(
            new_project_folder, os.path.basename(self.work_subfolder)
        )
        if self.runid:
            # Append runid if provided
            self.temp_folder = f"{self.temp_folder}_{self.runid}__{self.date_stamp}"
        else:
            self.temp_folder = os.path.abspath(
                os.path.join(self.work_folder, f"temp_folder__{self.date_stamp}")
            )
        # Set smaller-sized model
        self.cube_shape = (size_x, size_y, self.cube_shape[-1])
        # Print message to user
        print(
            "{0}\nTesting Mode\nOutput Folder: {1}\nCube_Shape: {2}\n{0}".format(
                36 * "-", self.project_folder, self.cube_shape
            )
        )

    def _fault_settings(self) -> None:
        """
        Set Fault Settings
        ------------------

        Sets the parameters that will be used to generate faults throughout
        the synthetic model.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        # Fault parameters
        self.low_fault_throw = 5.0 * self.infill_factor
        self.high_fault_throw = 35.0 * self.infill_factor

        # Mode & clustering are randomly chosen
        self.mode = np.random.choice([0, 1, 2], 1)[0]
        self.clustering = np.random.choice([0, 1, 2], 1)[0]

        if self.mode == 0:
            # As random as it can be
            self.number_faults = np.random.randint(
                self.min_number_faults, self.max_number_faults
            )
            self.fmode = "random"
        elif self.mode == 1:
            if self.clustering == 0:
                # Self-branching: avoid large faults
                self.fmode = "self_branching"
                self.number_faults = np.random.randint(3, 9)
                self.low_fault_throw = 5.0 * self.infill_factor
                self.high_fault_throw = 15.0 * self.infill_factor
            elif self.clustering == 1:
                # Staircase
                self.fmode = "stair_case"
                self.number_faults = np.random.randint(5, self.max_number_faults)
            elif self.clustering == 2:
                # Relay ramps
                self.fmode = "relay_ramp"
                self.number_faults = np.random.randint(3, 9)
                self.low_fault_throw = 5.0 * self.infill_factor
                self.high_fault_throw = 15.0 * self.infill_factor
        elif self.mode == 2:
            # Horst and graben
            self.fmode = "horst_and_graben"
            self.number_faults = np.random.randint(3, 7)

        self.fault_param = [
            str(self.mode) + str(self.clustering),
            self.number_faults,
            self.low_fault_throw,
            self.high_fault_throw,
        ]

    def _get_commit_hash(self) -> str:
        """
        Get Commit Hash
        ---------------

        Gets the commit hash of the current git repository.

        The hash is written to the logfile so that each model can be traced
        back to the exact code version that generated it.

        Parameters
        ----------
        None

        Returns
        -------
        sha : `str`
            The commit hash of the current git repository
        """
        try:
            sha = (
                subprocess.check_output(["git", "rev-parse", "HEAD"])
                .decode("utf-8")
                .strip()
            )
        except CalledProcessError:
            sha = "cwd not a git repository"
        return sha

    def _write_initial_model_parameters_to_logfile(self) -> None:
        """
        Write Initial Model Parameters to Logfile
        -----------------------------------------

        Method that writes the initial parameters set for the model
        to the logfile.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        _sha = self._get_commit_hash()
        self.write_to_logfile(
            f"SHA: {_sha}", mainkey="model_parameters", subkey="sha", val=_sha
        )
        self.write_to_logfile(
            f"modeling start time: {self.start_time}",
            mainkey="model_parameters",
            subkey="start_time",
            val=self.start_time,
        )
        self.write_to_logfile(
            f"project_folder: {self.project_folder}",
            mainkey="model_parameters",
            subkey="project_folder",
            val=self.project_folder,
        )
        self.write_to_logfile(
            f"work_subfolder: {self.work_subfolder}",
            mainkey="model_parameters",
            subkey="work_subfolder",
            val=self.work_subfolder,
        )
        self.write_to_logfile(
            f"cube_shape: {self.cube_shape}",
            mainkey="model_parameters",
            subkey="cube_shape",
            val=self.cube_shape,
        )
        self.write_to_logfile(
            f"incident_angles: {self.incident_angles}",
            mainkey="model_parameters",
            subkey="incident_angles",
            val=self.incident_angles,
        )
        self.write_to_logfile(
            f"number_faults: {self.number_faults}",
            mainkey="model_parameters",
            subkey="number_faults",
            val=self.number_faults,
        )
        self.write_to_logfile(
            f"lateral_filter_size: {self.lateral_filter_size}",
            mainkey="model_parameters",
            subkey="lateral_filter_size",
            val=self.lateral_filter_size,
        )
        self.write_to_logfile(
            f"salt_inserted: {self.include_salt}",
            mainkey="model_parameters",
            subkey="salt_inserted",
            val=self.include_salt,
        )
        self.write_to_logfile(
            f"salt noise_stretch_factor: {self.noise_stretch_factor:.2f}",
            mainkey="model_parameters",
            subkey="salt_noise_stretch_factor",
            val=self.noise_stretch_factor,
        )
        self.write_to_logfile(
            f"bandpass_bandlimits: {self.lowfreq:.2f}, {self.highfreq:.2f}"
        )
        self.write_to_logfile(
            msg=None,
            mainkey="model_parameters",
            subkey="bandpass_bandlimit_low",
            val=self.lowfreq,
        )
        self.write_to_logfile(
            msg=None,
            mainkey="model_parameters",
            subkey="bandpass_bandlimit_high",
            val=self.highfreq,
        )
        self.write_to_logfile(
            f"sn_db: {self.sn_db:.2f}",
            mainkey="model_parameters",
            subkey="sn_db",
            val=self.sn_db,
        )
        self.write_to_logfile(
            f"initial layer depth stdev (flatness of layer): {self.initial_layer_stdev:.2f}",
            mainkey="model_parameters",
            subkey="initial_layer_stdev",
            val=self.initial_layer_stdev,
        )

    @staticmethod
    def year_plus_fraction() -> str:
        # TODO: move this to a separate utils module
        """
        Year Plus Fraction
        ------------------

        Method that generates a time stamp in the format of
        year + fraction of year.
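
        For example (illustrative output)::

            >>> Parameters.year_plus_fraction()
            '2023.49210153'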

        Parameters
        ----------
        None

        Returns
        -------
        fraction of the year : str
            The time stamp in the format of year + fraction of year
        """
        now = datetime.datetime.now()
        year = now.year
        secs_in_year = datetime.timedelta(days=365).total_seconds()
        fraction_of_year = (
            now - datetime.datetime(year, 1, 1, 0, 0)
        ).total_seconds() / secs_in_year
        return format(year + fraction_of_year, "14.8f").replace(" ", "")

    def hdf_setup(self, hdf_name: str) -> None:
        """
        Setup HDF files
        ---------------

        This method sets up the HDF structures

        Parameters
        ----------
        hdf_name : str
            The name of the HDF file to be created

        Returns
        -------
        None
        """
        num_threads = min(8, mp.cpu_count() - 1)
        tables.set_blosc_max_threads(num_threads)
        self.hdf_filename = os.path.join(self.temp_folder, hdf_name)
        self.filters = tables.Filters(
            complevel=5, complib="blosc"
        )  # compression with fast write speed
        self.h5file = tables.open_file(self.hdf_filename, "w")
        self.h5file.create_group("/", "ModelData")

    def hdf_init(self, dset_name, shape: tuple, dtype: str = "float64") -> tables.CArray:
        """
        HDF Initialize
        --------------

        Method that initializes an HDF chunked
        array

        Parameters
        ----------
        dset_name : str
            The name of the dataset to be created
        shape : tuple
            The shape of the chunked array to be created
        dtype : str, optional
            The datatype of the array atom, by default "float64"

        Returns
        -------
        new_array : tables.CArray
            The newly created chunked array
        """
        if "float" in dtype:
            atom = tables.FloatAtom()
        elif "uint8" in dtype:
            atom = tables.UInt8Atom()
        else:
            atom = tables.IntAtom()
        group = self.h5file.root.ModelData
        new_array = self.h5file.create_carray(
            group, dset_name, atom, shape, filters=self.filters
        )
        return new_array

    def hdf_node_list(self):
        return [x.name for x in self.h5file.list_nodes("ModelData")]

    def hdf_remove_node_list(self, dset_name):
        group = self.h5file.root.ModelData
        try:
            self.h5file.remove_node(group, dset_name)
        except tables.NoSuchNodeError:
            pass
        # Return the remaining nodes after removal
        return self.hdf_node_list()


def triangle_distribution_fix(left, mode, right, random_seed=None):
    """
    Triangle Distribution Fix
    -------------------------

    Draw a sample from the triangular distribution over the interval [left, right] with modifications.

    Ensure some values are drawn at the left and right values by enlarging the interval to
    [left - (mode - left), right + (right - mode)] and redrawing until the sample
    falls inside [left, right].

    Parameters
    ----------
    left : `float`
        lower limit
    mode : `float`
        mode
    right : `float`
        upper limit
    random_seed : `int`
        seed for numpy's random number generator

    Returns
    -------
    sn_db : `float`
        Sample drawn from the parameterised triangular distribution
    """
    # Seed once, outside the rejection loop, so repeated draws are not identical
    if random_seed is not None:
        np.random.seed(random_seed)
    while True:
        sn_db = np.random.triangular(left - (mode - left), mode, right + (right - mode))
        if left <= sn_db <= right:
            return sn_db
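

if __name__ == "__main__":
    # Minimal end-to-end sketch (assumes the default config file at
    # CONFIG_PATH exists and that its project/work folders are writable;
    # "demo" is a hypothetical runid). setup_model() falls back to the
    # default RPM scaling factors when none are passed.
    params = Parameters(test_mode=100, runid="demo")
    params.setup_model()
    print(params)  # __repr__ lists every parameter chosen for this model
    params.write_sqldict_to_logfile()
    params.write_sqldict_to_db()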
Parameter object storing all model parameters.
Attributes
- model_dir_name (str): The name that the model directory will be given, by default `seismic`.
- parameter_file (str): User parameters are read from the 'user_config' json file, and additional model parameters are set.
- test_mode (int): If test_mode is set using an integer, the size of the model will be reduced, e.g. 100 makes a 100x100 lateral grid. This reduces the amount of time the program takes to generate data, which is useful when testing a model.
  Warning: if the number is too small, the model may fail because there is not enough space to place faults etc. The value should ideally be >= 50.
- runid (str): The string runid will be added to the final model directory.
- rpm_scaling_factors (dict): These are user-defined parameters. You can use the defaults provided, but results might be unrealistic; they might need to be tuned to get realistic synthetic data.
- sqldict (dict): This is a dictionary structure that stores all the parameters of the model. This dictionary eventually gets written to a sqlite DB file.
Methods
- setup_model(rpm_factors=None) -> None: Method to set up all the necessary parameters to start a new model (see the usage sketch below).
- make_directories() -> None: Method that generates all necessary directory structures on disk.
- write_key_file(): Method to generate a key file that describes coordinate systems, track, bin, digi (inline, xlines, ).
- write_to_logfile(msg, mainkey=None, subkey=None, val=""): Method that writes to the logfile.
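A minimal usage sketch of how these methods fit together, assuming the import path matches this module's name, that the default config JSON resolves on disk, and that setup_model prepares the logfile before it is used; the runid and message are placeholders:

from datagenerator.Parameters import Parameters  # import path assumed

p = Parameters(runid="demo")  # all Parameters instances share one state dict
p.setup_model()               # set parameters, create directories, write the key file
p.write_to_logfile("model started", mainkey="model_parameters", subkey="note", val="model started")
p.write_sqldict_to_db()       # persist sqldict as parameters.db in work_subfolder (documented below)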
    def __init__(self, user_config: str = CONFIG_PATH, test_mode=None, runid=None):
        """
        Initialize the Parameters object.

        Parameters
        ----------
        user_config : `str`, optional
            This is the path on disk that points to a `.json` file
            that contains the configurations for each run, by default CONFIG_PATH
        test_mode : `int`, optional
            The parameter that sets the running mode, by default 0
        runid : `str`, optional
            This is the runid of the run; this comes in handy when you have many runs
            with various permutations of parameters, by default None
        """
        # reset the parameter dict in case we are building models within a loop, and the shared_state dict is not empty
        self._shared_state = {}
        super().__init__()
        self.model_dir_name: str = "seismic"
        self.parameter_file = user_config
        self.test_mode = test_mode
        self.runid = runid
        self.rpm_scaling_factors = dict()
        self.sqldict = defaultdict(dict)
Initialize the Parameters object.
Parameters
- user_config (str, optional): This is the path on disk that points to a .json file that contains the configurations for each run, by default CONFIG_PATH
- test_mode (int, optional): The parameter that sets the running mode, by default 0
- runid (str, optional): This is the runid of the run; this comes in handy when you have many runs with various permutations of parameters, by default None
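For instance (a sketch; 100 is an arbitrary test size, and it assumes test mode is applied during setup_model as the test_mode attribute suggests), constructing with test_mode shrinks the lateral grid, and any stored parameter can be read back dict-style through __getitem__:

p = Parameters(test_mode=100, runid="qc")  # hypothetical runid
p.setup_model()
print(p["cube_shape"])  # (100, 100, <z-size from config>) in test mode
print(p["runid"])       # 'qc'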
    def setup_model(self, rpm_factors=None) -> None:
        """
        Setup Model
        -----------
        Sets up the creation of essential parameters and directories.

        Parameters
        ----------
        rpm_factors : `dict`, optional
            The rock physics model factors for generating the synthetic cube.
            By default the rpm factors come from a default in the main.py file

        Returns
        -------
        None
        """
        # Set model parameters
        self._set_model_parameters(self.model_dir_name)
        self.make_directories()
        self.write_key_file()
        self._setup_rpm_scaling_factors(rpm_factors)

        # Write model parameters to logfile
        self._write_initial_model_parameters_to_logfile()
Setup Model
Sets up the essential parameters and directories needed to start a new model.
Parameters
- rpm_factors (dict, optional): The rock physics model factors for generating the synthetic cube. By default the rpm factors come from a default in the main.py file.
Returns
- None
    def make_directories(self) -> None:
        """
        Make directories.
        -----------------

        Creates the necessary directories to run the model.

        This function creates the directories on disk
        necessary for the model to run.

        Parameters
        ----------
        self : `Parameters`

        Returns
        -------
        None
        """
        print(f"\nModel folder: {self.work_subfolder}")
        self.sqldict["model_id"] = pathlib.Path(self.work_subfolder).name
        for folder in [self.project_folder, self.work_subfolder, self.temp_folder]:
            try:
                os.stat(folder)
            except OSError:
                print(f"Creating directory: {folder}")
                # Try making folders (can fail if multiple models are being built simultaneously in a new dir)
                try:
                    os.mkdir(folder)
                except OSError:
                    pass
        try:
            os.system(f"chmod -R 777 {self.work_subfolder}")
        except OSError:
            print(f"Could not chmod {self.work_subfolder}. Continuing...")
Make directories.
Creates the necessary directories to run the model.
This function creates the directories on disk necessary for the model to run.
Parameters
- self (Parameters): The instance of the Parameters object
Returns
- None
    def write_key_file(self) -> None:
        """
        Write key file
        --------------

        Writes a file that contains important parameters about the cube.

        Method that writes important parameters about the synthetic cube
        such as coordinate transforms and sizes.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        # Set plausible key file values
        geom_expand = dict()
        geom_expand["3D_NAME"] = "synthetic data for training"
        geom_expand["COORD_METHOD"] = 1
        geom_expand["DATA_TYPE"] = "3D"
        geom_expand["DELTA_BIN_NUM"] = 1
        geom_expand["DELTA_TRACK_NUM"] = 1
        geom_expand["DIGITIZATION"] = 4
        geom_expand["EPSG_CRS"] = 32066
        geom_expand["FIRST_BIN"] = 1000
        geom_expand["FIRST_TRACK"] = 2000
        geom_expand["FORMAT"] = 1
        geom_expand["N_BIN"] = self.cube_shape[1]
        geom_expand["N_SAMP"] = self.cube_shape[2]
        geom_expand["N_TRACK"] = self.cube_shape[0]
        geom_expand["PROJECTION"] = 316
        geom_expand["REAL_DELTA_X"] = 100.0
        geom_expand["REAL_DELTA_Y"] = 100.0
        geom_expand["REAL_GEO_X"] = 1250000.0
        geom_expand["REAL_GEO_Y"] = 10500000.0
        geom_expand["SKEW_ANGLE"] = 0.0
        geom_expand["SUBPOINT_CODE"] = "TTTBBB"
        geom_expand["TIME_OR_DEPTH"] = "TIME"
        geom_expand["TRACK_DIR"] = "H"
        geom_expand["XFORM_TO_WGS84"] = 1241
        geom_expand["ZERO_TIME"] = 0

        # Write the keyfile
        outputkey = os.path.join(
            self.work_subfolder, "seismicCube_" + self.date_stamp + ".key"
        )
        with open(outputkey, "w") as key:
            key.write(
                "{}MESSAGE_FILE\n".format(20 * " ")
            )  # spaces are important here.. Require 20 of them
            key.write("3D_NAME C %s\n" % geom_expand["3D_NAME"])
            key.write("COORD_METHOD I %d\n" % int(geom_expand["COORD_METHOD"]))
            key.write("DATA_TYPE C %s\n" % geom_expand["DATA_TYPE"])
            key.write("DELTA_BIN_NUM I %d\n" % int(geom_expand["DELTA_BIN_NUM"]))
            key.write("DELTA_TRACK_NUM I %d\n" % int(geom_expand["DELTA_TRACK_NUM"]))
            key.write("DIGITIZATION I %d\n" % int(geom_expand["DIGITIZATION"]))
            key.write("EPSG_CRS I %d\n" % int(geom_expand["EPSG_CRS"]))
            key.write("FIRST_BIN I %d\n" % int(geom_expand["FIRST_BIN"]))
            key.write("FIRST_TRACK I %d\n" % int(geom_expand["FIRST_TRACK"]))
            key.write("FORMAT I %d\n" % int(geom_expand["FORMAT"]))
            key.write("N_BIN I %d\n" % int(geom_expand["N_BIN"]))
            key.write("N_SAMP I %d\n" % int(geom_expand["N_SAMP"]))
            key.write("N_TRACK I %d\n" % int(geom_expand["N_TRACK"]))
            key.write("PROJECTION I %d\n" % int(geom_expand["PROJECTION"]))
            key.write("REAL_DELTA_X R %f\n" % float(geom_expand["REAL_DELTA_X"]))
            key.write("REAL_DELTA_Y R %f\n" % float(geom_expand["REAL_DELTA_Y"]))
            key.write("REAL_GEO_X R %f\n" % float(geom_expand["REAL_GEO_X"]))
            key.write("REAL_GEO_Y R %f\n" % float(geom_expand["REAL_GEO_Y"]))
            key.write("SKEW_ANGLE R %f\n" % float(geom_expand["SKEW_ANGLE"]))
            key.write("SUBPOINT_CODE C %s\n" % geom_expand["SUBPOINT_CODE"])
            key.write("TIME_OR_DEPTH C %s\n" % geom_expand["TIME_OR_DEPTH"])
            key.write("TRACK_DIR C %s\n" % geom_expand["TRACK_DIR"])
            key.write("XFORM_TO_WGS84 I %d\n" % int(geom_expand["XFORM_TO_WGS84"]))
            key.write("ZERO_TIME I %d\n" % int(geom_expand["ZERO_TIME"]))
        print(f"\nKeyfile created at {outputkey}")
Write key file
Writes a file that contains important parameters about the cube.
Method that writes important parameters about the synthetic cube such as coordinate transforms and sizes.
Parameters
- None
Returns
- None
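The resulting key file is plain text with one parameter per line. An excerpt, taken from the hard-coded defaults above (the header line carries the required 20 leading spaces; cube-size entries such as N_BIN follow and depend on cube_shape):

                    MESSAGE_FILE
3D_NAME C synthetic data for training
COORD_METHOD I 1
DATA_TYPE C 3D
DELTA_BIN_NUM I 1
...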
    def write_to_logfile(self, msg, mainkey=None, subkey=None, val="") -> None:
        """
        write_to_logfile

        Method to write msg to the model_parameter file
        (includes a newline).

        Parameters
        ----------
        msg : `string`
            Required string object that will be written to the model parameter file.
        mainkey : `string`
            String of the key to be written into the sql dictionary.
        subkey : `string`
            String of the subkey to be written into the sql dictionary.
        val : `string`
            String of the value that should be written into the sql dictionary.

        Returns
        -------
        None
        """
        if msg is not None:
            with open(self.logfile, "a") as f:
                f.write(f"{msg}\n")
        if mainkey is not None:
            self.sqldict[mainkey][subkey] = val
        # for k, v in self.sqldict.items():
        #     print(f"{k}: {v}")
write_to_logfile
Method to write msg to model_parameter file (includes newline)
Parameters
- msg (string): Required string object that will be written to the model parameter file.
- mainkey (string): String of the key to be written into the sql dictionary.
- subkey (string): String of the subkey to be written into the sql dictionary.
- val (string): String of the value that should be written into the sql dictionary.
Returns
- None
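Both behaviours appear in _write_initial_model_parameters_to_logfile (shown in the source listing above): a normal call appends a line to the logfile and records the value in sqldict, while msg=None updates only the sqldict. Assuming p is a set-up Parameters instance:

p.write_to_logfile(
    f"sn_db: {p.sn_db:.2f}", mainkey="model_parameters", subkey="sn_db", val=p.sn_db
)
p.write_to_logfile(
    msg=None, mainkey="model_parameters", subkey="bandpass_bandlimit_low", val=p.lowfreq
)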
    def write_sqldict_to_logfile(self, logfile=None) -> None:
        """
        write_sqldict_to_logfile

        Write the sql dictionary to the logfile.

        Parameters
        ----------
        logfile : `string`
            The path to the log file. By default None

        Returns
        -------
        None
        """
        if logfile is None:
            logfile = self.logfile
        with open(logfile, "a") as f:
            for k, nested in self.sqldict.items():
                print(k, file=f)
                if k == "model_id":
                    print(f"\t{nested}", file=f)
                else:
                    for subkey, value in nested.items():
                        print(f"\t{subkey}: {value}", file=f)
                print(file=f)
write_sqldict_to_logfile
Write the sql dictionary to the logfile
Parameters
- logfile (string): The path to the log file. By default None
Returns
- None
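Given the loop above, each top-level key lands flush left with its subkey/value pairs tab-indented beneath it; the values below are hypothetical:

model_id
	<model directory name>
model_parameters
	sha: <commit hash>
	cube_shape: (100, 100, 500)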
    def write_sqldict_to_db(self) -> None:
        """
        write_sqldict_to_db

        Method to write the sqldict to a sqlite database file.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        model_id = pathlib.Path(self.work_subfolder).name
        model_parameters = self.sqldict["model_parameters"]
        fault_keys = [k for k in self.sqldict.keys() if "fault" in k]
        closure_keys = [k for k in self.sqldict.keys() if "closure" in k]

        conn = sqlite3.connect(os.path.join(self.work_subfolder, "parameters.db"))
        # tables = ["model_parameters", "fault_parameters", "closure_parameters"]
        # create tables
        sql = f"CREATE TABLE model_parameters (model_id string primary key, {','.join(model_parameters.keys())})"
        conn.execute(sql)
        # insert model_parameters
        columns = "model_id, " + ", ".join(model_parameters.keys())
        placeholders = ", ".join("?" * (len(model_parameters) + 1))
        sql = f"INSERT INTO model_parameters ({columns}) VALUES ({placeholders})"
        values = tuple([model_id] + [str(x) for x in model_parameters.values()])
        conn.execute(sql, values)
        conn.commit()

        # fault parameters
        if len(fault_keys) > 0:
            f = tuple(self.sqldict[fault_keys[0]].keys())
            sql = f"CREATE TABLE fault_parameters ({','.join(f)})"
            conn.execute(sql)
            columns = ", ".join(self.sqldict[fault_keys[0]].keys())
            placeholders = ", ".join("?" * len(self.sqldict[fault_keys[0]].keys()))
            for f in fault_keys:
                sql = f"INSERT INTO fault_parameters ({columns}) VALUES ({placeholders})"
                conn.execute(sql, tuple(self.sqldict[f].values()))
            conn.commit()

        if len(closure_keys) > 0:
            c = tuple(self.sqldict[closure_keys[0]].keys())
            sql = f"CREATE TABLE closure_parameters ({','.join(c)})"
            conn.execute(sql)
            columns = ", ".join(self.sqldict[closure_keys[0]].keys())
            placeholders = ", ".join("?" * len(self.sqldict[closure_keys[0]].keys()))
            for c in closure_keys:
                sql = f"INSERT INTO closure_parameters ({columns}) VALUES ({placeholders})"
                conn.execute(sql, tuple(self.sqldict[c].values()))
            conn.commit()
write_sqldict_to_db
Method to write the sqldict to a sqlite database file.
Parameters
- None
Returns
- None
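A quick way to inspect the resulting file with the standard sqlite3 module; the path and available columns are a sketch and depend on what was logged during the run:

import sqlite3

conn = sqlite3.connect("parameters.db")  # normally <work_subfolder>/parameters.db
cols = [row[1] for row in conn.execute("PRAGMA table_info(model_parameters)")]
print(cols)  # 'model_id' plus one column per subkey logged under model_parameters
print(conn.execute("SELECT * FROM model_parameters").fetchone())
conn.close()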
    @staticmethod
    def year_plus_fraction() -> str:
        # TODO Move this to a separate utils module
        """
        Year Plus Fraction
        ------------------

        Method generates a time stamp in the format of
        year + fraction of the year.

        Parameters
        ----------
        None

        Returns
        -------
        fraction of the year : str
            The time stamp in the format of year + fraction of year
        """
        now = datetime.datetime.now()
        year = now.year
        secs_in_year = datetime.timedelta(days=365).total_seconds()
        fraction_of_year = (
            now - datetime.datetime(year, 1, 1, 0, 0)
        ).total_seconds() / secs_in_year
        return format(year + fraction_of_year, "14.8f").replace(" ", "")
Year Plus Fraction
Method generates a time stamp in the format of year + fraction of year.
Parameters
- None
Returns
- fraction of the year (str): The time stamp in the format of year + fraction of year
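Since it is a @staticmethod it can be called without an instance; the digits below are illustrative only:

stamp = Parameters.year_plus_fraction()
print(stamp)  # e.g. '2023.49159204' -- the year plus the elapsed fraction of it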
    def hdf_setup(self, hdf_name: str) -> None:
        """
        Setup HDF files
        ---------------

        This method sets up the HDF structures.

        Parameters
        ----------
        hdf_name : str
            The name of the HDF file to be created

        Returns
        -------
        None
        """
        num_threads = min(8, mp.cpu_count() - 1)
        tables.set_blosc_max_threads(num_threads)
        self.hdf_filename = os.path.join(self.temp_folder, hdf_name)
        self.filters = tables.Filters(
            complevel=5, complib="blosc"
        )  # compression with fast write speed
        self.h5file = tables.open_file(self.hdf_filename, "w")
        self.h5file.create_group("/", "ModelData")
Setup HDF files
This method sets up the HDF structures
Parameters
- hdf_name (str): The name of the HDF file to be created
Returns
- None
    def hdf_init(self, dset_name, shape: tuple, dtype: str = "float64") -> tables.CArray:
        """
        HDF Initialize
        --------------

        Method that initializes a chunked HDF array
        (a PyTables CArray) under the /ModelData group.

        Parameters
        ----------
        dset_name : str
            The name of the dataset to be created
        shape : tuple
            The shape of the chunked array to be created
        dtype : str
            The datatype of the array, by default "float64"

        Returns
        -------
        new_array : tables.CArray
            The newly created chunked array
        """
        if "float" in dtype:
            atom = tables.FloatAtom()
        elif "uint8" in dtype:
            atom = tables.UInt8Atom()
        else:
            atom = tables.IntAtom()
        group = self.h5file.root.ModelData
        new_array = self.h5file.create_carray(
            group, dset_name, atom, shape, filters=self.filters
        )
        return new_array
HDF Initialize
Method that initializes the HDF chunked array
Parameters
- dset_name (str): The name of the dataset to be created
- shape (tuple): The shape of the chunked array to be created
- dtype (str): The datatype of the array, by default "float64"
Returns
- new_array (tables.CArray): The newly created chunked array
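A sketch tying hdf_setup, hdf_init and the node helpers together; it assumes p is a set-up Parameters instance whose temp_folder exists on disk:

import numpy as np

p.hdf_setup("model_data.h5")                        # open the file, create /ModelData
arr = p.hdf_init("seismic", shape=(100, 100, 500))  # default dtype -> FloatAtom
arr[0] = np.random.rand(100, 500)                   # CArrays accept numpy-style slicing
print(p.hdf_node_list())                            # ['seismic']
p.hdf_remove_node_list("seismic")                   # silently ignores missing nodes
p.h5file.close()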
def triangle_distribution_fix(left, mode, right, random_seed=None):
    """
    Triangle Distribution Fix
    -------------------------

    Draw samples from the triangular distribution over the interval [left, right] with modifications.

    Ensure some values are drawn at the left and right values by enlarging the interval to
    [left - (mode - left), right + (right - mode)] and re-drawing until a sample falls
    inside [left, right].

    Parameters
    ----------
    left : `float`
        lower limit
    mode : `float`
        mode
    right : `float`
        upper limit
    random_seed : `int`
        seed for numpy's random number generator

    Returns
    -------
    sn_db : `float`
        Drawn sample from the parameterised triangular distribution
    """
    # Seed once, before the rejection loop, so that repeated draws differ
    if random_seed:
        np.random.seed(random_seed)
    # Rejection-sample until a draw lands inside [left, right]
    sn_db = np.random.triangular(left - (mode - left), mode, right + (right - mode))
    while sn_db < left or sn_db > right:
        sn_db = np.random.triangular(left - (mode - left), mode, right + (right - mode))
    return sn_db
Triangle Distribution Fix
Draw samples from the triangular distribution over the interval [left, right] with modifications.
Ensure some values are drawn at the left and right values by enlarging the interval to [left - (mode - left), right + (right - mode)]
Parameters
- left (float): lower limit
- mode (float): mode
- right (float): upper limit
- random_seed (int): seed for numpy's random number generator
Returns
- sn_db (float): Drawn sample from the parameterised triangular distribution
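A quick sketch with arbitrary limits showing why the interval is enlarged: a plain triangular(5, 10, 25) has vanishing density at its endpoints, while the enlarged-and-rejected version keeps drawing values close to 5 and 25:

samples = [triangle_distribution_fix(5.0, 10.0, 25.0) for _ in range(10_000)]
assert all(5.0 <= s <= 25.0 for s in samples)
print(min(samples), max(samples))  # both should sit near the interval endpoints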