datagenerator.Parameters

   1from collections import defaultdict
   2import datetime
   3import json
   4import multiprocessing as mp
   5import os
   6import pathlib
   7import glob
   8import sqlite3
   9from subprocess import CalledProcessError
  10import numpy as np
  11import tables
  12import subprocess
  13
  14dir_name = os.path.dirname(__file__)
  15CONFIG_PATH = os.path.abspath(os.path.join(dir_name, "../config/config_ht.json"))
  16
  17
  18class _Borg:
  19    # Any objects using Borg base class will have same shared_state values (Alex Martelli's Borg)
  20    _shared_state = {}
  21
  22    def __init__(self):
  23        self.__dict__ = self._shared_state
  24
  25
  26class Parameters(_Borg):
  27    """
  28    Parameter object storing all model parameters.
  29
  30    Attributes
  31    ----------
  32    model_dir_name : str
  33        This is the name that the directory will be given, by default `seismic`
  34    parameter_file : str
  35        User parameters are read from the 'user_config' json file, and
  36        additional model parameters are set.
  37    test_mode : int
        If test_mode is set using an integer, the size of the model will
        be reduced, e.g. 100 makes a 100x100 model. This reduces the amount
        of time that the program takes to generate data, which is useful
        when testing a model.
  42
  43        **Warning: If you put too small a number, the model may fail due to 
  44        not enough space to place faults etc...**
  45
  46        Value should ideally be >= 50
  47    runid : str
  48        The string runid will be added to the final model directory.
  49    rpm_scaling_factors : dict
        These are user-defined parameters. You can use the defaults
        provided, but results might be unrealistic.
        These might need to be tuned to get realistic synthetic
        data.
  54    sqldict : dict
  55        This is a dictionary structure that stores all the parameters
  56        of the model. This dictionary eventually gets written to a
  57        sqlite DB file.
  58
  59    Methods
  60    -------
  61    setup_model(rpm_factors=None) -> None:
        Method to set up all the necessary parameters to start a new model.
  63    make_directories() -> None:
  64        Method that generates all necessary directory structures on disk
  65    write_key_file():
  66        Method to generate a key file that describes coordinate systems, 
  67        track, bin, digi (inline, xlines, )
  68    write_to_logfile(msg, mainkey=None, subkey=None, val=""):
  69        Method that writes to the logfile
  70    """
  71
  72    def __init__(self, user_config: str = CONFIG_PATH, test_mode=None, runid=None):
  73        """
  74        Initialize the Parameters object.
  75
  76        Parameters
  77        ----------
  78        user_config : `str`, optional
  79            This is the path on disk that points to a `.json` file
  80            that contains the configurations for each run, by default CONFIG_PATH
  81        test_mode : `int`, optional
  82            The parameter that sets the running mode, by default 0
  83        runid : `str`, optional
  84            This is the runid of the run, this comes in handy when you have many runs
  85            with various permutations of parameters, by default None
  86        """
  87        # reset the parameter dict in case we are building models within a loop, and the shared_state dict is not empty
  88        self._shared_state = {}
  89        super().__init__()
  90        self.model_dir_name: str = "seismic"
  91        self.parameter_file = user_config
  92        self.test_mode = test_mode
  93        self.runid = runid
  94        self.rpm_scaling_factors = dict()
  95        self.sqldict = defaultdict(dict)
  96
  97    def __repr__(self):
  98        """
  99        Representation method
 100
 101        Parameters
 102        ----------
 103        self : `Parameters`
 104            The instance of the Parameters object
 105        """
 106        # Make nice repr instead of a print method
 107        items = ("\t{} = {}".format(k, v) for k, v in self.__dict__.items())
 108        return "{}:\n{}".format(self.__class__.__name__, "\n".join(sorted(items)))
 109
 110    def __getitem__(self, key: str):
 111        """__getitem__
 112
 113        Enable retrieval of values as though the class instance is a dict
 114
 115        Parameters
 116        ----------
 117        key : str
 118            The key desired to be accessed
 119
 120        Returns
 121        -------
 122        any
 123            Value of the key
 124        """
 125        return self._shared_state[key]
 126
 127    def setup_model(self, rpm_factors=None) -> None:
 128        """
 129        Setup Model
 130        -----------
 131        Sets up the creation of essential parameters and directories
 132
 133        Parameters
 134        ----------
 135        rpm_factors : `dict`, optional
 136            The rock physics model factors for generating the synthetic cube.
 137            By default the rpm factors come from a default in the main.py file
 138
 139        Returns
 140        -------
 141        None
 142        """
 143        # Set model parameters
 144        self._set_model_parameters(self.model_dir_name)
 145        self.make_directories()
 146        self.write_key_file()
 147        self._setup_rpm_scaling_factors(rpm_factors)
 148
 149        # Write model parameters to logfile
 150        self._write_initial_model_parameters_to_logfile()
 151
 152    def make_directories(self) -> None:
 153        """
 154        Make directories.
 155        -----------------
 156
 157        Creates the necessary directories to run the model.
 158
 159        This function creates the directories on disk
 160        necessary for the model to run.
 161
 162        Parameters
 163        ----------
 164        self : `Parameters`
 165
 166        Returns
 167        -------
 168        None
 169        """
 170        print(f"\nModel folder: {self.work_subfolder}")
 171        self.sqldict["model_id"] = pathlib.Path(self.work_subfolder).name
 172        for folder in [self.project_folder, self.work_subfolder, self.temp_folder]:
 173            try:
 174                os.stat(folder)
 175            except OSError:
 176                print(f"Creating directory: {folder}")
 177                # Try making folders (can fail if multiple models are being built simultaneously in a new dir)
 178                try:
 179                    os.mkdir(folder)
 180                except OSError:
 181                    pass
 182        try:
 183            os.system(f"chmod -R 777 {self.work_subfolder}")
 184        except OSError:
 185            print(f"Could not chmod {self.work_subfolder}. Continuing...")
 186            pass
 187
 188    def write_key_file(self) -> None:
 189        """
 190        Write key file
 191        --------------
 192
 193        Writes a file that ocntains important parameters about the cube.
 194
 195        Method that writes important parameters about the synthetic cube
 196        such as coordinate transforms and sizes.
 197
 198        Parameters
 199        ----------
 200        None
 201
 202        Returns
 203        ----------
 204        None
 205        """
 206        # Set plausible key file values
 207        geom_expand = dict()
 208        geom_expand["3D_NAME"] = "synthetic data for training"
 209        geom_expand["COORD_METHOD"] = 1
 210        geom_expand["DATA_TYPE"] = "3D"
 211        geom_expand["DELTA_BIN_NUM"] = 1
 212        geom_expand["DELTA_TRACK_NUM"] = 1
 213        geom_expand["DIGITIZATION"] = 4
 214        geom_expand["EPSG_CRS"] = 32066
 215        geom_expand["FIRST_BIN"] = 1000
 216        geom_expand["FIRST_TRACK"] = 2000
 217        geom_expand["FORMAT"] = 1
 218        geom_expand["N_BIN"] = self.cube_shape[1]
 219        geom_expand["N_SAMP"] = self.cube_shape[2]
 220        geom_expand["N_TRACK"] = self.cube_shape[0]
 221        geom_expand["PROJECTION"] = 316
 222        geom_expand["REAL_DELTA_X"] = 100.0
 223        geom_expand["REAL_DELTA_Y"] = 100.0
 224        geom_expand["REAL_GEO_X"] = 1250000.0
 225        geom_expand["REAL_GEO_Y"] = 10500000.0
 226        geom_expand["SKEW_ANGLE"] = 0.0
 227        geom_expand["SUBPOINT_CODE"] = "TTTBBB"
 228        geom_expand["TIME_OR_DEPTH"] = "TIME"
 229        geom_expand["TRACK_DIR"] = "H"
 230        geom_expand["XFORM_TO_WGS84"] = 1241
 231        geom_expand["ZERO_TIME"] = 0
 232
 233        # Write the keyfile
 234        outputkey = os.path.join(
 235            self.work_subfolder, "seismicCube_" + self.date_stamp + ".key"
 236        )
 237        with open(outputkey, "w") as key:
 238            key.write(
 239                "{}MESSAGE_FILE\n".format(20 * " ")
 240            )  # spaces are important here.. Require 20 of them
 241            key.write("3D_NAME C %s\n" % geom_expand["3D_NAME"])
 242            key.write("COORD_METHOD I %d\n" % int(geom_expand["COORD_METHOD"]))
 243            key.write("DATA_TYPE C %s\n" % geom_expand["DATA_TYPE"])
 244            key.write("DELTA_BIN_NUM I %d\n" % int(geom_expand["DELTA_BIN_NUM"]))
 245            key.write("DELTA_TRACK_NUM I %d\n" % int(geom_expand["DELTA_TRACK_NUM"]))
 246            key.write("DIGITIZATION I %d\n" % int(geom_expand["DIGITIZATION"]))
 247            key.write("EPSG_CRS I %d\n" % int(geom_expand["EPSG_CRS"]))
 248            key.write("FIRST_BIN I %d\n" % int(geom_expand["FIRST_BIN"]))
 249            key.write("FIRST_TRACK I %d\n" % int(geom_expand["FIRST_TRACK"]))
 250            key.write("FORMAT I %d\n" % int(geom_expand["FORMAT"]))
 251            key.write("N_BIN I %d\n" % int(geom_expand["N_BIN"]))
 252            key.write("N_SAMP I %d\n" % int(geom_expand["N_SAMP"]))
 253            key.write("N_TRACK I %d\n" % int(geom_expand["N_TRACK"]))
 254            key.write("PROJECTION I %d\n" % int(geom_expand["PROJECTION"]))
 255            key.write("REAL_DELTA_X R %f\n" % float(geom_expand["REAL_DELTA_X"]))
 256            key.write("REAL_DELTA_Y R %f\n" % float(geom_expand["REAL_DELTA_Y"]))
 257            key.write("REAL_GEO_X R %f\n" % float(geom_expand["REAL_GEO_X"]))
 258            key.write("REAL_GEO_Y R %f\n" % float(geom_expand["REAL_GEO_Y"]))
 259            key.write("SKEW_ANGLE R %f\n" % float(geom_expand["SKEW_ANGLE"]))
 260            key.write("SUBPOINT_CODE C %s\n" % geom_expand["SUBPOINT_CODE"])
 261            key.write("TIME_OR_DEPTH C %s\n" % geom_expand["TIME_OR_DEPTH"])
 262            key.write("TRACK_DIR C %s\n" % geom_expand["TRACK_DIR"])
 263            key.write("XFORM_TO_WGS84 I %d\n" % int(geom_expand["XFORM_TO_WGS84"]))
 264            key.write("ZERO_TIME I %d\n" % int(geom_expand["ZERO_TIME"]))
 265        print(f"\nKeyfile created at {outputkey}")
 266
 267    def write_to_logfile(self, msg, mainkey=None, subkey=None, val="") -> None:
 268        """
 269        write_to_logfile
 270
 271        Method to write msg to model_parameter file
 272        (includes newline)
 273
 274        Parameters
 275        ----------
 276        msg : `string`
 277        Required string object that will be written tom model parameter file.
 278        mainkey : `string`
 279        String of the key to be written into de sql dictionary.
 280        subkey : `string`
 281        String of the subkey to be written into de sql dictionary.
 282        val : `string`
 283        String of the value that should be written into the sql dictionary.
 284        
 285        Returns
 286        -------
 287        None
 288        """
 289        if msg is not None:
 290            with open(self.logfile, "a") as f:
 291                f.write(f"{msg}\n")
 292        if mainkey is not None:
 293            self.sqldict[mainkey][subkey] = val
 294            # for k, v in self.sqldict.items():
 295            #     print(f"{k}: {v}")
 296
 297    def write_sqldict_to_logfile(self, logfile=None) -> None:
 298        """
 299        write_sqldict_to_logfile
 300
 301        Write the sql dictionary to the logfile
 302
 303        Parameters
 304        ----------
 305        logfile : `string`
 306        The path to the log file. By default None
 307        
 308        Returns
 309        -------
 310        None
 311        """
 312        if logfile is None:
 313            logfile = self.logfile
 314        with open(logfile, "a") as f:
 315            for k, nested in self.sqldict.items():
 316                print(k, file=f)
 317                if k == "model_id":
 318                    print(f"\t{nested}", file=f)
 319                else:
 320                    for subkey, value in nested.items():
 321                        print(f"\t{subkey}: {value}", file=f)
 322                print(file=f)
 323
 324    def write_sqldict_to_db(self) -> None:
 325        """
 326        write_sqldict_to_db
 327
 328        Method to write the sqldict to database sqlite file
 329
 330        Parameters
 331        ----------
 332        None
 333        
 334        Returns
 335        -------
 336        None
 337        """
 338        model_id = pathlib.Path(self.work_subfolder).name
 339        model_parameters = self.sqldict["model_parameters"]
 340        fault_keys = [k for k in self.sqldict.keys() if "fault" in k]
 341        closure_keys = [k for k in self.sqldict.keys() if "closure" in k]
 342
 343        conn = sqlite3.connect(os.path.join(self.work_subfolder, "parameters.db"))
 344        # tables = ["model_parameters", "fault_parameters", "closure_parameters"]
 345        # create tables
 346        sql = f"CREATE TABLE model_parameters (model_id string primary key, {','.join(model_parameters.keys())})"
 347        conn.execute(sql)
 348        # insert model_parameters
 349        columns = "model_id, " + ", ".join(model_parameters.keys())
 350        placeholders = ", ".join("?" * (len(model_parameters) + 1))
 351        sql = f"INSERT INTO model_parameters ({columns}) VALUES ({placeholders})"
 352        values = tuple([model_id] + [str(x) for x in model_parameters.values()])
 353        conn.execute(sql, values)
 354        conn.commit()
 355
 356        # fault parameters
 357        if len(fault_keys) > 0:
 358            f = tuple(self.sqldict[fault_keys[0]].keys())
 359            sql = f"CREATE TABLE fault_parameters ({','.join(f)})"
 360            conn.execute(sql)
 361            columns = ", ".join(self.sqldict[fault_keys[0]].keys())
 362            placeholders = ", ".join("?" * len(self.sqldict[fault_keys[0]].keys()))
 363            for f in fault_keys:
 364                sql = (
 365                    f"INSERT INTO fault_parameters ({columns}) VALUES ({placeholders})"
 366                )
 367                conn.execute(sql, tuple(self.sqldict[f].values()))
 368                conn.commit()
 369
 370        if len(closure_keys) > 0:
 371            c = tuple(self.sqldict[closure_keys[0]].keys())
 372            sql = f"CREATE TABLE closure_parameters ({','.join(c)})"
 373            conn.execute(sql)
 374            columns = ", ".join(self.sqldict[closure_keys[0]].keys())
 375            placeholders = ", ".join("?" * len(self.sqldict[closure_keys[0]].keys()))
 376            for c in closure_keys:
 377                sql = f"INSERT INTO closure_parameters ({columns}) VALUES ({placeholders})"
 378                conn.execute(sql, tuple(self.sqldict[c].values()))
 379                conn.commit()
 380
 381    def _setup_rpm_scaling_factors(self, rpm_factors: dict) -> None:
 382        """
 383        Setup Rock Physics Model scaling factors
 384        ----------------------------------------
 385
 386        Method to initialize all the rock physics model
 387        scaling factors. Method also writes the values to
 388        the model_parameters log file.
 389
 390        Parameters
 391        ----------
 392        TODO remove the default in the main.py or have a single source of truth
 393        rpm_factors : `dict`
 394        Dictionary containing the scaling factors for the RPM.
 395        If no RPM factors are provided, the default values are used.
 396        
 397        Returns
 398        -------
 399        None
 400        """
 401        if rpm_factors and not self.test_mode:
 402            self.rpm_scaling_factors = rpm_factors
 403        else:
 404            # Use defaults for RPM Z-shifts and scaling factors
 405            self.rpm_scaling_factors = dict()
 406            self.rpm_scaling_factors["layershiftsamples"] = int(
 407                np.random.triangular(35, 75, 125)
 408            )
 409            self.rpm_scaling_factors["RPshiftsamples"] = int(
 410                np.random.triangular(5, 11, 20)
 411            )
 412            self.rpm_scaling_factors["shalerho_factor"] = 1.0
 413            self.rpm_scaling_factors["shalevp_factor"] = 1.0
 414            self.rpm_scaling_factors["shalevs_factor"] = 1.0
 415            self.rpm_scaling_factors["sandrho_factor"] = 1.0
 416            self.rpm_scaling_factors["sandvp_factor"] = 1.0
 417            self.rpm_scaling_factors["sandvs_factor"] = 1.0
 418            self.rpm_scaling_factors["nearfactor"] = 1.0
 419            self.rpm_scaling_factors["midfactor"] = 1.0
 420            self.rpm_scaling_factors["farfactor"] = 1.0
 421        # Write factors to logfile
 422        for k, v in self.rpm_scaling_factors.items():
 423            self.write_to_logfile(
 424                msg=f"{k}: {v}", mainkey="model_parameters", subkey=k, val=v
 425            )
 426
 427    def _set_model_parameters(self, dname: str) -> None:
 428        """
 429        Set Model Parameters
 430        ----------------------------------------
 431
 432        Method that sets model parameters from user-provided
 433        config.json file
 434
 435        Parameters
 436        ----------
 437        dname : `str`
 438        Directory name specified in the configuration file,
 439        or the default is used
 440        
 441        Returns
 442        -------
 443        None
 444        """
 445        self.current_dir = os.getcwd()
 446        self.start_time = datetime.datetime.now()
 447        self.date_stamp = self.year_plus_fraction()
 448
 449        # Read from input json
 450        self.parameters_json = self._read_json()
 451        self._read_user_params()
 452
 453        # Directories
 454        model_dir = f"{dname}__{self.date_stamp}"
 455        temp_dir = f"temp_folder__{self.date_stamp}"
 456        self.work_subfolder = os.path.abspath(
 457            os.path.join(self.project_folder, model_dir)
 458        )
 459        self.temp_folder = os.path.abspath(
 460            os.path.join(self.work_folder, f"temp_folder__{self.date_stamp}")
 461        )
 462        if self.runid:
 463            self.work_subfolder = f"{self.work_subfolder}_{self.runid}"
 464            self.temp_folder = f"{self.temp_folder}_{self.runid}"
 465
 466        # Various model parameters, not in config
 467        self.num_lyr_lut = self.cube_shape[2] * 2 * self.infill_factor
 468        # 2500 voxels = 25x25x4m voxels size, 25% porosity and closures > ~40,000 bbl
 469        # Use the minimum voxel count as initial closure size filter
 470        self.closure_min_voxels = min(
 471            self.closure_min_voxels_simple,
 472            self.closure_min_voxels_faulted,
 473            self.closure_min_voxels_onlap,
 474        )
 475        self.order = self.bandwidth_ord
 476
 477        if self.test_mode:
 478            self._set_test_mode(self.test_mode, self.test_mode)
 479
 480        # Random choices are separated into this method
 481        self._randomly_chosen_model_parameters()
 482        # Fault choices
 483        self._fault_settings()
 484
 485        # Logfile
 486        self.logfile = os.path.join(
 487            self.work_subfolder, f"model_parameters_{self.date_stamp}.txt"
 488        )
 489
 490        # HDF file to store various model data
 491        self.hdf_master = os.path.join(
 492            self.work_subfolder, f"seismicCube__{self.date_stamp}.hdf"
 493        )
 494
 495    def _calculate_snr_after_lateral_filter(self, sn_db: float) -> float:
 496        """
 497        Calculate Signal:Noise Ratio after lateral filter
 498        ----------------------------------------
 499
 500        Method that computes the signal to noise ratio after
 501        the lateral filter is applied.
 502
 503        Parameters
 504        ----------
 505        sn_db : `float`
 506            Value of the signal to noise value from the database
 507        
 508        Returns
 509        -------
 510        pre_smear_snr : `float`
 511            Signal to noise ratio after the lateral filter is applied
 512        """
 513        snr_of_lateral_filter = 10 * np.log10(self.lateral_filter_size ** 2)
 514        pre_smear_snr = sn_db - snr_of_lateral_filter
 515        return pre_smear_snr
 516
    def _randomly_chosen_model_parameters(self) -> None:
        """Draw all randomly chosen model parameters.

        Populates initial_layer_stdev, lateral_filter_size, sn_db,
        sand_layer_pct, seabed_min_depth, lowfreq/highfreq, the coherent
        noise flags (add_noise, smiley_or_frowny, fnoise) and
        noise_stretch_factor. Statement order matters: each draw consumes
        values from the global numpy RNG stream.

        Returns
        -------
        None
        """
        # Initial layer standard deviation, scaled by the infill factor.
        self.initial_layer_stdev = (
            np.random.uniform(self.lyr_stdev[0], high=self.lyr_stdev[1])
            * self.infill_factor
        )

        # lateral filter size, either 1x1, 3x3 or 5x5
        self.lateral_filter_size = int(np.random.uniform(0, 2) + 0.5) * 2 + 1

        # Signal to noise in decibels.
        # NOTE(review): triangle_distribution_fix is defined elsewhere in
        # this module — presumably a corrected triangular draw; confirm its
        # semantics there.
        sn_db = triangle_distribution_fix(
            left=self.snr_db[0], mode=self.snr_db[1], right=self.snr_db[2]
        )
        # sn_db = np.random.triangular(left=self.snr_db[0], mode=self.snr_db[1], right=self.snr_db[2])
        # self.sn_db = self._calculate_snr_after_lateral_filter(sn_db)
        self.sn_db = sn_db

        # Percentage of layers that are sand
        self.sand_layer_pct = np.random.uniform(
            low=self.sand_layer_pct_min, high=self.sand_layer_pct_max
        )

        # Minimum shallowest depth of seabed
        if (
            len(self.seabed_min_depth) > 1
        ):  # if low/high value provided, select a value between these
            self.seabed_min_depth = np.random.randint(
                low=self.seabed_min_depth[0], high=self.seabed_min_depth[1]
            )

        # Low/High bandwidth to be used
        self.lowfreq = np.random.uniform(self.bandwidth_low[0], self.bandwidth_low[1])
        self.highfreq = np.random.uniform(
            self.bandwidth_high[0], self.bandwidth_high[1]
        )

        # Choose whether to add coherent noise, and if so whether it is
        # "smiles" or "frowns"; fnoise selects the noise style downstream.
        self.add_noise = np.random.choice((0, 1))
        if self.add_noise == 1:
            self.smiley_or_frowny = np.random.choice((0, 1))
            if self.smiley_or_frowny == 1:
                self.fnoise = "random_coherent_frowns"
                print("Coherent frowns will be inserted")
            else:
                self.fnoise = "random_coherent_smiles"
                print("Coherent smiles will be inserted")
        else:
            self.fnoise = "random"
            print("No coherent noise will be inserted")

        # Salt inclusion
        # self.include_salt = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
        # noise_stretch_factor is drawn unconditionally so the RNG stream
        # stays the same whether or not salt is included.
        self.noise_stretch_factor = np.random.uniform(1.15, 1.35)
        if self.include_salt:
            print(
                "Salt will be inserted. noise_stretch_factor = {}".format(
                    np.around(self.noise_stretch_factor, 2)
                )
            )
        else:
            print("Salt will be NOT be inserted.")
 593
 594    def _read_json(self) -> dict:
 595        # TODO Move this to a separate function in utlis?
 596        """
 597        Read JSON file
 598        ----------------------------------------
 599
 600        Reads a json file on disk and loads it as
 601        dictionary
 602
 603        Parameters
 604        ----------
 605        None
 606        
 607        Returns
 608        -------
 609        config : `dict`
 610            Dictionary with the configuration options
 611        """
 612        with open(self.parameter_file) as f:
 613            config: dict = json.load(f)
 614        return config
 615
 616    def _read_user_params(self) -> None:
 617        """
 618        Read User Params
 619        ----------------------------------------
 620
 621        Takes the read in dictionary of JSON configuration
 622        and reads each parameter and inserts it into the
 623        attributes.
 624
 625        In the end it prints a summary of the parameters
 626        to the console.
 627
 628        Parameters
 629        ----------
 630        None
 631        
 632        Returns
 633        -------
 634        None
 635        """
 636        d = self._read_json()
 637        self.project = d["project"]
 638        self.project_folder = d["project_folder"]
 639        wfolder = d["work_folder"]
 640        if not os.path.exists(wfolder):
 641            wfolder = "/tmp"  # In case work_folder does not exist, use /tmp
 642        self.work_folder = wfolder
 643        # read parameters into Parameter class attributes
 644        self.cube_shape = tuple(d["cube_shape"])
 645        self.incident_angles = tuple(d["incident_angles"])
 646        self.digi = d["digi"]
 647        self.infill_factor = d["infill_factor"]
 648        self.lyr_stdev = d["initial_layer_stdev"]
 649        self.thickness_min = d["thickness_min"]
 650        self.thickness_max = d["thickness_max"]
 651        self.seabed_min_depth = d["seabed_min_depth"]
 652        self.snr_db = d["signal_to_noise_ratio_db"]
 653        # self.random_depth_perturb = d['random_depth_perturb_range']
 654        self.bandwidth_low = d["bandwidth_low"]
 655        self.bandwidth_high = d["bandwidth_high"]
 656        self.bandwidth_ord = d["bandwidth_ord"]
 657        self.dip_factor_max = d["dip_factor_max"]
 658        self.min_number_faults = d["min_number_faults"]
 659        self.max_number_faults = d["max_number_faults"]
 660        self.basin_floor_fans = d["basin_floor_fans"]
 661        self.pad_samples = d["pad_samples"]
 662        self.qc_plots = d["extra_qc_plots"]
 663        self.verbose = d["verbose"]
 664        self.include_channels = d["include_channels"]
 665        self.include_salt = d["include_salt"]
 666        self.max_column_height = d["max_column_height"]
 667        self.closure_types = d["closure_types"]
 668        self.closure_min_voxels_simple = d["min_closure_voxels_simple"]
 669        self.closure_min_voxels_faulted = d["min_closure_voxels_faulted"]
 670        self.closure_min_voxels_onlap = d["min_closure_voxels_onlap"]
 671        self.partial_voxels = d["partial_voxels"]
 672        self.variable_shale_ng = d["variable_shale_ng"]
 673        self.sand_layer_thickness = d["sand_layer_thickness"]
 674        self.sand_layer_pct_min = d["sand_layer_fraction"]["min"]
 675        self.sand_layer_pct_max = d["sand_layer_fraction"]["max"]
 676        self.hdf_store = d["write_to_hdf"]
 677        self.broadband_qc_volume = d["broadband_qc_volume"]
 678        self.model_qc_volumes = d["model_qc_volumes"]
 679        self.multiprocess_bp = d["multiprocess_bp"]
 680
 681        # print em
 682        self.__repr__()
 683
 684    def _set_test_mode(self, size_x: int = 50, size_y: int = 50) -> None:
 685        """
 686        Set test mode
 687        -------------
 688
 689        Sets whether the parameters for testinf mode. If no size integer
 690        is provided is defaults to 50.
 691
 692        This value is a good minimum because it allows for the 3D model
 693        to be able to contain faults and other objects inside.
 694
 695        Parameters
 696        ----------
 697        size_x : `int`
 698        The parameter that sets the size of the model in the x direction
 699        size_y : `int`
 700        The parameter that sets the size of the model in the y direction
 701        
 702        Returns
 703        -------
 704        None
 705        """
 706
 707        # Set output model folder in work_folder location but with same directory name as project_folder
 708        normpath = (
 709            os.path.normpath(self.project_folder) + "_test_mode_"
 710        )  # strip trailing / if added
 711        new_project_folder = os.path.join(self.work_folder, os.path.basename(normpath))
 712        # Put all folders inside project folder for easy deleting
 713        self.work_folder = new_project_folder
 714        self.project_folder = new_project_folder
 715        self.work_subfolder = os.path.join(
 716            new_project_folder, os.path.basename(self.work_subfolder)
 717        )
 718        if self.runid:
 719            # Append runid if provided
 720            self.temp_folder = f"{self.temp_folder}_{self.runid}__{self.date_stamp}"
 721        else:
 722            self.temp_folder = os.path.abspath(
 723                os.path.join(self.work_folder, f"temp_folder__{self.date_stamp}")
 724            )
 725        # Set smaller sized model
 726        self.cube_shape = tuple([size_x, size_y, self.cube_shape[-1]])
 727        # Print message to user
 728        print(
 729            "{0}\nTesting Mode\nOutput Folder: {1}\nCube_Shape: {2}\n{0}".format(
 730                36 * "-", self.project_folder, self.cube_shape
 731            )
 732        )
 733
 734    def _fault_settings(self) -> None:
 735        """
 736        Set Fault Settings
 737        -------------
 738
 739        Sets the parameters that will be used to generate faults throughout
 740        the synthetic model.
 741
 742        Parameters
 743        ----------
 744        None
 745        
 746        Returns
 747        -------
 748        None
 749        """
 750        # Fault parameters
 751        self.low_fault_throw = 5.0 * self.infill_factor
 752        self.high_fault_throw = 35.0 * self.infill_factor
 753
 754        # mode & clustering are randomly chosen
 755        self.mode = np.random.choice([0, 1, 2], 1)[
 756            0
 757        ]
 758        self.clustering = np.random.choice([0, 1, 2], 1)[0]
 759
 760        if self.mode == 0:
 761            # As random as it can be
 762            self.number_faults = np.random.randint(
 763                self.min_number_faults, self.max_number_faults
 764            )
 765            self.fmode = "random"
 766
 767        elif self.mode == 1:
 768            if self.clustering == 0:
 769                self.fmode = "self_branching"
 770                # Self Branching. avoid large fault
 771                self.number_faults = np.random.randint(
 772                    3, 9
 773                )
 774                self.low_fault_throw = 5.0 * self.infill_factor
 775                self.high_fault_throw = 15.0 * self.infill_factor
 776            if self.clustering == 1:
 777                # Stair case
 778                self.fmode = "stair_case"
 779                self.number_faults = np.random.randint(
 780                    5, self.max_number_faults
 781                )
 782            if self.clustering == 2:
 783                # Relay ramps
 784                self.fmode = "relay_ramp"
 785                self.number_faults = np.random.randint(
 786                    3, 9
 787                )
 788                self.low_fault_throw = 5.0 * self.infill_factor
 789                self.high_fault_throw = 15.0 * self.infill_factor
 790        elif self.mode == 2:
 791            # Horst and graben
 792            self.fmode = "horst_and_graben"
 793            self.number_faults = np.random.randint(
 794                3, 7
 795            )
 796
 797        self.fault_param = [
 798            str(self.mode) + str(self.clustering),
 799            self.number_faults,
 800            self.low_fault_throw,
 801            self.high_fault_throw,
 802        ]
 803
 804    def _get_commit_hash(self) -> str:
 805        """
 806        Get Commit Hash
 807        -------------
 808
 809        Gets the commit hash of the current git repository.
 810
 811        #TODO Explain what this is for exactly
 812
 813        Parameters
 814        ----------
 815        None
 816        
 817        Returns
 818        -------
 819        sha : `str`
 820            The commit hash of the current git repository
 821        """
 822
 823        try:
 824            sha = (
 825                subprocess.check_output(["git", "rev-parse", "HEAD"])
 826                .decode("utf-8")
 827                .strip()
 828            )
 829        except CalledProcessError:
 830            sha = "cwd not a git repository"
 831        return sha
 832
 833    def _write_initial_model_parameters_to_logfile(self) -> None:
 834        """
 835        Write Initial Model Parameters to Logfile
 836        ----------------------------------------
 837
 838        Method that writes the initial parameters set for the model
 839        to the logfile.
 840
 841        Parameters
 842        ----------
 843        None
 844        
 845        Returns
 846        -------
 847        None
 848        """
 849        _sha = self._get_commit_hash()
 850        self.write_to_logfile(
 851            f"SHA: {_sha}", mainkey="model_parameters", subkey="sha", val=_sha
 852        )
 853        self.write_to_logfile(
 854            f"modeling start time: {self.start_time}",
 855            mainkey="model_parameters",
 856            subkey="start_time",
 857            val=self.start_time,
 858        )
 859        self.write_to_logfile(
 860            f"project_folder: {self.project_folder}",
 861            mainkey="model_parameters",
 862            subkey="project_folder",
 863            val=self.project_folder,
 864        )
 865        self.write_to_logfile(
 866            f"work_subfolder: {self.work_subfolder}",
 867            mainkey="model_parameters",
 868            subkey="work_subfolder",
 869            val=self.work_subfolder,
 870        )
 871        self.write_to_logfile(
 872            f"cube_shape: {self.cube_shape}",
 873            mainkey="model_parameters",
 874            subkey="cube_shape",
 875            val=self.cube_shape,
 876        )
 877        self.write_to_logfile(
 878            f"incident_angles: {self.incident_angles}",
 879            mainkey="model_parameters",
 880            subkey="incident_angles",
 881            val=self.incident_angles,
 882        )
 883        self.write_to_logfile(
 884            f"number_faults: {self.number_faults}",
 885            mainkey="model_parameters",
 886            subkey="number_faults",
 887            val=self.number_faults,
 888        )
 889        self.write_to_logfile(
 890            f"lateral_filter_size: {self.lateral_filter_size}",
 891            mainkey="model_parameters",
 892            subkey="lateral_filter_size",
 893            val=self.lateral_filter_size,
 894        )
 895        self.write_to_logfile(
 896            f"salt_inserted: {self.include_salt}",
 897            mainkey="model_parameters",
 898            subkey="salt_inserted",
 899            val=self.include_salt,
 900        )
 901        self.write_to_logfile(
 902            f"salt noise_stretch_factor: {self.noise_stretch_factor:.2f}",
 903            mainkey="model_parameters",
 904            subkey="salt_noise_stretch_factor",
 905            val=self.noise_stretch_factor,
 906        )
 907        self.write_to_logfile(
 908            f"bandpass_bandlimits: {self.lowfreq:.2f}, {self.highfreq:.2f}"
 909        )
 910        self.write_to_logfile(
 911            msg=None,
 912            mainkey="model_parameters",
 913            subkey="bandpass_bandlimit_low",
 914            val=self.lowfreq,
 915        )
 916        self.write_to_logfile(
 917            msg=None,
 918            mainkey="model_parameters",
 919            subkey="bandpass_bandlimit_high",
 920            val=self.highfreq,
 921        )
 922        self.write_to_logfile(
 923            f"sn_db: {self.sn_db:.2f}",
 924            mainkey="model_parameters",
 925            subkey="sn_db",
 926            val=self.sn_db,
 927        )
 928        self.write_to_logfile(
 929            f"initial layer depth stdev (flatness of layer): {self.initial_layer_stdev:.2f}",
 930            mainkey="model_parameters",
 931            subkey="initial_layer_stdev",
 932            val=self.initial_layer_stdev,
 933        )
 934
 935    @staticmethod
 936    def year_plus_fraction() -> str:
 937        # TODO Move this to utils separate module
 938        """
 939        Year Plus Fraction
 940        ----------------------------------------
 941
 942        Method generates a time stamp in the format of
 943        year + fraction of year.
 944
 945        Parameters
 946        ----------
 947        None
 948        
 949        Returns
 950        -------
 951        fraction of the year : str
 952            The time stamp in the format of year + fraction of year
 953
 954        """
 955        now = datetime.datetime.now()
 956        year = now.year
 957        secs_in_year = datetime.timedelta(days=365).total_seconds()
 958        fraction_of_year = (
 959            now - datetime.datetime(year, 1, 1, 0, 0)
 960        ).total_seconds() / secs_in_year
 961        return format(year + fraction_of_year, "14.8f").replace(" ", "")
 962
 963    def hdf_setup(self, hdf_name: str) -> None:
 964        """
 965        Setup HDF files
 966        ---------------
 967
 968        This method sets up the HDF structures
 969
 970        Parameters
 971        ----------
 972        hdf_name : str
 973            The name of the HDF file to be created
 974        
 975        Returns
 976        -------
 977        None
 978        """
 979        num_threads = min(8, mp.cpu_count() - 1)
 980        tables.set_blosc_max_threads(num_threads)
 981        self.hdf_filename = os.path.join(self.temp_folder, hdf_name)
 982        self.filters = tables.Filters(
 983            complevel=5, complib="blosc"
 984        )  # compression with fast write speed
 985        self.h5file = tables.open_file(self.hdf_filename, "w")
 986        self.h5file.create_group("/", "ModelData")
 987
 988    def hdf_init(self, dset_name, shape: tuple, dtype: str = "float64") -> tables.CArray:
 989        
 990        """
 991        HDF Initialize
 992        ----------------------------------------
 993
 994        Method that initializes the HDF chunked
 995        array
 996
 997        Parameters
 998        ----------
 999        dset_name : str
1000            The name of the dataset to be created
1001        shape : tuple
1002
1003        
1004        Returns
1005        -------
1006        new_array: tables.CArray
1007        """
1008        if "float" in dtype:
1009            atom = tables.FloatAtom()
1010        elif "uint8" in dtype:
1011            atom = tables.UInt8Atom()
1012        else:
1013            atom = tables.IntAtom()
1014        group = self.h5file.root.ModelData
1015        new_array = self.h5file.create_carray(
1016            group, dset_name, atom, shape, filters=self.filters
1017        )
1018        return new_array
1019
1020    def hdf_node_list(self):
1021        return [x.name for x in self.h5file.list_nodes("ModelData")]
1022
1023    def hdf_remove_node_list(self, dset_name):
1024        group = self.h5file.root.ModelData
1025        try:
1026            self.h5file.remove_node(group, dset_name)
1027        except:
1028            pass
1029        self.hdf_node_list
1030
1031
def triangle_distribution_fix(left, mode, right, random_seed=None):
    """
    Triangle Distribution Fix
    -------------------------

    Draw a sample from a widened triangular distribution, rejecting values
    outside [left, right].

    The sampling interval is enlarged to
    [left - (mode - left), right + (right - mode)] so that values at (or very
    near) `left` and `right` are drawn with non-zero probability; draws that
    fall outside [left, right] are rejected and redrawn.

    Parameters
    ----------
    left: `float`
        lower limit
    mode: `float`
        mode
    right: `float`
        upper limit
    random_seed: `int`, optional
        seed for numpy's global random state, applied once before sampling

    Returns
    -------
    sn_db: `float`
        Drawn sample from the parameterised triangular distribution
    """
    # Seed once, *outside* the rejection loop. Re-seeding on every iteration
    # (as previously done) redrew the identical value each time, looping
    # forever whenever the seeded draw fell outside [left, right]. An explicit
    # `is not None` check also allows seed 0.
    if random_seed is not None:
        np.random.seed(random_seed)
    # Start outside the accepted range so at least one draw always happens.
    # (Initialising to 0 returned 0 without sampling when left <= 0 <= right.)
    sn_db = left - 1.0
    while sn_db < left or sn_db > right:
        sn_db = np.random.triangular(left - (mode - left), mode, right + (right - mode))

    return sn_db
class Parameters(_Borg):
  27class Parameters(_Borg):
  28    """
  29    Parameter object storing all model parameters.
  30
  31    Attributes
  32    ----------
  33    model_dir_name : str
  34        This is the name that the directory will be given, by default `seismic`
  35    parameter_file : str
  36        User parameters are read from the 'user_config' json file, and
  37        additional model parameters are set.
  38    test_mode : int
  39        If test_mode is set using an integer, the size of the model will
        be reduced, e.g. 100 makes a 100x100 model. It reduces the amount of
        time that the program takes to generate data, which is useful when
        testing a model.
  43
  44        **Warning: If you put too small a number, the model may fail due to 
  45        not enough space to place faults etc...**
  46
  47        Value should ideally be >= 50
  48    runid : str
  49        The string runid will be added to the final model directory.
  50    rpm_scaling_factors : dict
        These are user-defined parameters. You can use the defaults
        provided, but results might be unrealistic.
        These might need to be tuned to get realistic synthetic
  54        data.
  55    sqldict : dict
  56        This is a dictionary structure that stores all the parameters
  57        of the model. This dictionary eventually gets written to a
  58        sqlite DB file.
  59
  60    Methods
  61    -------
  62    setup_model(rpm_factors=None) -> None:
        Method to set up all the necessary parameters to start a new model.
  64    make_directories() -> None:
  65        Method that generates all necessary directory structures on disk
  66    write_key_file():
        Method to generate a key file that describes coordinate systems,
        track, bin, and digi (inline, xlines) numbering.
  69    write_to_logfile(msg, mainkey=None, subkey=None, val=""):
  70        Method that writes to the logfile
  71    """
  72
  73    def __init__(self, user_config: str = CONFIG_PATH, test_mode=None, runid=None):
  74        """
  75        Initialize the Parameters object.
  76
  77        Parameters
  78        ----------
  79        user_config : `str`, optional
  80            This is the path on disk that points to a `.json` file
  81            that contains the configurations for each run, by default CONFIG_PATH
  82        test_mode : `int`, optional
  83            The parameter that sets the running mode, by default 0
  84        runid : `str`, optional
  85            This is the runid of the run, this comes in handy when you have many runs
  86            with various permutations of parameters, by default None
  87        """
  88        # reset the parameter dict in case we are building models within a loop, and the shared_state dict is not empty
  89        self._shared_state = {}
  90        super().__init__()
  91        self.model_dir_name: str = "seismic"
  92        self.parameter_file = user_config
  93        self.test_mode = test_mode
  94        self.runid = runid
  95        self.rpm_scaling_factors = dict()
  96        self.sqldict = defaultdict(dict)
  97
  98    def __repr__(self):
  99        """
 100        Representation method
 101
 102        Parameters
 103        ----------
 104        self : `Parameters`
 105            The instance of the Parameters object
 106        """
 107        # Make nice repr instead of a print method
 108        items = ("\t{} = {}".format(k, v) for k, v in self.__dict__.items())
 109        return "{}:\n{}".format(self.__class__.__name__, "\n".join(sorted(items)))
 110
 111    def __getitem__(self, key: str):
 112        """__getitem__
 113
 114        Enable retrieval of values as though the class instance is a dict
 115
 116        Parameters
 117        ----------
 118        key : str
 119            The key desired to be accessed
 120
 121        Returns
 122        -------
 123        any
 124            Value of the key
 125        """
 126        return self._shared_state[key]
 127
    def setup_model(self, rpm_factors=None) -> None:
        """
        Setup Model
        -----------
        Sets up the creation of essential parameters and directories

        Runs, in order: parameter setup, directory creation, key-file
        output, RPM scaling-factor setup, and the initial write of the
        model parameters to the logfile.

        Parameters
        ----------
        rpm_factors : `dict`, optional
            The rock physics model factors for generating the synthetic cube.
            By default the rpm factors come from a default in the main.py file

        Returns
        -------
        None
        """
        # Set model parameters first: the steps below rely on the folders,
        # date stamp and logfile path it derives.
        self._set_model_parameters(self.model_dir_name)
        self.make_directories()
        self.write_key_file()
        self._setup_rpm_scaling_factors(rpm_factors)

        # Write model parameters to logfile (self.logfile was set above)
        self._write_initial_model_parameters_to_logfile()
 152
 153    def make_directories(self) -> None:
 154        """
 155        Make directories.
 156        -----------------
 157
 158        Creates the necessary directories to run the model.
 159
 160        This function creates the directories on disk
 161        necessary for the model to run.
 162
 163        Parameters
 164        ----------
 165        self : `Parameters`
 166
 167        Returns
 168        -------
 169        None
 170        """
 171        print(f"\nModel folder: {self.work_subfolder}")
 172        self.sqldict["model_id"] = pathlib.Path(self.work_subfolder).name
 173        for folder in [self.project_folder, self.work_subfolder, self.temp_folder]:
 174            try:
 175                os.stat(folder)
 176            except OSError:
 177                print(f"Creating directory: {folder}")
 178                # Try making folders (can fail if multiple models are being built simultaneously in a new dir)
 179                try:
 180                    os.mkdir(folder)
 181                except OSError:
 182                    pass
 183        try:
 184            os.system(f"chmod -R 777 {self.work_subfolder}")
 185        except OSError:
 186            print(f"Could not chmod {self.work_subfolder}. Continuing...")
 187            pass
 188
 189    def write_key_file(self) -> None:
 190        """
 191        Write key file
 192        --------------
 193
 194        Writes a file that ocntains important parameters about the cube.
 195
 196        Method that writes important parameters about the synthetic cube
 197        such as coordinate transforms and sizes.
 198
 199        Parameters
 200        ----------
 201        None
 202
 203        Returns
 204        ----------
 205        None
 206        """
 207        # Set plausible key file values
 208        geom_expand = dict()
 209        geom_expand["3D_NAME"] = "synthetic data for training"
 210        geom_expand["COORD_METHOD"] = 1
 211        geom_expand["DATA_TYPE"] = "3D"
 212        geom_expand["DELTA_BIN_NUM"] = 1
 213        geom_expand["DELTA_TRACK_NUM"] = 1
 214        geom_expand["DIGITIZATION"] = 4
 215        geom_expand["EPSG_CRS"] = 32066
 216        geom_expand["FIRST_BIN"] = 1000
 217        geom_expand["FIRST_TRACK"] = 2000
 218        geom_expand["FORMAT"] = 1
 219        geom_expand["N_BIN"] = self.cube_shape[1]
 220        geom_expand["N_SAMP"] = self.cube_shape[2]
 221        geom_expand["N_TRACK"] = self.cube_shape[0]
 222        geom_expand["PROJECTION"] = 316
 223        geom_expand["REAL_DELTA_X"] = 100.0
 224        geom_expand["REAL_DELTA_Y"] = 100.0
 225        geom_expand["REAL_GEO_X"] = 1250000.0
 226        geom_expand["REAL_GEO_Y"] = 10500000.0
 227        geom_expand["SKEW_ANGLE"] = 0.0
 228        geom_expand["SUBPOINT_CODE"] = "TTTBBB"
 229        geom_expand["TIME_OR_DEPTH"] = "TIME"
 230        geom_expand["TRACK_DIR"] = "H"
 231        geom_expand["XFORM_TO_WGS84"] = 1241
 232        geom_expand["ZERO_TIME"] = 0
 233
 234        # Write the keyfile
 235        outputkey = os.path.join(
 236            self.work_subfolder, "seismicCube_" + self.date_stamp + ".key"
 237        )
 238        with open(outputkey, "w") as key:
 239            key.write(
 240                "{}MESSAGE_FILE\n".format(20 * " ")
 241            )  # spaces are important here.. Require 20 of them
 242            key.write("3D_NAME C %s\n" % geom_expand["3D_NAME"])
 243            key.write("COORD_METHOD I %d\n" % int(geom_expand["COORD_METHOD"]))
 244            key.write("DATA_TYPE C %s\n" % geom_expand["DATA_TYPE"])
 245            key.write("DELTA_BIN_NUM I %d\n" % int(geom_expand["DELTA_BIN_NUM"]))
 246            key.write("DELTA_TRACK_NUM I %d\n" % int(geom_expand["DELTA_TRACK_NUM"]))
 247            key.write("DIGITIZATION I %d\n" % int(geom_expand["DIGITIZATION"]))
 248            key.write("EPSG_CRS I %d\n" % int(geom_expand["EPSG_CRS"]))
 249            key.write("FIRST_BIN I %d\n" % int(geom_expand["FIRST_BIN"]))
 250            key.write("FIRST_TRACK I %d\n" % int(geom_expand["FIRST_TRACK"]))
 251            key.write("FORMAT I %d\n" % int(geom_expand["FORMAT"]))
 252            key.write("N_BIN I %d\n" % int(geom_expand["N_BIN"]))
 253            key.write("N_SAMP I %d\n" % int(geom_expand["N_SAMP"]))
 254            key.write("N_TRACK I %d\n" % int(geom_expand["N_TRACK"]))
 255            key.write("PROJECTION I %d\n" % int(geom_expand["PROJECTION"]))
 256            key.write("REAL_DELTA_X R %f\n" % float(geom_expand["REAL_DELTA_X"]))
 257            key.write("REAL_DELTA_Y R %f\n" % float(geom_expand["REAL_DELTA_Y"]))
 258            key.write("REAL_GEO_X R %f\n" % float(geom_expand["REAL_GEO_X"]))
 259            key.write("REAL_GEO_Y R %f\n" % float(geom_expand["REAL_GEO_Y"]))
 260            key.write("SKEW_ANGLE R %f\n" % float(geom_expand["SKEW_ANGLE"]))
 261            key.write("SUBPOINT_CODE C %s\n" % geom_expand["SUBPOINT_CODE"])
 262            key.write("TIME_OR_DEPTH C %s\n" % geom_expand["TIME_OR_DEPTH"])
 263            key.write("TRACK_DIR C %s\n" % geom_expand["TRACK_DIR"])
 264            key.write("XFORM_TO_WGS84 I %d\n" % int(geom_expand["XFORM_TO_WGS84"]))
 265            key.write("ZERO_TIME I %d\n" % int(geom_expand["ZERO_TIME"]))
 266        print(f"\nKeyfile created at {outputkey}")
 267
 268    def write_to_logfile(self, msg, mainkey=None, subkey=None, val="") -> None:
 269        """
 270        write_to_logfile
 271
 272        Method to write msg to model_parameter file
 273        (includes newline)
 274
 275        Parameters
 276        ----------
 277        msg : `string`
 278        Required string object that will be written tom model parameter file.
 279        mainkey : `string`
 280        String of the key to be written into de sql dictionary.
 281        subkey : `string`
 282        String of the subkey to be written into de sql dictionary.
 283        val : `string`
 284        String of the value that should be written into the sql dictionary.
 285        
 286        Returns
 287        -------
 288        None
 289        """
 290        if msg is not None:
 291            with open(self.logfile, "a") as f:
 292                f.write(f"{msg}\n")
 293        if mainkey is not None:
 294            self.sqldict[mainkey][subkey] = val
 295            # for k, v in self.sqldict.items():
 296            #     print(f"{k}: {v}")
 297
 298    def write_sqldict_to_logfile(self, logfile=None) -> None:
 299        """
 300        write_sqldict_to_logfile
 301
 302        Write the sql dictionary to the logfile
 303
 304        Parameters
 305        ----------
 306        logfile : `string`
 307        The path to the log file. By default None
 308        
 309        Returns
 310        -------
 311        None
 312        """
 313        if logfile is None:
 314            logfile = self.logfile
 315        with open(logfile, "a") as f:
 316            for k, nested in self.sqldict.items():
 317                print(k, file=f)
 318                if k == "model_id":
 319                    print(f"\t{nested}", file=f)
 320                else:
 321                    for subkey, value in nested.items():
 322                        print(f"\t{subkey}: {value}", file=f)
 323                print(file=f)
 324
 325    def write_sqldict_to_db(self) -> None:
 326        """
 327        write_sqldict_to_db
 328
 329        Method to write the sqldict to database sqlite file
 330
 331        Parameters
 332        ----------
 333        None
 334        
 335        Returns
 336        -------
 337        None
 338        """
 339        model_id = pathlib.Path(self.work_subfolder).name
 340        model_parameters = self.sqldict["model_parameters"]
 341        fault_keys = [k for k in self.sqldict.keys() if "fault" in k]
 342        closure_keys = [k for k in self.sqldict.keys() if "closure" in k]
 343
 344        conn = sqlite3.connect(os.path.join(self.work_subfolder, "parameters.db"))
 345        # tables = ["model_parameters", "fault_parameters", "closure_parameters"]
 346        # create tables
 347        sql = f"CREATE TABLE model_parameters (model_id string primary key, {','.join(model_parameters.keys())})"
 348        conn.execute(sql)
 349        # insert model_parameters
 350        columns = "model_id, " + ", ".join(model_parameters.keys())
 351        placeholders = ", ".join("?" * (len(model_parameters) + 1))
 352        sql = f"INSERT INTO model_parameters ({columns}) VALUES ({placeholders})"
 353        values = tuple([model_id] + [str(x) for x in model_parameters.values()])
 354        conn.execute(sql, values)
 355        conn.commit()
 356
 357        # fault parameters
 358        if len(fault_keys) > 0:
 359            f = tuple(self.sqldict[fault_keys[0]].keys())
 360            sql = f"CREATE TABLE fault_parameters ({','.join(f)})"
 361            conn.execute(sql)
 362            columns = ", ".join(self.sqldict[fault_keys[0]].keys())
 363            placeholders = ", ".join("?" * len(self.sqldict[fault_keys[0]].keys()))
 364            for f in fault_keys:
 365                sql = (
 366                    f"INSERT INTO fault_parameters ({columns}) VALUES ({placeholders})"
 367                )
 368                conn.execute(sql, tuple(self.sqldict[f].values()))
 369                conn.commit()
 370
 371        if len(closure_keys) > 0:
 372            c = tuple(self.sqldict[closure_keys[0]].keys())
 373            sql = f"CREATE TABLE closure_parameters ({','.join(c)})"
 374            conn.execute(sql)
 375            columns = ", ".join(self.sqldict[closure_keys[0]].keys())
 376            placeholders = ", ".join("?" * len(self.sqldict[closure_keys[0]].keys()))
 377            for c in closure_keys:
 378                sql = f"INSERT INTO closure_parameters ({columns}) VALUES ({placeholders})"
 379                conn.execute(sql, tuple(self.sqldict[c].values()))
 380                conn.commit()
 381
 382    def _setup_rpm_scaling_factors(self, rpm_factors: dict) -> None:
 383        """
 384        Setup Rock Physics Model scaling factors
 385        ----------------------------------------
 386
 387        Method to initialize all the rock physics model
 388        scaling factors. Method also writes the values to
 389        the model_parameters log file.
 390
 391        Parameters
 392        ----------
 393        TODO remove the default in the main.py or have a single source of truth
 394        rpm_factors : `dict`
 395        Dictionary containing the scaling factors for the RPM.
 396        If no RPM factors are provided, the default values are used.
 397        
 398        Returns
 399        -------
 400        None
 401        """
 402        if rpm_factors and not self.test_mode:
 403            self.rpm_scaling_factors = rpm_factors
 404        else:
 405            # Use defaults for RPM Z-shifts and scaling factors
 406            self.rpm_scaling_factors = dict()
 407            self.rpm_scaling_factors["layershiftsamples"] = int(
 408                np.random.triangular(35, 75, 125)
 409            )
 410            self.rpm_scaling_factors["RPshiftsamples"] = int(
 411                np.random.triangular(5, 11, 20)
 412            )
 413            self.rpm_scaling_factors["shalerho_factor"] = 1.0
 414            self.rpm_scaling_factors["shalevp_factor"] = 1.0
 415            self.rpm_scaling_factors["shalevs_factor"] = 1.0
 416            self.rpm_scaling_factors["sandrho_factor"] = 1.0
 417            self.rpm_scaling_factors["sandvp_factor"] = 1.0
 418            self.rpm_scaling_factors["sandvs_factor"] = 1.0
 419            self.rpm_scaling_factors["nearfactor"] = 1.0
 420            self.rpm_scaling_factors["midfactor"] = 1.0
 421            self.rpm_scaling_factors["farfactor"] = 1.0
 422        # Write factors to logfile
 423        for k, v in self.rpm_scaling_factors.items():
 424            self.write_to_logfile(
 425                msg=f"{k}: {v}", mainkey="model_parameters", subkey=k, val=v
 426            )
 427
    def _set_model_parameters(self, dname: str) -> None:
        """
        Set Model Parameters
        ----------------------------------------

        Method that sets model parameters from user-provided
        config.json file, derives the output folder layout, and triggers
        the randomly-chosen parameter and fault setup.

        Parameters
        ----------
        dname : `str`
            Directory name specified in the configuration file,
            or the default is used

        Returns
        -------
        None
        """
        self.current_dir = os.getcwd()
        self.start_time = datetime.datetime.now()
        # Date stamp (year + fraction of year) tags every output file/folder
        self.date_stamp = self.year_plus_fraction()

        # Read from input json
        self.parameters_json = self._read_json()
        self._read_user_params()

        # Directories
        model_dir = f"{dname}__{self.date_stamp}"
        # NOTE(review): temp_dir is built but never used — the same f-string
        # is rebuilt inline for self.temp_folder below.
        temp_dir = f"temp_folder__{self.date_stamp}"
        self.work_subfolder = os.path.abspath(
            os.path.join(self.project_folder, model_dir)
        )
        # NOTE(review): temp folder is rooted at work_folder while the model
        # folder uses project_folder — both come from the user config
        # (_read_user_params); presumably intentional, confirm.
        self.temp_folder = os.path.abspath(
            os.path.join(self.work_folder, f"temp_folder__{self.date_stamp}")
        )
        if self.runid:
            # Tag output folders with the run id so parallel runs don't clash
            self.work_subfolder = f"{self.work_subfolder}_{self.runid}"
            self.temp_folder = f"{self.temp_folder}_{self.runid}"

        # Various model parameters, not in config
        self.num_lyr_lut = self.cube_shape[2] * 2 * self.infill_factor
        # 2500 voxels = 25x25x4m voxels size, 25% porosity and closures > ~40,000 bbl
        # Use the minimum voxel count as initial closure size filter
        self.closure_min_voxels = min(
            self.closure_min_voxels_simple,
            self.closure_min_voxels_faulted,
            self.closure_min_voxels_onlap,
        )
        self.order = self.bandwidth_ord

        if self.test_mode:
            # Reduced-size test model: same value used for both dimensions
            self._set_test_mode(self.test_mode, self.test_mode)

        # Random choices are separated into this method
        self._randomly_chosen_model_parameters()
        # Fault choices
        self._fault_settings()

        # Logfile (lives in the work subfolder; used by write_to_logfile)
        self.logfile = os.path.join(
            self.work_subfolder, f"model_parameters_{self.date_stamp}.txt"
        )

        # HDF file to store various model data
        self.hdf_master = os.path.join(
            self.work_subfolder, f"seismicCube__{self.date_stamp}.hdf"
        )
 495
 496    def _calculate_snr_after_lateral_filter(self, sn_db: float) -> float:
 497        """
 498        Calculate Signal:Noise Ratio after lateral filter
 499        ----------------------------------------
 500
 501        Method that computes the signal to noise ratio after
 502        the lateral filter is applied.
 503
 504        Parameters
 505        ----------
 506        sn_db : `float`
 507            Value of the signal to noise value from the database
 508        
 509        Returns
 510        -------
 511        pre_smear_snr : `float`
 512            Signal to noise ratio after the lateral filter is applied
 513        """
 514        snr_of_lateral_filter = 10 * np.log10(self.lateral_filter_size ** 2)
 515        pre_smear_snr = sn_db - snr_of_lateral_filter
 516        return pre_smear_snr
 517
    def _randomly_chosen_model_parameters(self) -> None:
        """
        Randomly Chosen Model Parameters
        ----------------------------------------

        Method that sets all randomly chosen model parameters.

        All draws use numpy's global random state, so the order of the
        statements below matters for reproducibility when seeding globally.

        Parameters
        ----------
        None

        Returns
        -------
        None
        """
        # Initial layer standard deviation (scaled by the infill factor)
        self.initial_layer_stdev = (
            np.random.uniform(self.lyr_stdev[0], high=self.lyr_stdev[1])
            * self.infill_factor
        )

        # lateral filter size, either 1x1, 3x3 or 5x5
        # (uniform [0, 2) + 0.5 truncates to 0, 1 or 2; then *2 + 1 -> 1, 3, 5)
        self.lateral_filter_size = int(np.random.uniform(0, 2) + 0.5) * 2 + 1

        # Signal to noise in decibels
        sn_db = triangle_distribution_fix(
            left=self.snr_db[0], mode=self.snr_db[1], right=self.snr_db[2]
        )
        # sn_db = np.random.triangular(left=self.snr_db[0], mode=self.snr_db[1], right=self.snr_db[2])
        # self.sn_db = self._calculate_snr_after_lateral_filter(sn_db)
        self.sn_db = sn_db

        # Percentage of layers that are sand
        self.sand_layer_pct = np.random.uniform(
            low=self.sand_layer_pct_min, high=self.sand_layer_pct_max
        )

        # Minimum shallowest depth of seabed
        if (
            len(self.seabed_min_depth) > 1
        ):  # if low/high value provided, select a value between these
            # NOTE: this replaces the configured list with a chosen scalar
            self.seabed_min_depth = np.random.randint(
                low=self.seabed_min_depth[0], high=self.seabed_min_depth[1]
            )

        # Low/High bandwidth to be used
        self.lowfreq = np.random.uniform(self.bandwidth_low[0], self.bandwidth_low[1])
        self.highfreq = np.random.uniform(
            self.bandwidth_high[0], self.bandwidth_high[1]
        )

        # Choose whether to add coherent noise (0 or 1, equally likely)
        self.add_noise = np.random.choice((0, 1))
        if self.add_noise == 1:
            # Coin flip between "frown" and "smile" shaped coherent noise
            self.smiley_or_frowny = np.random.choice((0, 1))
            if self.smiley_or_frowny == 1:
                self.fnoise = "random_coherent_frowns"
                print("Coherent frowns will be inserted")
            else:
                self.fnoise = "random_coherent_smiles"
                print("Coherent smiles will be inserted")
        else:
            self.fnoise = "random"
            print("No coherent noise will be inserted")

        # Salt inclusion
        # self.include_salt = np.random.choice([True, False], 1, p=[0.5, 0.5])[0]
        # NOTE(review): noise_stretch_factor is drawn even when salt is
        # excluded — presumably to keep the random stream consistent; confirm.
        self.noise_stretch_factor = np.random.uniform(1.15, 1.35)
        if self.include_salt:
            print(
                "Salt will be inserted. noise_stretch_factor = {}".format(
                    np.around(self.noise_stretch_factor, 2)
                )
            )
        else:
            print("Salt will be NOT be inserted.")
 594
 595    def _read_json(self) -> dict:
 596        # TODO Move this to a separate function in utlis?
 597        """
 598        Read JSON file
 599        ----------------------------------------
 600
 601        Reads a json file on disk and loads it as
 602        dictionary
 603
 604        Parameters
 605        ----------
 606        None
 607        
 608        Returns
 609        -------
 610        config : `dict`
 611            Dictionary with the configuration options
 612        """
 613        with open(self.parameter_file) as f:
 614            config: dict = json.load(f)
 615        return config
 616
 617    def _read_user_params(self) -> None:
 618        """
 619        Read User Params
 620        ----------------------------------------
 621
 622        Takes the read in dictionary of JSON configuration
 623        and reads each parameter and inserts it into the
 624        attributes.
 625
 626        In the end it prints a summary of the parameters
 627        to the console.
 628
 629        Parameters
 630        ----------
 631        None
 632        
 633        Returns
 634        -------
 635        None
 636        """
 637        d = self._read_json()
 638        self.project = d["project"]
 639        self.project_folder = d["project_folder"]
 640        wfolder = d["work_folder"]
 641        if not os.path.exists(wfolder):
 642            wfolder = "/tmp"  # In case work_folder does not exist, use /tmp
 643        self.work_folder = wfolder
 644        # read parameters into Parameter class attributes
 645        self.cube_shape = tuple(d["cube_shape"])
 646        self.incident_angles = tuple(d["incident_angles"])
 647        self.digi = d["digi"]
 648        self.infill_factor = d["infill_factor"]
 649        self.lyr_stdev = d["initial_layer_stdev"]
 650        self.thickness_min = d["thickness_min"]
 651        self.thickness_max = d["thickness_max"]
 652        self.seabed_min_depth = d["seabed_min_depth"]
 653        self.snr_db = d["signal_to_noise_ratio_db"]
 654        # self.random_depth_perturb = d['random_depth_perturb_range']
 655        self.bandwidth_low = d["bandwidth_low"]
 656        self.bandwidth_high = d["bandwidth_high"]
 657        self.bandwidth_ord = d["bandwidth_ord"]
 658        self.dip_factor_max = d["dip_factor_max"]
 659        self.min_number_faults = d["min_number_faults"]
 660        self.max_number_faults = d["max_number_faults"]
 661        self.basin_floor_fans = d["basin_floor_fans"]
 662        self.pad_samples = d["pad_samples"]
 663        self.qc_plots = d["extra_qc_plots"]
 664        self.verbose = d["verbose"]
 665        self.include_channels = d["include_channels"]
 666        self.include_salt = d["include_salt"]
 667        self.max_column_height = d["max_column_height"]
 668        self.closure_types = d["closure_types"]
 669        self.closure_min_voxels_simple = d["min_closure_voxels_simple"]
 670        self.closure_min_voxels_faulted = d["min_closure_voxels_faulted"]
 671        self.closure_min_voxels_onlap = d["min_closure_voxels_onlap"]
 672        self.partial_voxels = d["partial_voxels"]
 673        self.variable_shale_ng = d["variable_shale_ng"]
 674        self.sand_layer_thickness = d["sand_layer_thickness"]
 675        self.sand_layer_pct_min = d["sand_layer_fraction"]["min"]
 676        self.sand_layer_pct_max = d["sand_layer_fraction"]["max"]
 677        self.hdf_store = d["write_to_hdf"]
 678        self.broadband_qc_volume = d["broadband_qc_volume"]
 679        self.model_qc_volumes = d["model_qc_volumes"]
 680        self.multiprocess_bp = d["multiprocess_bp"]
 681
 682        # print em
 683        self.__repr__()
 684
 685    def _set_test_mode(self, size_x: int = 50, size_y: int = 50) -> None:
 686        """
 687        Set test mode
 688        -------------
 689
 690        Sets whether the parameters for testinf mode. If no size integer
 691        is provided is defaults to 50.
 692
 693        This value is a good minimum because it allows for the 3D model
 694        to be able to contain faults and other objects inside.
 695
 696        Parameters
 697        ----------
 698        size_x : `int`
 699        The parameter that sets the size of the model in the x direction
 700        size_y : `int`
 701        The parameter that sets the size of the model in the y direction
 702        
 703        Returns
 704        -------
 705        None
 706        """
 707
 708        # Set output model folder in work_folder location but with same directory name as project_folder
 709        normpath = (
 710            os.path.normpath(self.project_folder) + "_test_mode_"
 711        )  # strip trailing / if added
 712        new_project_folder = os.path.join(self.work_folder, os.path.basename(normpath))
 713        # Put all folders inside project folder for easy deleting
 714        self.work_folder = new_project_folder
 715        self.project_folder = new_project_folder
 716        self.work_subfolder = os.path.join(
 717            new_project_folder, os.path.basename(self.work_subfolder)
 718        )
 719        if self.runid:
 720            # Append runid if provided
 721            self.temp_folder = f"{self.temp_folder}_{self.runid}__{self.date_stamp}"
 722        else:
 723            self.temp_folder = os.path.abspath(
 724                os.path.join(self.work_folder, f"temp_folder__{self.date_stamp}")
 725            )
 726        # Set smaller sized model
 727        self.cube_shape = tuple([size_x, size_y, self.cube_shape[-1]])
 728        # Print message to user
 729        print(
 730            "{0}\nTesting Mode\nOutput Folder: {1}\nCube_Shape: {2}\n{0}".format(
 731                36 * "-", self.project_folder, self.cube_shape
 732            )
 733        )
 734
 735    def _fault_settings(self) -> None:
 736        """
 737        Set Fault Settings
 738        -------------
 739
 740        Sets the parameters that will be used to generate faults throughout
 741        the synthetic model.
 742
 743        Parameters
 744        ----------
 745        None
 746        
 747        Returns
 748        -------
 749        None
 750        """
 751        # Fault parameters
 752        self.low_fault_throw = 5.0 * self.infill_factor
 753        self.high_fault_throw = 35.0 * self.infill_factor
 754
 755        # mode & clustering are randomly chosen
 756        self.mode = np.random.choice([0, 1, 2], 1)[
 757            0
 758        ]
 759        self.clustering = np.random.choice([0, 1, 2], 1)[0]
 760
 761        if self.mode == 0:
 762            # As random as it can be
 763            self.number_faults = np.random.randint(
 764                self.min_number_faults, self.max_number_faults
 765            )
 766            self.fmode = "random"
 767
 768        elif self.mode == 1:
 769            if self.clustering == 0:
 770                self.fmode = "self_branching"
 771                # Self Branching. avoid large fault
 772                self.number_faults = np.random.randint(
 773                    3, 9
 774                )
 775                self.low_fault_throw = 5.0 * self.infill_factor
 776                self.high_fault_throw = 15.0 * self.infill_factor
 777            if self.clustering == 1:
 778                # Stair case
 779                self.fmode = "stair_case"
 780                self.number_faults = np.random.randint(
 781                    5, self.max_number_faults
 782                )
 783            if self.clustering == 2:
 784                # Relay ramps
 785                self.fmode = "relay_ramp"
 786                self.number_faults = np.random.randint(
 787                    3, 9
 788                )
 789                self.low_fault_throw = 5.0 * self.infill_factor
 790                self.high_fault_throw = 15.0 * self.infill_factor
 791        elif self.mode == 2:
 792            # Horst and graben
 793            self.fmode = "horst_and_graben"
 794            self.number_faults = np.random.randint(
 795                3, 7
 796            )
 797
 798        self.fault_param = [
 799            str(self.mode) + str(self.clustering),
 800            self.number_faults,
 801            self.low_fault_throw,
 802            self.high_fault_throw,
 803        ]
 804
 805    def _get_commit_hash(self) -> str:
 806        """
 807        Get Commit Hash
 808        -------------
 809
 810        Gets the commit hash of the current git repository.
 811
 812        #TODO Explain what this is for exactly
 813
 814        Parameters
 815        ----------
 816        None
 817        
 818        Returns
 819        -------
 820        sha : `str`
 821            The commit hash of the current git repository
 822        """
 823
 824        try:
 825            sha = (
 826                subprocess.check_output(["git", "rev-parse", "HEAD"])
 827                .decode("utf-8")
 828                .strip()
 829            )
 830        except CalledProcessError:
 831            sha = "cwd not a git repository"
 832        return sha
 833
 834    def _write_initial_model_parameters_to_logfile(self) -> None:
 835        """
 836        Write Initial Model Parameters to Logfile
 837        ----------------------------------------
 838
 839        Method that writes the initial parameters set for the model
 840        to the logfile.
 841
 842        Parameters
 843        ----------
 844        None
 845        
 846        Returns
 847        -------
 848        None
 849        """
 850        _sha = self._get_commit_hash()
 851        self.write_to_logfile(
 852            f"SHA: {_sha}", mainkey="model_parameters", subkey="sha", val=_sha
 853        )
 854        self.write_to_logfile(
 855            f"modeling start time: {self.start_time}",
 856            mainkey="model_parameters",
 857            subkey="start_time",
 858            val=self.start_time,
 859        )
 860        self.write_to_logfile(
 861            f"project_folder: {self.project_folder}",
 862            mainkey="model_parameters",
 863            subkey="project_folder",
 864            val=self.project_folder,
 865        )
 866        self.write_to_logfile(
 867            f"work_subfolder: {self.work_subfolder}",
 868            mainkey="model_parameters",
 869            subkey="work_subfolder",
 870            val=self.work_subfolder,
 871        )
 872        self.write_to_logfile(
 873            f"cube_shape: {self.cube_shape}",
 874            mainkey="model_parameters",
 875            subkey="cube_shape",
 876            val=self.cube_shape,
 877        )
 878        self.write_to_logfile(
 879            f"incident_angles: {self.incident_angles}",
 880            mainkey="model_parameters",
 881            subkey="incident_angles",
 882            val=self.incident_angles,
 883        )
 884        self.write_to_logfile(
 885            f"number_faults: {self.number_faults}",
 886            mainkey="model_parameters",
 887            subkey="number_faults",
 888            val=self.number_faults,
 889        )
 890        self.write_to_logfile(
 891            f"lateral_filter_size: {self.lateral_filter_size}",
 892            mainkey="model_parameters",
 893            subkey="lateral_filter_size",
 894            val=self.lateral_filter_size,
 895        )
 896        self.write_to_logfile(
 897            f"salt_inserted: {self.include_salt}",
 898            mainkey="model_parameters",
 899            subkey="salt_inserted",
 900            val=self.include_salt,
 901        )
 902        self.write_to_logfile(
 903            f"salt noise_stretch_factor: {self.noise_stretch_factor:.2f}",
 904            mainkey="model_parameters",
 905            subkey="salt_noise_stretch_factor",
 906            val=self.noise_stretch_factor,
 907        )
 908        self.write_to_logfile(
 909            f"bandpass_bandlimits: {self.lowfreq:.2f}, {self.highfreq:.2f}"
 910        )
 911        self.write_to_logfile(
 912            msg=None,
 913            mainkey="model_parameters",
 914            subkey="bandpass_bandlimit_low",
 915            val=self.lowfreq,
 916        )
 917        self.write_to_logfile(
 918            msg=None,
 919            mainkey="model_parameters",
 920            subkey="bandpass_bandlimit_high",
 921            val=self.highfreq,
 922        )
 923        self.write_to_logfile(
 924            f"sn_db: {self.sn_db:.2f}",
 925            mainkey="model_parameters",
 926            subkey="sn_db",
 927            val=self.sn_db,
 928        )
 929        self.write_to_logfile(
 930            f"initial layer depth stdev (flatness of layer): {self.initial_layer_stdev:.2f}",
 931            mainkey="model_parameters",
 932            subkey="initial_layer_stdev",
 933            val=self.initial_layer_stdev,
 934        )
 935
 936    @staticmethod
 937    def year_plus_fraction() -> str:
 938        # TODO Move this to utils separate module
 939        """
 940        Year Plus Fraction
 941        ----------------------------------------
 942
 943        Method generates a time stamp in the format of
 944        year + fraction of year.
 945
 946        Parameters
 947        ----------
 948        None
 949        
 950        Returns
 951        -------
 952        fraction of the year : str
 953            The time stamp in the format of year + fraction of year
 954
 955        """
 956        now = datetime.datetime.now()
 957        year = now.year
 958        secs_in_year = datetime.timedelta(days=365).total_seconds()
 959        fraction_of_year = (
 960            now - datetime.datetime(year, 1, 1, 0, 0)
 961        ).total_seconds() / secs_in_year
 962        return format(year + fraction_of_year, "14.8f").replace(" ", "")
 963
 964    def hdf_setup(self, hdf_name: str) -> None:
 965        """
 966        Setup HDF files
 967        ---------------
 968
 969        This method sets up the HDF structures
 970
 971        Parameters
 972        ----------
 973        hdf_name : str
 974            The name of the HDF file to be created
 975        
 976        Returns
 977        -------
 978        None
 979        """
 980        num_threads = min(8, mp.cpu_count() - 1)
 981        tables.set_blosc_max_threads(num_threads)
 982        self.hdf_filename = os.path.join(self.temp_folder, hdf_name)
 983        self.filters = tables.Filters(
 984            complevel=5, complib="blosc"
 985        )  # compression with fast write speed
 986        self.h5file = tables.open_file(self.hdf_filename, "w")
 987        self.h5file.create_group("/", "ModelData")
 988
 989    def hdf_init(self, dset_name, shape: tuple, dtype: str = "float64") -> tables.CArray:
 990        
 991        """
 992        HDF Initialize
 993        ----------------------------------------
 994
 995        Method that initializes the HDF chunked
 996        array
 997
 998        Parameters
 999        ----------
1000        dset_name : str
1001            The name of the dataset to be created
1002        shape : tuple
1003
1004        
1005        Returns
1006        -------
1007        new_array: tables.CArray
1008        """
1009        if "float" in dtype:
1010            atom = tables.FloatAtom()
1011        elif "uint8" in dtype:
1012            atom = tables.UInt8Atom()
1013        else:
1014            atom = tables.IntAtom()
1015        group = self.h5file.root.ModelData
1016        new_array = self.h5file.create_carray(
1017            group, dset_name, atom, shape, filters=self.filters
1018        )
1019        return new_array
1020
1021    def hdf_node_list(self):
1022        return [x.name for x in self.h5file.list_nodes("ModelData")]
1023
1024    def hdf_remove_node_list(self, dset_name):
1025        group = self.h5file.root.ModelData
1026        try:
1027            self.h5file.remove_node(group, dset_name)
1028        except:
1029            pass
1030        self.hdf_node_list

Parameter object storing all model parameters.

Attributes
  • model_dir_name (str): This is the name that the directory will be given, by default seismic
  • parameter_file (str): User parameters are read from the 'user_config' json file, and additional model parameters are set.
  • test_mode (int): If test_mode is set using an integer, the size of the model will be reduced, e.g. 100 makes a 100x100 model. It reduces the amount of time that the program takes to generate data, which is useful when testing a model.

    Warning: If you put too small a number, the model may fail due to not enough space to place faults etc...

    Value should ideally be >= 50

  • runid (str): The string runid will be added to the final model directory.
  • rpm_scaling_factors (dict): These are user-defined parameters. You can use the defaults provided, but results might be unrealistic. These might need to be tuned to get realistic synthetic data.
  • sqldict (dict): This is a dictionary structure that stores all the parameters of the model. This dictionary eventually gets written to a sqlite DB file.
Methods

setup_model(rpm_factors=None) -> None: Method to set up all the necessary parameters to start a new model. make_directories() -> None: Method that generates all necessary directory structures on disk write_key_file(): Method to generate a key file that describes coordinate systems, track, bin, digi (inline, xlines, ) write_to_logfile(msg, mainkey=None, subkey=None, val=""): Method that writes to the logfile

Parameters( user_config: str = '/glb/data/geocrawler2/users/usgvat/Developer/geocrawler-datagenerator/config/config_ht.json', test_mode=None, runid=None)
73    def __init__(self, user_config: str = CONFIG_PATH, test_mode=None, runid=None):
74        """
75        Initialize the Parameters object.
76
77        Parameters
78        ----------
79        user_config : `str`, optional
80            This is the path on disk that points to a `.json` file
81            that contains the configurations for each run, by default CONFIG_PATH
82        test_mode : `int`, optional
83            The parameter that sets the running mode, by default 0
84        runid : `str`, optional
85            This is the runid of the run, this comes in handy when you have many runs
86            with various permutations of parameters, by default None
87        """
88        # reset the parameter dict in case we are building models within a loop, and the shared_state dict is not empty
89        self._shared_state = {}
90        super().__init__()
91        self.model_dir_name: str = "seismic"
92        self.parameter_file = user_config
93        self.test_mode = test_mode
94        self.runid = runid
95        self.rpm_scaling_factors = dict()
96        self.sqldict = defaultdict(dict)

Initialize the Parameters object.

Parameters
  • user_config (str, optional): This is the path on disk that points to a .json file that contains the configurations for each run, by default CONFIG_PATH
  • test_mode (int, optional): The parameter that sets the running mode, by default 0
  • runid (str, optional): This is the runid of the run, this comes in handy when you have many runs with various permutations of parameters, by default None
def setup_model(self, rpm_factors=None) -> None:
128    def setup_model(self, rpm_factors=None) -> None:
129        """
130        Setup Model
131        -----------
132        Sets up the creation of essential parameters and directories
133
134        Parameters
135        ----------
136        rpm_factors : `dict`, optional
137            The rock physics model factors for generating the synthetic cube.
138            By default the rpm factors come from a default in the main.py file
139
140        Returns
141        -------
142        None
143        """
144        # Set model parameters
145        self._set_model_parameters(self.model_dir_name)
146        self.make_directories()
147        self.write_key_file()
148        self._setup_rpm_scaling_factors(rpm_factors)
149
150        # Write model parameters to logfile
151        self._write_initial_model_parameters_to_logfile()
Setup Model

Sets up the creation of essential parameters and directories

Parameters
  • rpm_factors (dict, optional): The rock physics model factors for generating the synthetic cube. By default the rpm factors come from a default in the main.py file
Returns
  • None
def make_directories(self) -> None:
153    def make_directories(self) -> None:
154        """
155        Make directories.
156        -----------------
157
158        Creates the necessary directories to run the model.
159
160        This function creates the directories on disk
161        necessary for the model to run.
162
163        Parameters
164        ----------
165        self : `Parameters`
166
167        Returns
168        -------
169        None
170        """
171        print(f"\nModel folder: {self.work_subfolder}")
172        self.sqldict["model_id"] = pathlib.Path(self.work_subfolder).name
173        for folder in [self.project_folder, self.work_subfolder, self.temp_folder]:
174            try:
175                os.stat(folder)
176            except OSError:
177                print(f"Creating directory: {folder}")
178                # Try making folders (can fail if multiple models are being built simultaneously in a new dir)
179                try:
180                    os.mkdir(folder)
181                except OSError:
182                    pass
183        try:
184            os.system(f"chmod -R 777 {self.work_subfolder}")
185        except OSError:
186            print(f"Could not chmod {self.work_subfolder}. Continuing...")
187            pass

Make directories.

Creates the necessary directories to run the model.

This function creates the directories on disk necessary for the model to run.

Parameters
Returns
  • None
def write_key_file(self) -> None:
189    def write_key_file(self) -> None:
190        """
191        Write key file
192        --------------
193
194        Writes a file that ocntains important parameters about the cube.
195
196        Method that writes important parameters about the synthetic cube
197        such as coordinate transforms and sizes.
198
199        Parameters
200        ----------
201        None
202
203        Returns
204        ----------
205        None
206        """
207        # Set plausible key file values
208        geom_expand = dict()
209        geom_expand["3D_NAME"] = "synthetic data for training"
210        geom_expand["COORD_METHOD"] = 1
211        geom_expand["DATA_TYPE"] = "3D"
212        geom_expand["DELTA_BIN_NUM"] = 1
213        geom_expand["DELTA_TRACK_NUM"] = 1
214        geom_expand["DIGITIZATION"] = 4
215        geom_expand["EPSG_CRS"] = 32066
216        geom_expand["FIRST_BIN"] = 1000
217        geom_expand["FIRST_TRACK"] = 2000
218        geom_expand["FORMAT"] = 1
219        geom_expand["N_BIN"] = self.cube_shape[1]
220        geom_expand["N_SAMP"] = self.cube_shape[2]
221        geom_expand["N_TRACK"] = self.cube_shape[0]
222        geom_expand["PROJECTION"] = 316
223        geom_expand["REAL_DELTA_X"] = 100.0
224        geom_expand["REAL_DELTA_Y"] = 100.0
225        geom_expand["REAL_GEO_X"] = 1250000.0
226        geom_expand["REAL_GEO_Y"] = 10500000.0
227        geom_expand["SKEW_ANGLE"] = 0.0
228        geom_expand["SUBPOINT_CODE"] = "TTTBBB"
229        geom_expand["TIME_OR_DEPTH"] = "TIME"
230        geom_expand["TRACK_DIR"] = "H"
231        geom_expand["XFORM_TO_WGS84"] = 1241
232        geom_expand["ZERO_TIME"] = 0
233
234        # Write the keyfile
235        outputkey = os.path.join(
236            self.work_subfolder, "seismicCube_" + self.date_stamp + ".key"
237        )
238        with open(outputkey, "w") as key:
239            key.write(
240                "{}MESSAGE_FILE\n".format(20 * " ")
241            )  # spaces are important here.. Require 20 of them
242            key.write("3D_NAME C %s\n" % geom_expand["3D_NAME"])
243            key.write("COORD_METHOD I %d\n" % int(geom_expand["COORD_METHOD"]))
244            key.write("DATA_TYPE C %s\n" % geom_expand["DATA_TYPE"])
245            key.write("DELTA_BIN_NUM I %d\n" % int(geom_expand["DELTA_BIN_NUM"]))
246            key.write("DELTA_TRACK_NUM I %d\n" % int(geom_expand["DELTA_TRACK_NUM"]))
247            key.write("DIGITIZATION I %d\n" % int(geom_expand["DIGITIZATION"]))
248            key.write("EPSG_CRS I %d\n" % int(geom_expand["EPSG_CRS"]))
249            key.write("FIRST_BIN I %d\n" % int(geom_expand["FIRST_BIN"]))
250            key.write("FIRST_TRACK I %d\n" % int(geom_expand["FIRST_TRACK"]))
251            key.write("FORMAT I %d\n" % int(geom_expand["FORMAT"]))
252            key.write("N_BIN I %d\n" % int(geom_expand["N_BIN"]))
253            key.write("N_SAMP I %d\n" % int(geom_expand["N_SAMP"]))
254            key.write("N_TRACK I %d\n" % int(geom_expand["N_TRACK"]))
255            key.write("PROJECTION I %d\n" % int(geom_expand["PROJECTION"]))
256            key.write("REAL_DELTA_X R %f\n" % float(geom_expand["REAL_DELTA_X"]))
257            key.write("REAL_DELTA_Y R %f\n" % float(geom_expand["REAL_DELTA_Y"]))
258            key.write("REAL_GEO_X R %f\n" % float(geom_expand["REAL_GEO_X"]))
259            key.write("REAL_GEO_Y R %f\n" % float(geom_expand["REAL_GEO_Y"]))
260            key.write("SKEW_ANGLE R %f\n" % float(geom_expand["SKEW_ANGLE"]))
261            key.write("SUBPOINT_CODE C %s\n" % geom_expand["SUBPOINT_CODE"])
262            key.write("TIME_OR_DEPTH C %s\n" % geom_expand["TIME_OR_DEPTH"])
263            key.write("TRACK_DIR C %s\n" % geom_expand["TRACK_DIR"])
264            key.write("XFORM_TO_WGS84 I %d\n" % int(geom_expand["XFORM_TO_WGS84"]))
265            key.write("ZERO_TIME I %d\n" % int(geom_expand["ZERO_TIME"]))
266        print(f"\nKeyfile created at {outputkey}")
Write key file

Writes a file that contains important parameters about the cube.

Method that writes important parameters about the synthetic cube such as coordinate transforms and sizes.

Parameters
  • None
Returns
  • None
def write_to_logfile(self, msg, mainkey=None, subkey=None, val='') -> None:
268    def write_to_logfile(self, msg, mainkey=None, subkey=None, val="") -> None:
269        """
270        write_to_logfile
271
272        Method to write msg to model_parameter file
273        (includes newline)
274
275        Parameters
276        ----------
277        msg : `string`
278        Required string object that will be written tom model parameter file.
279        mainkey : `string`
280        String of the key to be written into de sql dictionary.
281        subkey : `string`
282        String of the subkey to be written into de sql dictionary.
283        val : `string`
284        String of the value that should be written into the sql dictionary.
285        
286        Returns
287        -------
288        None
289        """
290        if msg is not None:
291            with open(self.logfile, "a") as f:
292                f.write(f"{msg}\n")
293        if mainkey is not None:
294            self.sqldict[mainkey][subkey] = val
295            # for k, v in self.sqldict.items():
296            #     print(f"{k}: {v}")

write_to_logfile

Method to write msg to model_parameter file (includes newline)

Parameters
  • msg (string):

  • Required string object that will be written to the model parameter file.

  • mainkey (string):

  • String of the key to be written into the sql dictionary.

  • subkey (string):

  • String of the subkey to be written into the sql dictionary.

  • val (string):

  • String of the value that should be written into the sql dictionary.

Returns
  • None
def write_sqldict_to_logfile(self, logfile=None) -> None:
298    def write_sqldict_to_logfile(self, logfile=None) -> None:
299        """
300        write_sqldict_to_logfile
301
302        Write the sql dictionary to the logfile
303
304        Parameters
305        ----------
306        logfile : `string`
307        The path to the log file. By default None
308        
309        Returns
310        -------
311        None
312        """
313        if logfile is None:
314            logfile = self.logfile
315        with open(logfile, "a") as f:
316            for k, nested in self.sqldict.items():
317                print(k, file=f)
318                if k == "model_id":
319                    print(f"\t{nested}", file=f)
320                else:
321                    for subkey, value in nested.items():
322                        print(f"\t{subkey}: {value}", file=f)
323                print(file=f)

write_sqldict_to_logfile

Write the sql dictionary to the logfile

Parameters
  • logfile (string):

  • The path to the log file. By default None

Returns
  • None
def write_sqldict_to_db(self) -> None:
325    def write_sqldict_to_db(self) -> None:
326        """
327        write_sqldict_to_db
328
329        Method to write the sqldict to database sqlite file
330
331        Parameters
332        ----------
333        None
334        
335        Returns
336        -------
337        None
338        """
339        model_id = pathlib.Path(self.work_subfolder).name
340        model_parameters = self.sqldict["model_parameters"]
341        fault_keys = [k for k in self.sqldict.keys() if "fault" in k]
342        closure_keys = [k for k in self.sqldict.keys() if "closure" in k]
343
344        conn = sqlite3.connect(os.path.join(self.work_subfolder, "parameters.db"))
345        # tables = ["model_parameters", "fault_parameters", "closure_parameters"]
346        # create tables
347        sql = f"CREATE TABLE model_parameters (model_id string primary key, {','.join(model_parameters.keys())})"
348        conn.execute(sql)
349        # insert model_parameters
350        columns = "model_id, " + ", ".join(model_parameters.keys())
351        placeholders = ", ".join("?" * (len(model_parameters) + 1))
352        sql = f"INSERT INTO model_parameters ({columns}) VALUES ({placeholders})"
353        values = tuple([model_id] + [str(x) for x in model_parameters.values()])
354        conn.execute(sql, values)
355        conn.commit()
356
357        # fault parameters
358        if len(fault_keys) > 0:
359            f = tuple(self.sqldict[fault_keys[0]].keys())
360            sql = f"CREATE TABLE fault_parameters ({','.join(f)})"
361            conn.execute(sql)
362            columns = ", ".join(self.sqldict[fault_keys[0]].keys())
363            placeholders = ", ".join("?" * len(self.sqldict[fault_keys[0]].keys()))
364            for f in fault_keys:
365                sql = (
366                    f"INSERT INTO fault_parameters ({columns}) VALUES ({placeholders})"
367                )
368                conn.execute(sql, tuple(self.sqldict[f].values()))
369                conn.commit()
370
371        if len(closure_keys) > 0:
372            c = tuple(self.sqldict[closure_keys[0]].keys())
373            sql = f"CREATE TABLE closure_parameters ({','.join(c)})"
374            conn.execute(sql)
375            columns = ", ".join(self.sqldict[closure_keys[0]].keys())
376            placeholders = ", ".join("?" * len(self.sqldict[closure_keys[0]].keys()))
377            for c in closure_keys:
378                sql = f"INSERT INTO closure_parameters ({columns}) VALUES ({placeholders})"
379                conn.execute(sql, tuple(self.sqldict[c].values()))
380                conn.commit()

write_sqldict_to_db

Method to write the sqldict to database sqlite file

Parameters
  • None
Returns
  • None
@staticmethod
def year_plus_fraction() -> str:
    # TODO Move this to utils separate module
    """
    Year Plus Fraction
    ----------------------------------------

    Method generates a time stamp in the format of
    year + fraction of year, e.g. "2024.12345678".

    The fraction is computed against the actual length of the
    current year (365 or 366 days), so the returned value is always
    in [year, year + 1). Dividing by a fixed 365 days would let the
    fraction exceed 1.0 at the end of a leap year and collide with
    early timestamps of the following year.

    Parameters
    ----------
    None

    Returns
    -------
    fraction of the year : str
        The time stamp in the format of year + fraction of year
    """
    now = datetime.datetime.now()
    year = now.year
    start_of_year = datetime.datetime(year, 1, 1)
    # Real year length in seconds — handles leap years correctly.
    secs_in_year = (
        datetime.datetime(year + 1, 1, 1) - start_of_year
    ).total_seconds()
    fraction_of_year = (now - start_of_year).total_seconds() / secs_in_year
    # Fixed-width format pads with leading spaces; strip them out.
    return format(year + fraction_of_year, "14.8f").replace(" ", "")
Year Plus Fraction

Method generates a time stamp in the format of year + fraction of year.

Parameters
  • None
Returns
  • fraction of the year (str): The time stamp in the format of year + fraction of year
def hdf_setup(self, hdf_name: str) -> None:
    """
    Setup HDF files
    ---------------

    This method sets up the HDF structures: it opens a new HDF5 file
    in the temp folder and creates the /ModelData group that datasets
    are written into.

    Parameters
    ----------
    hdf_name : str
        The name of the HDF file to be created

    Returns
    -------
    None
    """
    # Leave one core free for the rest of the program, but never ask
    # blosc for fewer than 1 thread (cpu_count() == 1 would yield 0).
    num_threads = min(8, max(1, mp.cpu_count() - 1))
    tables.set_blosc_max_threads(num_threads)
    self.hdf_filename = os.path.join(self.temp_folder, hdf_name)
    self.filters = tables.Filters(
        complevel=5, complib="blosc"
    )  # compression with fast write speed
    self.h5file = tables.open_file(self.hdf_filename, "w")
    self.h5file.create_group("/", "ModelData")
Setup HDF files

This method sets up the HDF structures

Parameters
  • hdf_name (str): The name of the HDF file to be created
Returns
  • None
def hdf_init(self, dset_name, shape: tuple, dtype: str = "float64") -> tables.CArray:
    """
    HDF Initialize
    ----------------------------------------

    Method that initializes an HDF chunked array under the
    /ModelData group of the open HDF5 file.

    Parameters
    ----------
    dset_name : str
        The name of the dataset to be created
    shape : tuple
        Shape of the chunked array to create

    Returns
    -------
    new_array: tables.CArray
    """
    # Map the dtype string onto a PyTables atom: any float flavour
    # gets FloatAtom, uint8 gets UInt8Atom, everything else IntAtom.
    atom = (
        tables.FloatAtom()
        if "float" in dtype
        else tables.UInt8Atom() if "uint8" in dtype else tables.IntAtom()
    )
    return self.h5file.create_carray(
        self.h5file.root.ModelData,
        dset_name,
        atom,
        shape,
        filters=self.filters,
    )
HDF Initialize

Method that initializes the HDF chunked array

Parameters
  • dset_name (str): The name of the dataset to be created
  • shape (tuple): Shape of the chunked array to create
Returns
  • new_array (tables.CArray): The newly created chunked array under /ModelData
def hdf_node_list(self):
    """Return the names of all nodes stored under the /ModelData group."""
    names = []
    for node in self.h5file.list_nodes("ModelData"):
        names.append(node.name)
    return names
def hdf_remove_node_list(self, dset_name):
    """
    Remove a dataset node from the /ModelData group, if present.

    Removal is best-effort: failures (e.g. the node does not exist)
    are silently ignored.

    Parameters
    ----------
    dset_name : str
        Name of the dataset node to remove.

    Returns
    -------
    None
    """
    group = self.h5file.root.ModelData
    try:
        self.h5file.remove_node(group, dset_name)
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt and
        # SystemExit are no longer swallowed; removal stays best-effort.
        pass
    # A trailing `self.hdf_node_list` attribute access (never called,
    # result discarded) was removed as a no-op.
def triangle_distribution_fix(left, mode, right, random_seed=None):
    """
    Triangle Distribution Fix
    -------------------------

    Draw samples from the triangular distribution over the interval [left, right] with modifications.

    Ensure some values are drawn at the left and right values by enlarging the interval to
    [left - (mode - left), right + (right - mode)], then rejection-sampling
    until the draw falls inside [left, right].

    Parameters
    ----------
    left: `float`
        lower limit
    mode: `float`
        mode
    right: `float`
        upper limit
    random_seed: `int`
        seed to set numpy's random seed

    Returns
    -------
    sn_db: `float`
        Drawn samples from parameterised triangular distribution

    Notes
    -----
    Fixes three defects of the earlier implementation:
    * `sn_db` was initialised to 0, so when left <= 0 <= right the loop
      never ran and 0 was returned without sampling at all;
    * the seed was re-applied inside the loop, so an out-of-range first
      draw was redrawn identically forever (infinite loop);
    * `if random_seed:` silently ignored a seed of 0.
    """
    # Seed once, before sampling; `is not None` so seed 0 is honoured.
    if random_seed is not None:
        np.random.seed(random_seed)
    # Rejection-sample on the enlarged interval until the draw lands
    # inside [left, right].
    while True:
        sn_db = np.random.triangular(
            left - (mode - left), mode, right + (right - mode)
        )
        if left <= sn_db <= right:
            return sn_db
Triangle Distribution Fix

Draw samples from the triangular distribution over the interval [left, right] with modifications.

Ensure some values are drawn at the left and right values by enlarging the interval to [left - (mode - left), right + (right - mode)]

Parameters
  • left (float): lower limit
  • mode (float): mode
  • right (float): upper limit
  • random_seed (int): seed to set numpy's random seed
Returns
  • sn_db (float): Drawn samples from parameterised triangular distribution