Add package george to setup.py dependencies

JBjoernskov · Jan 11, 2024 · 500837c · 500837c
1 parent 185f29f
commit 500837c
Show file tree

Hide file tree

Showing 8 changed files with 120 additions and 66 deletions.
diff --git a/setup.py b/setup.py
@@ -30,7 +30,8 @@
         "ptemcee @ git+https://github.com/willvousden/ptemcee.git@c06ffef47eaf9e371a3d629b4d28fb3cecda56b4",
         "scipy",
         "fastapi",
-        "numpy"
+        "numpy",
+        "george"
     ],
     classifiers=["Programming Language :: Python :: 3"],
 )
diff --git a/twin4build/estimator/estimator.py b/twin4build/estimator/estimator.py
@@ -103,10 +103,11 @@ def run_emcee_estimation(self,
                              n_temperature=15,
                              fac_walker=2,
                              T_max=np.inf,
-                             n_cores=multiprocessing.cpu_count()-2,
+                             n_cores=multiprocessing.cpu_count(),
                              prior="uniform",
                              walker_initialization="uniform",
-                             assume_uncorrelated_noise=True):
+                             assume_uncorrelated_noise=True,
+                             use_simulated_annealing=False):
         assert n_cores>=1, "The argument \"n_cores\" must be larger than or equal to 1"
         assert fac_walker>=2, "The argument \"fac_walker\" must be larger than or equal to 2"
         allowed_priors = ["uniform", "gaussian"]
@@ -117,7 +118,7 @@ def run_emcee_estimation(self,
         assert np.all(self.x0<=self.ub), "The provided x0 must be smaller than the provided upper bound ub"
         assert np.all(np.abs(self.x0-self.lb)>self.tol), f"The difference between x0 and lb must be larger than {str(self.tol)}. {np.array(self.flat_attr_list)[(np.abs(self.x0-self.lb)>self.tol)==False]} violates this condition." 
         assert np.all(np.abs(self.x0-self.ub)>self.tol), f"The difference between x0 and ub must be larger than {str(self.tol)}. {np.array(self.flat_attr_list)[(np.abs(self.x0-self.ub)>self.tol)==False]} violates this condition."
-
+        self.use_simulated_annealing = use_simulated_annealing
         self.model.make_pickable()
         self.model.cache(stepSize=self.stepSize,
                         startTime=self.startTime_train,
@@ -145,30 +146,32 @@ def run_emcee_estimation(self,
                 source_component = [cp.connectsSystemThrough.connectsSystem for cp in measuring_device.connectsAt][0]
                 self.n_par += len(source_component.input)+1
                 self.n_par_map[measuring_device.id] = len(source_component.input)+1
-            print(self.n_par)
-            print(self.n_par_map)
+
             loglike = self._loglike_gaussian_process_wrapper
             ndim = ndim+self.n_par
             self.x0 = np.append(self.x0, np.zeros((self.n_par,)))
             bound = 5
             self.lb = np.append(self.lb, -bound*np.ones((self.n_par,)))
             self.ub = np.append(self.ub, bound*np.ones((self.n_par,)))
+            self.standardDeviation_x0 = np.append(self.standardDeviation_x0, bound/2*np.ones((self.n_par,)))
         else:
             loglike = self._loglike_wrapper
+
+        if self.use_simulated_annealing:
+            assert n_temperature==1, "Simulated can only be used if \"n_temperature\" is 1."
 
         n_walkers = int(ndim*fac_walker) #*4 #Round up to nearest even number and multiply by 2
 
         if walker_initialization=="uniform":
             x0_start = np.random.uniform(low=self.lb, high=self.ub, size=(n_temperature, n_walkers, ndim))
         elif walker_initialization=="gaussian":
-            scale = np.append(self.standardDeviation_x0, bound/2*np.ones((self.n_par,)))
-            x0_start = np.random.normal(loc=self.x0, scale=scale, size=(n_temperature, n_walkers, ndim))
+            x0_start = np.random.normal(loc=self.x0, scale=self.standardDeviation_x0, size=(n_temperature, n_walkers, ndim))
             lb = np.resize(self.lb,(x0_start.shape))
             ub = np.resize(self.ub,(x0_start.shape))
             x0_start[x0_start<self.lb] = lb[x0_start<self.lb]
             x0_start[x0_start>self.ub] = ub[x0_start>self.ub]
         elif walker_initialization=="ball":
-            x0_start = np.random.uniform(low=self.x0-1e-5, high=self.ub+1e-5, size=(n_temperature, n_walkers, ndim))
+            x0_start = np.random.uniform(low=self.x0-1e-5, high=self.x0+1e-5, size=(n_temperature, n_walkers, ndim))
             lb = np.resize(self.lb,(x0_start.shape))
             ub = np.resize(self.ub,(x0_start.shape))
             x0_start[x0_start<self.lb] = lb[x0_start<self.lb]
@@ -188,6 +191,10 @@ def run_emcee_estimation(self,
                           adaptive=adaptive,
                           betas=betas,
                           mapper=pool.imap)
+
+        burnin = 500
+        betas = np.linspace(0, 1, burnin)[1:]
+        self.beta = 0
         chain = sampler.chain(x0_start)
         n_save_checkpoint = 50 if n_sample>=50 else 1
         result = {"integratedAutoCorrelatedTime": [],
@@ -210,8 +217,10 @@ def run_emcee_estimation(self,
                     "n_par": self.n_par,
                     "n_par_map": self.n_par_map
                     }
-
+        
         for i, ensemble in tqdm(enumerate(chain.iterate(n_sample)), total=n_sample):
+            if i<burnin-1:
+                self.beta = float(betas[i])
             result["integratedAutoCorrelatedTime"].append(chain.get_acts())
             result["chain.jumps_accepted"].append(chain.jumps_accepted.copy())
             result["chain.jumps_proposed"].append(chain.jumps_proposed.copy())
@@ -291,10 +300,20 @@ def _obj_fun_MCMC(self, x, data):
         return self.loss/self.T
 
     def _loglike_wrapper(self, theta):
+        outsideBounds = np.any(theta<self.lb) or np.any(theta>self.ub)
+        if outsideBounds:
+            return -1e+10
+
         try:
             loglike = self._loglike(theta)
         except FMICallException as inst:
-            loglike = -1e+10
+            return -1e+10
+
+        if self.use_simulated_annealing:
+            # Python returns a complex number for (-x)**y where x and y is positive. Python returns the real numbered root for -(x)**y where x and y is positive. 
+            # Therefore abs is used to convert the negative loglike and then the sign is added after taking the power. 
+            loglike = loglike*self.beta
+
         return loglike
 
     def _loglike(self, theta):
@@ -342,10 +361,20 @@ def _loglike(self, theta):
         return loglike
 
     def _loglike_gaussian_process_wrapper(self, theta):
+        outsideBounds = np.any(theta<self.lb) or np.any(theta>self.ub)
+        if outsideBounds:
+            return -1e+10
+
         try:
             loglike = self._loglike_gaussian_process(theta)
         except FMICallException as inst:
-            loglike = -1e+10
+            return -1e+10
+
+        if self.use_simulated_annealing:
+            # Python returns a complex number for (-x)**y where x and y is positive. Python returns the real numbered root for -(x)**y where x and y is positive. 
+            # Therefore abs is used to convert the negative loglike and then the sign is added after taking the power. 
+            loglike = loglike*self.beta
+
         return loglike
 
     def _loglike_gaussian_process(self, theta):
@@ -357,9 +386,7 @@ def _loglike_gaussian_process(self, theta):
         # x = theta[:-n_sigma]
         # sigma = theta[-n_sigma:]
 
-        outsideBounds = np.any(theta<self.lb) or np.any(theta>self.ub)
-        if outsideBounds: #####################################################h
-            return -1e+10
+
 
         theta_kernel = np.exp(theta[-self.n_par:])
         theta = theta[:-self.n_par]
@@ -406,7 +433,7 @@ def _loglike_gaussian_process(self, theta):
             print("=================")
             print("")
 
-        return loglike
+        return float(loglike)
 
     def uniform_logprior(self, theta):
         outsideBounds = np.any(theta<self.lb) or np.any(theta>self.ub)

diff --git a/twin4build/estimator/tests/test_estimator_wbypass.py b/twin4build/estimator/tests/test_estimator_wbypass.py
@@ -57,7 +57,6 @@ def test_estimator():
         idx = [i for i, j in enumerate(model.chain_log["component_id"]) if j == com_id]
         # idx = model.chain_log["component_id"].index(com_id)
         x0[model.component_dict[com_id]] = x0_[idx]
-    print(x0)
     del x
     del loglike
     del model.chain_log
@@ -81,9 +80,10 @@ def test_estimator():
                 "n_temperature": 1, #Number of parallel chains/temperatures.
                 "fac_walker": 8, #Scaling factor for the number of ensemble walkers per chain. Minimum is 2.
                 "prior": "uniform", #Prior distribution - "gaussian" is also implemented
-                "walker_initialization": "ball",#Initialization of parameters - "gaussian" is also implemented
+                "walker_initialization": "uniform",#Initialization of parameters - "gaussian" is also implemented
                 "n_cores": 8,
-                "assume_uncorrelated_noise": False
+                "assume_uncorrelated_noise": True,
+                "use_simulated_annealing": True
                 }
 
     estimator.estimate(x0=x0,

diff --git a/twin4build/estimator/tests/test_load_emcee_chain.py b/twin4build/estimator/tests/test_load_emcee_chain.py
@@ -48,13 +48,13 @@ def test_load_emcee_chain():
     sky_blue = colors[9]
     # plot.load_params()
 
-    do_iac_plot = False
-    do_logl_plot = False
-    do_trace_plot = False
-    do_swap_plot = False
-    do_jump_plot = False
-    do_corner_plot = False
-    do_inference = True
+    do_iac_plot = True
+    do_logl_plot = True
+    do_trace_plot = True
+    do_swap_plot = True
+    do_jump_plot = True
+    do_corner_plot = True
+    do_inference = False
 
     # loaddir = os.path.join(uppath(os.path.abspath(__file__), 2), "chain_logs", "20230829_155706_chain_log.pickle")
     # loaddir = os.path.join(uppath(os.path.abspath(__file__), 2), "chain_logs", "20230830_194210_chain_log.pickle")
@@ -114,20 +114,27 @@ def test_load_emcee_chain():
     loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240107_224328_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
     loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240107_224328_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
     loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240108_175437_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
+    loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240109_110253_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
+    loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240109_143730_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
+    loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240110_093807_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
+    loaddir = os.path.join(uppath(os.path.abspath(__file__), 1), "generated_files", "model_parameters", "chain_logs", "model_20240110_141839_.pickle") #15 temps , 8*walkers, 30tau, test bypass valve, lower massflow and pressure, gaussian prior, GlycolEthanol, valve more parameters, lower UA, lower massflow, Kp
 
 
     with open(loaddir, 'rb') as handle:
         result = pickle.load(handle)
         result["chain.T"] = 1/result["chain.betas"] ##################################
 
 
-    burnin = int(result["chain.x"].shape[0])-500 #800
+    burnin = int(result["chain.x"].shape[0])-1 #800
 #########################################
     list_ = ["integratedAutoCorrelatedTime", "chain.jumps_accepted", "chain.jumps_proposed", "chain.swaps_accepted", "chain.swaps_proposed"]
     for key in list_:
         result[key] = np.array(result[key])
 #########################################
 
+    vmin = np.min(result["chain.betas"])
+    vmax = np.max(result["chain.betas"])
+
 
     # for key in result.keys():
     #     if key not in list_:
@@ -209,15 +216,15 @@ def test_load_emcee_chain():
     if do_inference:
         model.load_chain_log(loaddir)
         startTime_train = datetime.datetime(year=2022, month=2, day=1, hour=8, minute=0, second=0, tzinfo=tz.gettz("Europe/Copenhagen"))
-        endTime_train = datetime.datetime(year=2022, month=2, day=2, hour=22, minute=0, second=0, tzinfo=tz.gettz("Europe/Copenhagen"))
+        endTime_train = datetime.datetime(year=2022, month=2, day=1, hour=22, minute=0, second=0, tzinfo=tz.gettz("Europe/Copenhagen"))
         model.chain_log["startTime_train"] = startTime_train
         model.chain_log["endTime_train"] = endTime_train
         model.chain_log["stepSize_train"] = stepSize
         model.chain_log["n_par"] = n_par
         model.chain_log["n_par_map"] = n_par_map
         parameter_chain = result["chain.x"][burnin:,0,:,:]
-        # del result
-        # del model.chain_log["chain.x"]
+        del result
+        del model.chain_log["chain.x"]
         # parameter_chain = result["chain.x"][-1:,0,:,:] #[-1:,0,:,:]
         parameter_chain = parameter_chain.reshape((parameter_chain.shape[0]*parameter_chain.shape[1], parameter_chain.shape[2]))
         fig, axes = simulator.run_emcee_inference(model, parameter_chain, targetParameters, targetMeasuringDevices, startTime, endTime, stepSize, assume_uncorrelated_noise=False)
@@ -290,8 +297,7 @@ def test_load_emcee_chain():
 
     # vmin = np.min(result["chain.T"])
     # vmax = np.max(result["chain.T"])
-    vmin = np.min(result["chain.betas"])
-    vmax = np.max(result["chain.betas"])
+