MUD Demo: Aggregating Spatial Measurements into a QoI Map
We find that using sensors on the top/bottom half of the domain to define each respective QoI component yields more precise solutions to the inverse problem.
However, in general we do not have a procedure for determining how to combine measurements into geometrically optimal QoI maps.
This page illustrates how the Singular Value Decomposition (SVD) can be used to define the mapping and compares it to the solution when using the horizontal split pictured below (QoI components colored red/black).
The SVD must be performed on scaled data to remove the effect of magnitude differences in the measurements, and reveals two dominant directions:
We can visualize how the sensor data is weighted by coloring each measurement location by the corresponding component value in each of the first two singular vectors.
The truncated basis of singular vectors is used to map the z-scored residual samples to a 2-D data space.
The first QoI is effectively an average value as its components demonstrate little variability (orange) by comparison to the second QoI (blue), which is weighing spatial information.
If we visualize the QoI contour maps for each component, we can see that the new map we discover with the SVD has orthogonal contour structures in the two components:
Finally we find that the SVD map identifies similar solutions on average to the horizontal split that was chosen by use of researcher intuition (refresh the page to simulate new measurement noise). Both solutions are similar to the optimal solution in this basis (red dotted line), if not sometimes identical.
import numpy as np
import scipy.stats.distributions as ds
import matplotlib.pyplot as plt
from mud.base import DensityProblem
import mud_examples.poisson as ps
sample_dist = "u"
input_dim = 2
prefix = 1000
fdir = f"pde_{input_dim}D"
fname = f"data/{fdir}/ref_{prefix}_{input_dim}{sample_dist}.pkl"
P = ps.pdeProblem(fname)
P.load()
if sample_dist == "n":
P.dist = ds.norm # required for generator functions
loc = -2.0
scale = 0.2
else:
P.dist = ds.uniform
loc = -4.0
scale = 4.0
# DEFINE PROBLEM PARAMS
num_obs = 500
sd = 0.05
num_samples, num_max_qoi = P.qoi.shape
print(f"There are {num_max_qoi} sensors available and {num_samples} samples of {input_dim}-D parameter space. We will use {num_obs} of them and simulate noise drawn from N(0, {sd}^2)")
# modify this method for new plot based on singular vectors
num_qoi = 2
ps.plot_without_fenics(
fname,
num_sensors=num_obs,
mode="hor",
num_qoi=num_qoi,
example="mud", # TODO: rename this variable
)
plt.gcf()
# # Baseline Solution
# generator function which takes (num_obs, sd) as arguments and returns a mud_problem
mud_generator = P.mud_vector_horizontal(num_qoi, loc=loc, scale=scale)
mud_problem = mud_generator(num_obs, sd) # simulates noisy measurements
# mud_problem.mud_point()
# # Reference Solution
# TODO : turn this snippet into a method
closest_fit_index_out = np.argmin(
np.linalg.norm(P.qoi - np.array(P.qoi_ref), axis=1)
)
g_projected = P.lam[closest_fit_index_out, :].ravel()
lam_true = g_projected
# print(lam_true)
# # SVD Approach
measure_true = P.qoi_ref[0:num_obs]
measurements = P.qoi[:, 0:num_obs]
noise = np.random.randn(num_obs) * sd
data = measure_true + noise
# q = wme(qoi, data, sd).reshape(-1, 1)
from sklearn.preprocessing import StandardScaler
scalar = StandardScaler()
residuals = (measurements - data) / sd
X = scalar.fit_transform(residuals)
U, singular_values, singular_vectors = np.linalg.svd(X)
top_singular_values = 5
inds = np.arange(1, top_singular_values+1)
plt.figure()
plt.plot(inds, singular_values[0:top_singular_values])
plt.xticks(inds)
plt.xlabel("Index", fontsize=12)
plt.ylabel("$\sigma_i$", fontsize=12)
plt.yscale('log')
plt.title(f"Top {top_singular_values} Singular Values of Scaled Residuals", fontsize=16)
plt.gcf()
scalar = StandardScaler()
S = scalar.fit_transform(singular_vectors)
top_singular_vectors = 2
fig, ax = plt.subplots(1, top_singular_vectors, figsize=(1+5*top_singular_vectors, 5), sharey=True)
for i in range(top_singular_vectors):
ax[i].tricontour(P.sensors[0:num_obs, 0], P.sensors[0:num_obs, 1], singular_vectors[i, :], levels=20, alpha=1)
ax[i].scatter(P.sensors[0:num_obs, 0], P.sensors[0:num_obs, 1], s=100, c=singular_vectors[i, :])
ax[i].set_title(f"$\sigma_{i+1}$ = {singular_values[i]:1.2e}")
ax[i].set_xlabel("$x_1$", fontsize=12)
if i == 0: ax[i].set_ylabel("$x_2$", fontsize=12)
fig.suptitle("Singular Vector Components as Weights for Sensors", fontsize=16)
fig
fig, ax = plt.subplots(figsize=(6,4))
# plt.plot((singular_vectors[0, :] + singular_vectors[1, :]))
ax.plot(singular_vectors[0, :], c='xkcd:orange', label="$v_1$", lw=3)
ax.plot(singular_vectors[1, :], c='xkcd:light blue', label="$v_2$", lw=3, zorder=0)
ax.legend(fontsize=12)
ax.set_xlabel("Sensor Index", fontsize=12)
ax.set_ylabel("Singular Vector Component Value", fontsize=12)
plt.tight_layout()
fig
# In[ ]:
# fig, ax = plt.subplots(figsize=(3,3))
# ax.scatter(P.sensors[0:num_obs, 0], P.sensors[0:num_obs, 1], s=100, c= (singular_vectors[0, :] + singular_vectors[1, :]))
# fig.show()
# In[ ]:
num_qoi = 2
new_qoi_map = singular_vectors[0:num_qoi, :]
new_qoi = residuals @ new_qoi_map.T # ok shape, wrong solution, identical to using U@sigma
top_singular_vectors = 2
fig, ax = plt.subplots(1, top_singular_vectors, figsize=(1+5*top_singular_vectors, 5), sharey=True)
for i in range(top_singular_vectors):
ax[i].scatter(P.lam[:,0], P.lam[:,1], s=50, c=new_qoi[:,i])
ax[i].tricontour(P.lam[:,0], P.lam[:,1], new_qoi[:,i], levels=20)
# ax[i].set_title(f"$\sigma_{i}$ = {singular_values[i]:1.2e}")
ax[i].set_title(f"$q_{i+1}$", fontsize=12)
ax[i].set_xlabel("$\lambda_1$", fontsize=12)
if i == 0: ax[i].set_ylabel("$\lambda_2$", fontsize=12)
# fig.supylabel("$\lambda_2$")
fig.suptitle("QoI Component Surface Plot", fontsize=16)
fig
d = DensityProblem(P.lam, new_qoi, P.domain, weights=None)
#print(d.mud_point())
P.plot()
plt.plot(np.linspace(0, 1, input_dim + 2), [0] + list(d.mud_point()) + [0], lw=5, c='blue', label='svd')
plt.plot(np.linspace(0, 1, input_dim + 2), [0] + list(mud_problem.mud_point()) + [0], lw=5, c='purple', label='split')
plt.legend()
plt.title("Initial Samples & Solutions", fontsize=16)
plt.gcf()