KDE Plots

KDE Plots#

The purpose of this notebook is to reproduce figures presenting results from our interval statistics analysis using BChron age models. Here we show the application of interval statistic techniques to the median interval values.

Note: This notebook assumes the existence of pickle files that need to have been created previously. If you are running this notebook on your machine, make sure you’ve successfully run both of the notebooks in the Loading Data folder, as well as the Synthetic_Data notebook in this folder.

import os
from tqdm import tqdm

import seaborn as sns
import pandas as pd
import numpy as np
import pickle
import matplotlib.pyplot as plt
from scipy.stats import zscore
import pyleoclim as pyleo

from matplotlib.lines import Line2D
from matplotlib import gridspec
from pylipd.lipd import LiPD
with open('../../data/pickle/series_dict.pkl','rb') as handle:
    series_dict = pickle.load(handle)

# Sort by latitude

lat_dict = {series.lat:label for label,series in series_dict.items()}

sort_index = np.sort(np.array(list(lat_dict.keys())))[::-1]
sort_label = [lat_dict[lat] for lat in sort_index]
series_dict = {label:series_dict[label] for label in sort_label}
with open('../../data/pickle/holo_chrons_study.pkl','rb') as handle:
    holo_chrons_study = pickle.load(handle)

Loading chronology objects into a more easily accessible dictionary format using PyLiPD:

data_path = '../../data/LiPD/full'
D = LiPD()
D.load_from_dir(data_path)
lipd_records = D.get_all_dataset_names()

holo_chrons = {}

for record in tqdm(lipd_records):
    D = LiPD()
    D.load(os.path.join(data_path,f'{record}.lpd'))
    df = D.get_timeseries_essentials().iloc[0]

    holo_chrons[record] = {}
    holo_chrons[record]['d18O'] = df['paleoData_values']

    ens_df = D.get_ensemble_tables().iloc[0]
    holo_chrons[record]['chron'] = ens_df['ensembleVariableValues'].T

holo_chrons = {key:holo_chrons[key] for key in sort_label} # Sort by latitude
Loading 14 LiPD files
  0%|                                                                                            | 0/14 [00:00<?, ?it/s]
 14%|████████████                                                                        | 2/14 [00:00<00:02,  4.56it/s]
 29%|████████████████████████                                                            | 4/14 [00:00<00:01,  6.95it/s]
 36%|██████████████████████████████                                                      | 5/14 [00:00<00:01,  5.26it/s]
 43%|████████████████████████████████████                                                | 6/14 [00:01<00:01,  5.41it/s]
 50%|██████████████████████████████████████████                                          | 7/14 [00:01<00:01,  5.91it/s]
 57%|████████████████████████████████████████████████                                    | 8/14 [00:01<00:01,  4.42it/s]
 64%|██████████████████████████████████████████████████████                              | 9/14 [00:01<00:01,  4.53it/s]
 71%|███████████████████████████████████████████████████████████▎                       | 10/14 [00:01<00:00,  4.96it/s]
 79%|█████████████████████████████████████████████████████████████████▏                 | 11/14 [00:02<00:00,  5.45it/s]
 86%|███████████████████████████████████████████████████████████████████████▏           | 12/14 [00:02<00:00,  5.26it/s]
 93%|█████████████████████████████████████████████████████████████████████████████      | 13/14 [00:02<00:00,  4.86it/s]
100%|███████████████████████████████████████████████████████████████████████████████████| 14/14 [00:02<00:00,  5.37it/s]

Loaded..
  0%|                                                                                            | 0/14 [00:00<?, ?it/s]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.59it/s]

Loaded..
  7%|██████                                                                              | 1/14 [00:01<00:14,  1.15s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.96it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.95it/s]

Loaded..
 14%|████████████                                                                        | 2/14 [00:05<00:39,  3.27s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.65it/s]

Loaded..
 21%|██████████████████                                                                  | 3/14 [00:06<00:24,  2.20s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 11.01it/s]

Loaded..
 29%|████████████████████████                                                            | 4/14 [00:07<00:17,  1.72s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.01it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  3.99it/s]

Loaded..
 36%|██████████████████████████████                                                      | 5/14 [00:10<00:18,  2.06s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.88it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.85it/s]

Loaded..
 43%|████████████████████████████████████                                                | 6/14 [00:12<00:16,  2.01s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.65it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.61it/s]

Loaded..
 50%|██████████████████████████████████████████                                          | 7/14 [00:13<00:12,  1.85s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  2.77it/s]

Loaded..
 57%|████████████████████████████████████████████████                                    | 8/14 [00:18<00:16,  2.82s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.92it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.90it/s]

Loaded..
 64%|██████████████████████████████████████████████████████                              | 9/14 [00:21<00:13,  2.65s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.10it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  6.07it/s]

Loaded..
 71%|███████████████████████████████████████████████████████████▎                       | 10/14 [00:22<00:09,  2.36s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.22it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  7.18it/s]

Loaded..
 79%|█████████████████████████████████████████████████████████████████▏                 | 11/14 [00:24<00:06,  2.13s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.23it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  5.21it/s]

Loaded..
 86%|███████████████████████████████████████████████████████████████████████▏           | 12/14 [00:26<00:04,  2.16s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]


100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.44it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00,  4.43it/s]

Loaded..
 93%|█████████████████████████████████████████████████████████████████████████████      | 13/14 [00:29<00:02,  2.34s/it]
Loading 1 LiPD files

  0%|                                                                                             | 0/1 [00:00<?, ?it/s]

100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 14.21it/s]

Loaded..
100%|███████████████████████████████████████████████████████████████████████████████████| 14/14 [00:30<00:00,  1.83s/it]
100%|███████████████████████████████████████████████████████████████████████████████████| 14/14 [00:30<00:00,  2.14s/it]

# detrending and collect d18O values at different intervals

#d18O values at different intervals
d18O_int={}
#Detrended zscore vals
zscore_detrend={}

for site in holo_chrons.keys():
    ty = holo_chrons[site]['d18O'].astype(float)
    chron_tmp = holo_chrons[site]['chron'].astype(float)
    nC = 1000

    zscore_detrend[site]=np.zeros((nC,len(ty)))
    zscore_detrend[site][:]=np.nan
    
    for j in range(nC):
        tx=chron_tmp[j,:]
        # detrending by the Pyleoclim detrending function
        ts = pyleo.Series(time=tx, value=ty,dropna=False,verbose=False)
        if site != 'Idealized':
            ts_detrended= ts.detrend(method='savitzky-golay')
            a=ts_detrended.value
            zscore_detrend[site][j,:] = zscore(a,nan_policy='omit')
        else:
            a=ts.value
            zscore_detrend[site][j,:] = a

    d18O_int[site]={}
    holo_int={}
    # int_size: interval size
    for int_size in [150,200]:
        holo_int[int_size] = np.arange(4000%int_size-int_size/2,10000,int_size)
        d18O_int[site][int_size]=np.zeros((len(holo_int[int_size])-1,nC))
        d18O_int[site][int_size][:]=np.nan
        # iterate over each interval
        for idx,tage in enumerate(holo_int[int_size][:-1]):
            for j in range(nC):
                tx = chron_tmp[j,:]
                #calculate median interval value for each chron
                d18O_int[site][int_size][idx,j]=np.nanmedian(zscore_detrend[site][j,(tx>=tage) & (tx<holo_int[int_size][idx+1])])
# Plotting

# Set color palette based on the number of keys in holo_chrons
clrs = sns.color_palette(palette='tab20', n_colors=len(holo_chrons.keys()) + 1)
clrs[14] = '#FFD700' # Make the last color gold

# Create a figure with 5 columns and 3 rows of subplots
fig, ax = plt.subplots(ncols=5, nrows=3, figsize=(16, 12), tight_layout=True)

# Flatten the 2D array of axes into a 1D array
axes = ax.ravel()

int_size = 200

# Iterate over each site in holo_chrons
for idx, site in enumerate(holo_chrons.keys()):
    # Create a pandas Series from the flattened d18O_int for the current site and interval size
    s = pd.Series(d18O_int[site][int_size].flatten())

    # Plot a histogram with KDE for the current site
    sns.histplot(s.dropna(), color=clrs[idx], ax=axes[idx], kde=True, ec='white', stat='density')

    # Set the title of the subplot
    axes[idx].set_title(site, fontsize=13)

    # Set the y-label for specific subplots
    if idx in [0, 5, 10]:
        axes[idx].set_ylabel('Density', fontsize=13)
    else:
        axes[idx].set_ylabel('')

    # Set the tick label size for both axes
    axes[idx].tick_params(axis='both', which='major', labelsize=12)

    # Set the x-label for the bottom row of subplots
    if idx >= 9:
        axes[idx].set_xlabel(u'$\delta^{18}$O (\u2030)', fontsize=13)

    if idx == 14:
        # Add a legend to the last subplot
        axes[idx].set_xlim(-.5,1)

    # Iterate over each column in d18O_int for the current site and interval size
    for i in range(d18O_int[site][int_size].shape[1]):
        # Add a vertical line at the specified x-value with low opacity
        axes[idx].axvline(x=d18O_int[site][int_size][holo_int[int_size][:-1] == 4000 - int_size / 2, i], alpha=0.01, color='darkgray')

# Remove the last unused subplot
fig.delaxes(axes[-1])
../../_images/5b84ca7f4f75b4c97510a64b236bc12faf93699634d11a171753c4922200ca9d.png
with open('../../data/pickle/synthetic_signal_dict.pkl','rb') as handle:
    sn_signal_dict = pickle.load(handle)
# detrending and collect d18O values at different intervals
sn_ratios = list(sn_signal_dict.keys())
sn_int = {ratio : {} for ratio in sn_ratios}
sn_interval = {ratio : {} for ratio in sn_ratios}

for sn in [0.5,1,2]:
    #d18O values at different intervals
    syn_d18O_int={}
    #Detrended zscore vals
    syn_zscore_detrend={}
    # d18O values over the 4ka interval
    syn_interval_dict = {}
    signal_dict = sn_signal_dict[sn]

    for site in signal_dict.keys():
        ty = sn_signal_dict[sn][site]
        chron_tmp = holo_chrons[site]['chron'].astype(float)
        nC = 1000

        syn_zscore_detrend[site]=np.zeros((nC,len(ty)))
        syn_zscore_detrend[site][:]=np.nan
        
        for j in range(nC):
            tx=chron_tmp[j,:]
            # detrending by the Pyleoclim detrending function
            ts = pyleo.Series(time=tx, value=ty,dropna=False,verbose=False)
            if site != 'Idealized':
                ts_detrended= ts.detrend(method='savitzky-golay')
                a=ts_detrended.value
                syn_zscore_detrend[site][j,:] = zscore(a,nan_policy='omit')
            else:
                a=ts.value
                syn_zscore_detrend[site][j,:] = a

        syn_d18O_int[site]={}
        syn_interval_dict[site] = []
        holo_int={}
        # int_size: interval size
        for int_size in [150,200]:
            holo_int[int_size] = np.arange(4000%int_size-int_size/2,10000,int_size)
            syn_d18O_int[site][int_size]=np.zeros((len(holo_int[int_size])-1,nC))
            syn_d18O_int[site][int_size][:]=np.nan
            # iterate over each interval
            for idx,tage in enumerate(holo_int[int_size][:-1]):
                for j in range(nC):
                    tx = chron_tmp[j,:]
                    #calculate median interval value for each chron
                    syn_d18O_int[site][int_size][idx,j]=np.nanmedian(syn_zscore_detrend[site][j,(tx>=tage) & (tx<holo_int[int_size][idx+1])])
            # Iterate over the columns of d18O_int for the current site and interval size
            for i in range(syn_d18O_int[site][int_size].shape[1]):
                # Append the d18O values at the 4ka interval to interval_dict
                syn_interval_dict[site].append(syn_d18O_int[site][int_size][holo_int[int_size][:-1] == 4000 - int_size / 2, i])
    sn_int[sn] = syn_d18O_int
    sn_interval[sn] = syn_interval_dict
# Plotting

# Set color palette based on the number of keys in holo_chrons
clrs = sns.color_palette(palette='tab20', n_colors=len(holo_chrons.keys()) + 1)
clrs[14] = '#FFD700' # Make the last color gold
linestyles= ['dashed','dashdot','dotted']

# # Create a figure with 5 columns and 3 rows of subplots
# fig, ax = plt.subplots(ncols=5, nrows=3, figsize=(16, 12), tight_layout=True)
# fig.subplots_adjust(hspace=1, wspace=-.1)

# # Flatten the 2D array of axes into a 1D array
# axes = ax.ravel()

fig = plt.figure(figsize=(16, 12))
gs = gridspec.GridSpec(3, 5, hspace=0.2, wspace=0.5)

int_size = 200
# Iterate over each site in holo_chrons
for idx, site in enumerate(holo_chrons.keys()):
    ax = fig.add_subplot(gs[idx])
    # Create a pandas Series from the flattened d18O_int for the current site and interval size
    s = pd.Series(d18O_int[site][int_size].flatten())

    for idy,sn in enumerate([0.5,1,2]):
        syn = pd.Series(sn_int[sn][site][int_size].flatten())
        sns.kdeplot(syn.dropna(), color=clrs[idx], ax=ax,linewidth=2,linestyle=linestyles[idy],common_norm=True,alpha=.5)
        median = np.nanmedian(sn_interval[sn][site])
        ax.axvline(x=median, alpha=1, color='black', linewidth=2, linestyle=linestyles[idy])

    # Plot a histogram with KDE for the current site
    sns.kdeplot(s.dropna(), color=clrs[idx], ax=ax,linewidth=3)

    # Set the title of the subplot
    ax.set_title(site, fontsize=13)

    # Set the y-label for specific subplots
    if idx in [0, 5, 10]:
        ax.set_ylabel('Density', fontsize=13)
    else:
        ax.set_ylabel('')

    # Set the tick label size for both axes
    ax.tick_params(axis='both', which='major', labelsize=12)

    # Set the x-label for the bottom row of subplots
    if idx >= 9:
        ax.set_xlabel(u'$\delta^{18}$O (\u2030)', fontsize=13)

    # if idx == 13:
    #     # Add a legend to the last subplot
    #     axes[idx].set_xlim(-.5,1)
    #    #axes[idx].legend(labels=['0.5','1','2'],title='SNR',fontsize=12)

    if idx == 13:
        # Create legend handles
        handle1 = Line2D([],[],color='black', linestyle='dashed', label='SNR = 0.5')
        handle2 = Line2D([],[],color='black', linestyle='dashdot', label='SNR = 1')
        handle3 = Line2D([],[],color='black', linestyle='dotted', label='SNR = 2')

        # Place the handles in a list
        legend_handles = [handle1, handle2, handle3]
        # Use the handles to create a legend
        ax.legend(handles=legend_handles,loc='right', bbox_to_anchor=(2,.5))

    # Iterate over each column in d18O_int for the current site and interval size
    for i in range(d18O_int[site][int_size].shape[1]):
        # Add a vertical line at the specified x-value with low opacity
        ax.axvline(x=d18O_int[site][int_size][holo_int[int_size][:-1] == 4000 - int_size / 2, i], alpha=0.01, color='darkgray')
../../_images/9104039b53b7df199a7b238d423be621b42f9d21dd5451f4347ea8e8bf05ac01.png