Getting data into LiPD
In this notebook we’ll illustrate how to use PyLiPD to load CSV-based chronology data into the LiPD data format. We’ll begin with some pre-fabricated LiPD files created using the LiPD playground.
import os
import pandas as pd
import pyleoclim as pyleo
from pylipd.classes.dataset import Dataset
from pylipd.classes.datatable import DataTable
from pylipd.classes.paleodata import PaleoData
from pylipd.classes.chrondata import ChronData
from pylipd.classes.model import Model
from pylipd.lipd import LiPD
Defining a function to generate unique IDs for our LiPD data:
import uuid
def generate_unique_id(prefix='PYD'):
    """Build a random identifier shaped like ``<prefix>-xxxxx-xxxx-xxxx-xxxx-xxxx``.

    A fresh version-4 UUID is rendered as text (for example
    ``'1e2a2846-2048-480b-9ec6-674daef472bd'``) and five runs of its
    hexadecimal characters are picked out and joined to ``prefix`` with
    hyphens.

    Parameters
    ----------
    prefix : str
        Text placed in front of the random part. Defaults to ``'PYD'``.

    Returns
    -------
    str
        The prefix followed by groups of 5, 4, 4, 4 and 4 hex characters.
    """
    uuid_text = str(uuid.uuid4())
    # (start, stop) character ranges taken from the UUID text; the ranges
    # step over the hyphens that sit at positions 8, 13, 18 and 23.
    spans = ((0, 5), (9, 13), (14, 18), (19, 23), (24, 28))
    chunks = [uuid_text[start:stop] for start, stop in spans]
    return "-".join([f"{prefix}", *chunks])
To make this easy, we’ll define a function that takes four arguments: a path to a folder containing a set of empty LiPD files, a path to a folder of CSVs containing the age model (chronology) ensemble data, a path to a folder of CSVs containing the corresponding depth data, and a target folder where the produced LiPD files will be saved.
def fill_LiPD_files(
    lipd_empty: str,
    chron_csv: str,
    depth_csv: str,
    target_dir: str,
):
    """Fill empty LiPD files with chronology ensemble data from CSV files.

    Every non-hidden file in ``lipd_empty`` is loaded as a LiPD dataset. The
    record name is the part of the file name before the first ``.``; it is
    used to locate ``<record_name>.chron.csv`` in ``chron_csv`` and
    ``<record_name>.depth.csv`` in ``depth_csv``. Each column of the chron
    CSV becomes the list of ages attached to one depth row, and the
    resulting table is stored as an ensemble table of a new chron-data
    model. The updated dataset is written to ``target_dir`` under the same
    file name as the input.

    Parameters
    ----------
    lipd_empty : str
        Path to the directory containing the empty LiPD files.
    chron_csv : str
        Path to the directory containing the chron data in csvs
        (``<record_name>.chron.csv``).
    depth_csv : str
        Path to the directory containing the depth data in csvs
        (``<record_name>.depth.csv``).
    target_dir : str
        Path to the directory where the filled LiPD files will be saved.
        It is created if it does not exist yet.

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the number of columns in a chron CSV differs from the number of
        rows in the matching depth CSV.
    """
    # Make sure there is somewhere to write the results.
    os.makedirs(target_dir, exist_ok=True)

    # Sorted so that files are processed in a reproducible order.
    for file in sorted(os.listdir(lipd_empty)):
        # Hidden files (e.g. '.DS_Store') are not LiPD files and would
        # produce an empty record name below.
        if file.startswith('.'):
            continue

        # Load the empty LiPD file and export it to a Dataset object
        D = LiPD()
        D.load(os.path.join(lipd_empty, file))
        ds = D.get_datasets()[0]

        # Record name: everything before the first '.' in the file name
        record_name = file.split('.')[0]

        # Load the chron and depth data
        chron = pd.read_csv(os.path.join(chron_csv, f'{record_name}.chron.csv')).to_numpy()
        depth = pd.read_csv(os.path.join(depth_csv, f'{record_name}.depth.csv')).to_numpy()

        # Rows of the chron array are counted as ensemble members and its
        # columns are matched one-to-one with the depth rows, so the sizes
        # are read from the data instead of assuming 1000 members.
        n_members, n_depths = chron.shape
        if n_depths != len(depth):
            raise ValueError(
                f"{record_name}: chron data has {n_depths} columns but "
                f"depth data has {len(depth)} rows"
            )

        df_chron = pd.DataFrame({'depth': depth.tolist()})
        # One list of ages (one per ensemble member) for every depth row
        df_chron['year'] = [list(column) for column in chron.T]

        # Ensemble members occupy columns 2..n_members+1 (depth is column 1)
        year_columns = list(range(2, n_members + 2))
        df_chron.attrs = {
            'year': {'number': str(year_columns), 'variableName': 'Age', 'units': 'yr BP', 'TSid': generate_unique_id()},
            'depth': {'number': 1, 'variableName': 'depth', 'units': 'cm', 'TSid': generate_unique_id()},
        }

        # Add the chron data to a data table
        ensembleTable = DataTable()
        ensembleTable.setDataFrame(df_chron)
        ensembleTable.setFileName("chron0model0ensemble0.csv")

        # Add the data table to a model
        model = Model()
        model.addEnsembleTable(ensembleTable)

        # Add the model to a chron data
        cd = ChronData()
        cd.addModeledBy(model)

        # Add the chron data to the dataset
        ds.addChronData(cd)

        # Save the data under the same file name in the target directory
        lipd = LiPD()
        lipd.load_datasets([ds])
        lipd.create_lipd(ds.getName(), os.path.join(target_dir, file))
Now we just call our function with the appropriate paths:
# Run the conversion on the example data: the empty LiPD files are read from
# ../../data/LiPD/empty, the chronology and depth CSVs from ../../data/CSV,
# and the filled LiPD files are written to ../../data/LiPD/full.
fill_LiPD_files(
lipd_empty = "../../data/LiPD/empty",
chron_csv = "../../data/CSV/raw_chron",
depth_csv = "../../data/CSV/depth",
target_dir = "../../data/LiPD/full"
)
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|█████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 74.58it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 110.62it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 116.77it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 103.99it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 120.75it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 122.70it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 121.54it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 113.12it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 111.65it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 107.93it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 118.19it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 109.49it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 109.72it/s]
Loaded..
Loading 1 LiPD files
0%| | 0/1 [00:00<?, ?it/s]
100%|████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:00<00:00, 125.42it/s]
Loaded..