-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmeds_meta_analysis.py
101 lines (88 loc) · 4.26 KB
/
meds_meta_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/usr/bin/python
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import seaborn as sns
sns.set(style='ticks', palette=sns.color_palette('colorblind'))
from bgcArgoDMQC import get_index
# get the global and biogeochemical profile index
gbl = get_index('global')
bgc = get_index('bgc')
# only concerned with meds floats
meds_gbl = gbl[gbl.dac == 'meds']
meds_bgc = bgc[bgc.dac == 'meds']
# pre-allocate array for data mode of oxygen
doxy_mode = np.array(meds_bgc.shape[0]*[' '])
# loop through, find the location of oxygen in parameter, and extract the data mode
for i,ix in enumerate(meds_bgc.index):
param_list = np.array(meds_bgc.loc[ix].parameters.split(' '))
doxy_index = np.where(param_list == 'DOXY')[0][0]
doxy_mode[i] = meds_bgc.loc[ix].parameter_data_mode[doxy_index]
# add the data mode to the data frame
meds_bgc['doxy_mode'] = doxy_mode
# get the number of floats in each possible data mode
N_adjust = np.sum(meds_bgc.doxy_mode == 'A')
N_delay = np.sum(meds_bgc.doxy_mode == 'D')
N_real = np.sum(meds_bgc.doxy_mode == 'R')
# print them out
print(N_real, N_adjust, N_delay)
# now get the core file for each bio file, including if they are in different data modes
meds_bgc['core_file'] = np.array([f.replace('B', '') for f in meds_bgc.file])
meds_bgc['core_D_file'] = np.array([f.replace('BR', 'D') for f in meds_bgc.file])
meds_core_bgc = meds_gbl[meds_gbl.file.isin(meds_bgc.core_file) | meds_gbl.file.isin(meds_bgc.core_D_file)]
# pre-allocate array for data mode of core variables
core_mode = np.array(meds_core_bgc.shape[0]*[' '])
# loop through and get the data mode, just based on file name for core Argo
for i,ix in enumerate(meds_core_bgc.index):
fn = meds_core_bgc.loc[ix].file
core_mode[i] = fn.split('/')[-1][0]
# add to data frame
meds_core_bgc['core_mode'] = core_mode
# create a third data frame with the file names and oxygen data modes
# requires to match file names
df = pd.DataFrame()
df['bgc_file'] = meds_bgc.file
df['wmo'] = meds_bgc.wmo
df['cycle'] = meds_bgc.cycle
df['date'] = meds_bgc.date
sorted_dm = np.array(doxy_mode.shape[0]*[' '])
for i, wmo, cyc in zip(range(doxy_mode.shape[0]), meds_core_bgc.wmo, meds_core_bgc.cycle):
sorted_dm[i] = meds_core_bgc[np.logical_and(meds_core_bgc.wmo == wmo, meds_core_bgc.cycle == cyc)].core_mode.iloc[0]
# put into single dataframe
df['core_mode'] = sorted_dm
df['doxy_mode'] = meds_bgc.doxy_mode
# get number of each kind of match
N_r_d = df[np.logical_and(df.doxy_mode == 'R', df.core_mode == 'D')].shape[0]
N_a_d = df[np.logical_and(df.doxy_mode == 'A', df.core_mode == 'D')].shape[0]
N_a_r = df[np.logical_and(df.doxy_mode == 'A', df.core_mode == 'R')].shape[0]
N_r_r = df[np.logical_and(df.doxy_mode == 'R', df.core_mode == 'R')].shape[0]
N_d_d = df[np.logical_and(df.doxy_mode == 'D', df.core_mode == 'D')].shape[0]
# show each count
print(N_r_d, N_a_d, N_a_r, N_r_r, N_d_d, df.shape[0])
# put the counts into a dataframe for visualizing
ct = pd.DataFrame(dict(
names=['Both RT', 'BGC RT, Core DM', 'BGC A, Core RT', 'BGC A, Core DM', 'Both DM'],
counts=[N_r_r, N_r_d, N_a_r, N_a_d, N_d_d]
))
ct['percent'] = [100*n/df.shape[0] for n in ct.counts]
# repeat as above but remove young profiles (< 1 yr)
current_date = mdates.datetime.date.today()
df = df[df.date.notna()] # there are two nan dates?
df['date'] = [mdates.datetime.date(int(s[:4]), int(s[4:6]), int(s[6:8])) for s in df.date.astype(str)]
df = df[current_date - df.date > mdates.datetime.timedelta(365)]
# get number of each kind of match
N_r_d = df[np.logical_and(df.doxy_mode == 'R', df.core_mode == 'D')].shape[0]
N_a_d = df[np.logical_and(df.doxy_mode == 'A', df.core_mode == 'D')].shape[0]
N_a_r = df[np.logical_and(df.doxy_mode == 'A', df.core_mode == 'R')].shape[0]
N_r_r = df[np.logical_and(df.doxy_mode == 'R', df.core_mode == 'R')].shape[0]
N_d_d = df[np.logical_and(df.doxy_mode == 'D', df.core_mode == 'D')].shape[0]
# show each count
print(N_r_d, N_a_d, N_a_r, N_r_r, N_d_d, df.shape[0])
# put the counts into a dataframe for visualizing
ct_dm = pd.DataFrame(dict(
names=['Both RT', 'BGC RT, Core DM', 'BGC A, Core RT', 'BGC A, Core DM', 'Both DM'],
counts=[N_r_r, N_r_d, N_a_r, N_a_d, N_d_d]
))
ct_dm['percent'] = [100*n/df.shape[0] for n in ct.counts]