forked from USGS-R/GLRI_CEC_2016
-
Notifications
You must be signed in to change notification settings - Fork 0
/
20_merge_data.yml
119 lines (86 loc) · 3.35 KB
/
20_merge_data.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# Build configuration for the data-merge step (remake-style layout:
# target_default / include / packages / sources / targets).

# Target that is built when no target is named explicitly.
target_default: 20_merge_data

# Upstream build file whose targets are made available to this one.
include:
  - 15_clean_data.yml

# R packages listed for the commands in this file.
packages:
  - dplyr
  - tidyr
  - readr
  - dataRetrieval
  - openxlsx

# R script listed as the source of the functions called in the target
# commands.
sources:
  - 20_merge_data/src/merge_data.R
# Targets for the merge step.
#
# Every entry below must be nested under `targets:`, and each target's
# `command` / `depends` must be nested under the target name; otherwise the
# target names and the repeated `command` keys collapse into duplicate
# top-level keys.
#
# Names used as command arguments but not defined in this file (tracking,
# pesticides_clean, neonics_clean, glyphosate_clean, pCodeInfo, classes_fixed,
# sites, exclusions, benchmarks, benchmarks_hbs, chem_crosswalk) are presumably
# supplied by the included 15_clean_data.yml or its own includes -- TODO confirm.
targets:
  # Default target: groups the outputs of this step so that building it
  # builds everything listed under `depends`.
  20_merge_data:
    depends:
      - chem_dls
      - data/pesticides.xlsx
      - data/pesticides_Conc.xlsx
      - data/pesticides_Bench.xlsx
      - chemicalSummary
      - chemicalSummary_bench
      - chemicalSummary_conc
      - chemicalSummary_dls
      - chemicalSummary_bench_hbs

  merged_dat:
    command: merge_data(tracking, pesticides_clean, neonics_clean, glyphosate_clean)

  reduced_dat:
    command: remove_duplicate_chems(merged_dat)

  special_cas:
    command: get_special_cas()

  chem_dls:
    command: calc_detect_limits(chem_data_complete)

  filtered_censor:
    command: remove_censor(reduced_dat)

  # Three chemical-data variants follow: chem_data is built from the
  # censor-filtered data, all_chems and chem_data_complete from reduced_dat
  # (the latter through create_chemData2 rather than create_chemData).
  chem_data:
    command: create_chemData(filtered_censor, pCodeInfo)

  # NOTE(review): no other target in this file consumes no_cas_crosswalk,
  # while chem_master below reads chem_crosswalk, which is not defined here.
  # Confirm chem_crosswalk is defined in an included file and that this
  # target is used elsewhere.
  no_cas_crosswalk:
    command: crosswalk_nocas_chems()

  all_chems:
    command: create_chemData(reduced_dat, pCodeInfo)

  chem_data_complete:
    command: create_chemData2(reduced_dat, pCodeInfo)

  chem_data_dl:
    command: create_chemData(chem_dls, pCodeInfo)

  chems_missing_cas:
    command: find_missing_cas(reduced_dat, chem_data)

  # One chemical-info table per chemical-data variant above.
  chem_info:
    command: create_tox_chemInfo(chem_data, special_cas, pCodeInfo, classes_fixed)

  chem_info_all:
    command: create_tox_chemInfo(all_chems, special_cas, pCodeInfo, classes_fixed)

  chem_info_complete:
    command: create_tox_chemInfo(chem_data_complete, special_cas, pCodeInfo, classes_fixed)

  # all compounds, all metadata, with fixed cas
  chem_master:
    command: fix_missing_cas(chem_info_complete, chem_crosswalk)

  # now fix the chem data to include missing cas
  # (the two targets differ only in the filter_dl flag)
  chem_master_data:
    command: complete_cas(chem_data_complete, chem_master, filter_dl = TRUE)

  chem_master_data_dls:
    command: complete_cas(chem_data_complete, chem_master, filter_dl = FALSE)

  site_info:
    command: create_tox_siteInfo(sites)

  # exclude extra Maumee sites for certain analysis
  # kept dates that were most aligned with other samples, kept distance ~1 month apart
  # The date strings are wrapped in I() so they are passed as literal values
  # rather than being read as file names.
  maumee_exclude:
    command: c(I(c('2016-05-10', '2016-06-07', '2016-06-14', '2016-06-21', '2016-07-20', '2016-08-25')))

  # Excel outputs. `target_name` stands for the name of the file target being
  # built, so each function receives its own output path.
  data/pesticides.xlsx:
    command: create_toxExcel(chem_master_data, chem_master, site_info, exclusions, target_name)

  data/pesticides_dls.xlsx:
    command: create_toxExcel(chem_data_dl, chem_info, site_info, exclusions, target_name)

  # use all chems, not just chems with CAS numbers
  data/pesticides_Conc.xlsx:
    command: create_ConcExcel(chem_master_data, chem_master, site_info, exclusions, target_name)

  data/pesticides_Bench.xlsx:
    command: create_WQExcel(chem_master_data, chem_master, site_info, exclusions, benchmarks, target_name)

  data/pesticides_Bench_hbs.xlsx:
    command: create_WQExcel(chem_data, chem_info, site_info, exclusions, benchmarks_hbs, target_name)

  # Summaries read back from the Excel files above; the quoted paths name
  # the corresponding file targets.
  chemicalSummary:
    command: get_chem_sum("data/pesticides.xlsx")

  chemicalSummary_dls:
    command: get_chem_sum("data/pesticides_dls.xlsx")

  chemicalSummary_bench:
    command: get_chem_bench("data/pesticides_Bench.xlsx")

  chemicalSummary_bench_hbs:
    command: get_chem_bench("data/pesticides_Bench_hbs.xlsx")

  chemicalSummary_conc:
    command: get_conc_summary("data/pesticides_Conc.xlsx")