-
Notifications
You must be signed in to change notification settings - Fork 0
/
train.r
27 lines (21 loc) · 891 Bytes
/
train.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# just train and store a model in a file
library("pls", quietly = TRUE)
# Load the training set: read.table() with a header row, every column parsed
# as numeric, then converted to a plain numeric matrix.
data <- as.matrix(read.table("data/solubility_train_std_01.csv",
                             colClasses = "numeric", header = TRUE))

# Sanity-check the shape of the input before training. The conditions are
# passed separately so a failure reports which dimension was wrong.
n_rows <- nrow(data)
n_cols <- ncol(data)
stopifnot(n_cols == 842, n_rows == 1004)

# Column 1 is the response variable; the remaining columns are the features.
xs <- data[, 2:n_cols] # all lines, only feature columns
ys <- data[, 1]        # all lines, 1st column only (response var.)

# I(x) prevents xs from having its columns split into different variables,
# so the formula below can refer to the whole feature matrix as `x`.
train_data <- data.frame(y = ys, x = I(xs))

# Just train a model, assuming ncomp_best = 13 (no validation is run here).
model <- plsr(y ~ x, ncomp = 13, method = "simpls", data = train_data,
              validation = "none")

# Extract the regression coefficients and write them to a text file using a
# newline as the field separator, without row or column names.
coefs <- coef(model)
write.table(coefs, file = "oplsr_coefs.txt", sep = "\n",
            row.names = FALSE, col.names = FALSE)

# Store the fitted model object so it can be reloaded later with load().
save(model, file = "oplsr_model.bin")
quit()