library(tidyverse)
library(tidymodels)
tidymodels_prefer()
Load the Data
# Read the expression table. No delimiter is passed, so the choice is left
# to read_delim().
data <- read_delim("../development_gene_expression.txt")

# The outcome column has to be a factor for classification; it is also moved
# to the front so it is the first column of the table.
data <- select(
  mutate(data, Development = factor(Development)),
  Development,
  everything()
)

# Shuffle the rows so that no ordering from the input file is left behind.
# The seed is fixed first so the shuffle is repeatable.
set.seed(123)
data <- sample_frac(data)

# Preview the first rows of the shuffled table.
head(data)
We're going to do proper cross-validation, so we need to split the data into
10 folds
# Build the 10-fold cross-validation resamples from the shuffled data.
cv_data <- vfold_cv(data, v = 10)
Set the main options
# Random forest settings, collected in one place so they are easy to change.
# Passed to rand_forest() as `trees`.
number_of_trees_to_build <- 100
# Passed to rand_forest() as `mtry`.
random_predictors_per_node <- 20
# Passed to rand_forest() as `min_n`.
minimum_measures_per_node <- 5
Random Forest
# Random forest specification, parameterised by the option variables.
model <- rand_forest(
  trees = number_of_trees_to_build,
  min_n = minimum_measures_per_node,
  mtry = random_predictors_per_node
)
# Classification mode, fitted with the ranger engine.
model <- set_mode(model, "classification")
model <- set_engine(model, "ranger")

# Show how the specification translates to the underlying engine call.
translate(model)
Train the model
# Fit the model specification on every resample in cv_data, using all other
# columns as predictors of Development.
model_fit <- fit_resamples(model, Development ~ ., resamples = cv_data)

# Print the resampling results.
model_fit
# Collect the performance metrics from the resampled fits and display them.
model_metrics <- collect_metrics(model_fit)

model_metrics
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpsaWJyYXJ5KHRpZHltb2RlbHMpDQp0aWR5bW9kZWxzX3ByZWZlcigpDQpgYGANCg0KTG9hZCB0aGUgRGF0YQ0KPT09PT09PT09PT09PQ0KDQpgYGB7cn0NCnJlYWRfZGVsaW0oIi4uL2RldmVsb3BtZW50X2dlbmVfZXhwcmVzc2lvbi50eHQiKSAtPiBkYXRhDQoNCiMgVGhlIHByZWRpY3RlZCB2YXJpYWJsZSBuZWVkcyB0byBiZSBmYWN0b3INCmRhdGEgJT4lDQogIG11dGF0ZShEZXZlbG9wbWVudD1mYWN0b3IoRGV2ZWxvcG1lbnQpKSAlPiUNCiAgc2VsZWN0KERldmVsb3BtZW50LGV2ZXJ5dGhpbmcoKSkgLT4gZGF0YQ0KDQojIFdlIHdhbnQgdG8gcmFuZG9tbHkgc2h1ZmZsZSB0aGUgcm93cyBzbyB0aGVyZSBpcyBubyBzdHJ1Y3R1cmUNCnNldC5zZWVkKDEyMykNCmRhdGEgJT4lDQogIHNhbXBsZV9mcmFjKCkgLT4gZGF0YQ0KDQpoZWFkKGRhdGEpDQoNCmBgYA0KDQpXZSdyZSBnb2luZyB0byBkbyBwcm9wZXIgY3Jvc3MgdmFsaWRhdGlvbiBzbyB3ZSBuZWVkIHRvIHNwbGl0IGludG8gMTAgY2h1bmtzDQoNCmBgYHtyfQ0KZGF0YSAlPiUNCiAgdmZvbGRfY3YodiA9IDEwKSAtPiBjdl9kYXRhDQpgYGANCg0KU2V0IHRoZSBtYWluIG9wdGlvbnMNCj09PT09PT09PT09PT09PT09PT09DQoNCmBgYHtyfQ0KbnVtYmVyX29mX3RyZWVzX3RvX2J1aWxkID0gMTAwDQpyYW5kb21fcHJlZGljdG9yc19wZXJfbm9kZSA9IDIwDQptaW5pbXVtX21lYXN1cmVzX3Blcl9ub2RlID0gNQ0KYGBgDQoNCg0KDQoNClJhbmRvbSBGb3Jlc3QNCi0tLS0tLS0tLS0tLS0NCg0KYGBge3J9DQpyYW5kX2ZvcmVzdCh0cmVlcz1udW1iZXJfb2ZfdHJlZXNfdG9fYnVpbGQsIG1pbl9uPW1pbmltdW1fbWVhc3VyZXNfcGVyX25vZGUsIG10cnk9cmFuZG9tX3ByZWRpY3RvcnNfcGVyX25vZGUpICU+JQ0KICBzZXRfbW9kZSgiY2xhc3NpZmljYXRpb24iKSAlPiUNCiAgc2V0X2VuZ2luZSgicmFuZ2VyIikgLT4gbW9kZWwNCg0KbW9kZWwgJT4lIHRyYW5zbGF0ZSgpDQpgYGANCg0KIyMjIFRyYWluIHRoZSBtb2RlbA0KDQpgYGB7cn0NCm1vZGVsICU+JQ0KICBmaXRfcmVzYW1wbGVzKERldmVsb3BtZW50IH4gLiwgcmVzYW1wbGVzID0gY3ZfZGF0YSkgLT4gbW9kZWxfZml0DQoNCm1vZGVsX2ZpdA0KYGBgDQoNCmBgYHtyfQ0KY29sbGVjdF9tZXRyaWNzKG1vZGVsX2ZpdCkgLT4gbW9kZWxfbWV0cmljcw0KDQptb2RlbF9tZXRyaWNzDQpgYGANCg0KDQo=