We’re looking at what we can do to change the parameters for a model to see how well it performs.

library(tidyverse)
library(tidymodels)
tidymodels_prefer()

Load the Data

read_delim("../development_gene_expression.txt") -> data

# The predicted variable needs to be factor
data %>%
  mutate(Development=factor(Development)) %>%
  select(Development,everything()) -> data

# We want to randomly shuffle the rows so there is no structure
set.seed(123)
data %>%
  sample_frac() -> data

head(data)

We remove the gene names since we’re not using those and then split the rest into test/training

data %>%
  select(-gene) %>%
  initial_split(prop=0.8) -> split_data

split_data

Set the main options

number_of_trees_to_build = 100
random_predictors_per_node = 20
minimum_measures_per_node = 5

Random Forest

rand_forest(trees=number_of_trees_to_build, min_n=minimum_measures_per_node, mtry=random_predictors_per_node) %>%
  set_mode("classification") %>%
  set_engine("ranger") -> model

model %>% translate()

Train the model

model %>%
  fit(Development ~ ., data=training(split_data)) -> model_fit

model_fit

Test the model

Original Data

model_fit %>%
  predict(new_data=training(split_data)) %>%
  bind_cols(training(split_data)) %>%
  group_by(.pred_class, Development) %>%
  count() 
model_fit %>%
  predict(new_data=training(split_data)) %>%
  bind_cols(training(split_data)) %>%
  group_by(.pred_class, Development) %>%
  count() %>%
  mutate(
    correct = .pred_class==Development
  ) %>%
  group_by(correct) %>%
  summarise(
    n=sum(n)
  )

New Data

model_fit %>%
  predict(new_data=testing(split_data)) %>%
  bind_cols(testing(split_data)) %>%
  group_by(.pred_class, Development) %>%
  count()
model_fit %>%
  predict(new_data=testing(split_data)) %>%
  bind_cols(testing(split_data)) %>%
  group_by(.pred_class, Development) %>%
  count() %>%
  mutate(
    correct = .pred_class==Development
  ) %>%
  group_by(correct) %>%
  summarise(
    n=sum(n)
  )
LS0tDQp0aXRsZTogIk1vZGVsIE9wdGltaXNhdGlvbiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCldlJ3JlIGxvb2tpbmcgYXQgd2hhdCB3ZSBjYW4gZG8gdG8gY2hhbmdlIHRoZSBwYXJhbWV0ZXJzIGZvciBhIG1vZGVsIHRvIHNlZSBob3cgd2VsbCBpdCBwZXJmb3Jtcy4NCg0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeSh0aWR5bW9kZWxzKQ0KdGlkeW1vZGVsc19wcmVmZXIoKQ0KYGBgDQoNCkxvYWQgdGhlIERhdGENCj09PT09PT09PT09PT0NCg0KYGBge3J9DQpyZWFkX2RlbGltKCIuLi9kZXZlbG9wbWVudF9nZW5lX2V4cHJlc3Npb24udHh0IikgLT4gZGF0YQ0KDQojIFRoZSBwcmVkaWN0ZWQgdmFyaWFibGUgbmVlZHMgdG8gYmUgZmFjdG9yDQpkYXRhICU+JQ0KICBtdXRhdGUoRGV2ZWxvcG1lbnQ9ZmFjdG9yKERldmVsb3BtZW50KSkgJT4lDQogIHNlbGVjdChEZXZlbG9wbWVudCxldmVyeXRoaW5nKCkpIC0+IGRhdGENCg0KIyBXZSB3YW50IHRvIHJhbmRvbWx5IHNodWZmbGUgdGhlIHJvd3Mgc28gdGhlcmUgaXMgbm8gc3RydWN0dXJlDQpzZXQuc2VlZCgxMjMpDQpkYXRhICU+JQ0KICBzYW1wbGVfZnJhYygpIC0+IGRhdGENCg0KaGVhZChkYXRhKQ0KDQpgYGANCg0KV2UgcmVtb3ZlIHRoZSBnZW5lIG5hbWVzIHNpbmNlIHdlJ3JlIG5vdCB1c2luZyB0aG9zZSBhbmQgdGhlbiBzcGxpdCB0aGUgcmVzdCBpbnRvIHRlc3QvdHJhaW5pbmcNCg0KYGBge3J9DQpkYXRhICU+JQ0KICBzZWxlY3QoLWdlbmUpICU+JQ0KICBpbml0aWFsX3NwbGl0KHByb3A9MC44KSAtPiBzcGxpdF9kYXRhDQoNCnNwbGl0X2RhdGENCmBgYA0KDQpTZXQgdGhlIG1haW4gb3B0aW9ucw0KPT09PT09PT09PT09PT09PT09PT0NCg0KYGBge3J9DQpudW1iZXJfb2ZfdHJlZXNfdG9fYnVpbGQgPSAxMDANCnJhbmRvbV9wcmVkaWN0b3JzX3Blcl9ub2RlID0gMjANCm1pbmltdW1fbWVhc3VyZXNfcGVyX25vZGUgPSA1DQpgYGANCg0KDQoNCg0KUmFuZG9tIEZvcmVzdA0KLS0tLS0tLS0tLS0tLQ0KDQpgYGB7cn0NCnJhbmRfZm9yZXN0KHRyZWVzPW51bWJlcl9vZl90cmVlc190b19idWlsZCwgbWluX249bWluaW11bV9tZWFzdXJlc19wZXJfbm9kZSwgbXRyeT1yYW5kb21fcHJlZGljdG9yc19wZXJfbm9kZSkgJT4lDQogIHNldF9tb2RlKCJjbGFzc2lmaWNhdGlvbiIpICU+JQ0KICBzZXRfZW5naW5lKCJyYW5nZXIiKSAtPiBtb2RlbA0KDQptb2RlbCAlPiUgdHJhbnNsYXRlKCkNCmBgYA0KDQojIyMgVHJhaW4gdGhlIG1vZGVsDQoNCmBgYHtyfQ0KbW9kZWwgJT4lDQogIGZpdChEZXZlbG9wbWVudCB+IC4sIGRhdGE9dHJhaW5pbmcoc3BsaXRfZGF0YSkpIC0+IG1vZGVsX2ZpdA0KDQptb2RlbF9maXQNCmBgYA0KDQojIyMgVGVzdCB0aGUgbW9kZWwNCg0KIyMjIyBPcmlnaW5hbCBEYXRhDQoNCmBgYHtyfQ0KbW9kZWxfZml0ICU+JQ0KICBwcmVkaWN0KG5ld19kYXRhPXRyYWluaW5nKHNwbGl0X2RhdGEpKSAlPiUNCiAgYmluZF9jb2xzKHRyYWluaW5nKHNwbGl0X2RhdGEpKSAlPiUNCiAgZ3JvdXBfYnkoLnByZWRfY2xhc3MsIERldmVsb3BtZW50KSAlPiUNCiAgY291bnQoKSANCmBgYA0KDQpgYGB7cn0NCm1vZGVsX2ZpdCAlPiUNCiAgcHJlZGljdChuZXdfZGF0YT10cmFpbmluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGJpbmRfY29scyh0cmFpbmluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGdyb3VwX2J5KC5wcmVkX2NsYXNzLCBEZXZlbG9wbWVudCkgJT4lDQogIGNvdW50KCkgJT4lDQogIG11dGF0ZSgNCiAgICBjb3JyZWN0ID0gLnByZWRfY2xhc3M9PURldmVsb3BtZW50DQogICkgJT4lDQogIGdyb3VwX2J5KGNvcnJlY3QpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbj1zdW0obikNCiAgKQ0KYGBgDQoNCiMjIyMgTmV3IERhdGENCg0KYGBge3J9DQptb2RlbF9maXQgJT4lDQogIHByZWRpY3QobmV3X2RhdGE9dGVzdGluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGJpbmRfY29scyh0ZXN0aW5nKHNwbGl0X2RhdGEpKSAlPiUNCiAgZ3JvdXBfYnkoLnByZWRfY2xhc3MsIERldmVsb3BtZW50KSAlPiUNCiAgY291bnQoKQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWxfZml0ICU+JQ0KICBwcmVkaWN0KG5ld19kYXRhPXRlc3Rpbmcoc3BsaXRfZGF0YSkpICU+JQ0KICBiaW5kX2NvbHModGVzdGluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGdyb3VwX2J5KC5wcmVkX2NsYXNzLCBEZXZlbG9wbWVudCkgJT4lDQogIGNvdW50KCkgJT4lDQogIG11dGF0ZSgNCiAgICBjb3JyZWN0ID0gLnByZWRfY2xhc3M9PURldmVsb3BtZW50DQogICkgJT4lDQogIGdyb3VwX2J5KGNvcnJlY3QpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbj1zdW0obikNCiAgKQ0KYGBgDQo=