We’re looking at what we can do to change the parameters for a model
to see how well it performs.
library(tidyverse)
library(tidymodels)
tidymodels_prefer()
Load the Data
read_delim("../development_gene_expression.txt") -> data
# The predicted variable needs to be factor
data %>%
mutate(Development=factor(Development)) %>%
select(Development,everything()) -> data
# We want to randomly shuffle the rows so there is no structure
set.seed(123)
data %>%
sample_frac() -> data
head(data)
We remove the gene names since we’re not using those and then split
the rest into test/training
data %>%
select(-gene) %>%
initial_split(prop=0.8) -> split_data
split_data
Set the main options
number_of_trees_to_build = 100
random_predictors_per_node = 20
minimum_measures_per_node = 5
Random Forest
rand_forest(trees=number_of_trees_to_build, min_n=minimum_measures_per_node, mtry=random_predictors_per_node) %>%
set_mode("classification") %>%
set_engine("ranger") -> model
model %>% translate()
Train the model
model %>%
fit(Development ~ ., data=training(split_data)) -> model_fit
model_fit
Test the model
Original Data
model_fit %>%
predict(new_data=training(split_data)) %>%
bind_cols(training(split_data)) %>%
group_by(.pred_class, Development) %>%
count()
model_fit %>%
predict(new_data=training(split_data)) %>%
bind_cols(training(split_data)) %>%
group_by(.pred_class, Development) %>%
count() %>%
mutate(
correct = .pred_class==Development
) %>%
group_by(correct) %>%
summarise(
n=sum(n)
)
New Data
model_fit %>%
predict(new_data=testing(split_data)) %>%
bind_cols(testing(split_data)) %>%
group_by(.pred_class, Development) %>%
count()
model_fit %>%
predict(new_data=testing(split_data)) %>%
bind_cols(testing(split_data)) %>%
group_by(.pred_class, Development) %>%
count() %>%
mutate(
correct = .pred_class==Development
) %>%
group_by(correct) %>%
summarise(
n=sum(n)
)
LS0tDQp0aXRsZTogIk1vZGVsIE9wdGltaXNhdGlvbiINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCldlJ3JlIGxvb2tpbmcgYXQgd2hhdCB3ZSBjYW4gZG8gdG8gY2hhbmdlIHRoZSBwYXJhbWV0ZXJzIGZvciBhIG1vZGVsIHRvIHNlZSBob3cgd2VsbCBpdCBwZXJmb3Jtcy4NCg0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeSh0aWR5bW9kZWxzKQ0KdGlkeW1vZGVsc19wcmVmZXIoKQ0KYGBgDQoNCkxvYWQgdGhlIERhdGENCj09PT09PT09PT09PT0NCg0KYGBge3J9DQpyZWFkX2RlbGltKCIuLi9kZXZlbG9wbWVudF9nZW5lX2V4cHJlc3Npb24udHh0IikgLT4gZGF0YQ0KDQojIFRoZSBwcmVkaWN0ZWQgdmFyaWFibGUgbmVlZHMgdG8gYmUgZmFjdG9yDQpkYXRhICU+JQ0KICBtdXRhdGUoRGV2ZWxvcG1lbnQ9ZmFjdG9yKERldmVsb3BtZW50KSkgJT4lDQogIHNlbGVjdChEZXZlbG9wbWVudCxldmVyeXRoaW5nKCkpIC0+IGRhdGENCg0KIyBXZSB3YW50IHRvIHJhbmRvbWx5IHNodWZmbGUgdGhlIHJvd3Mgc28gdGhlcmUgaXMgbm8gc3RydWN0dXJlDQpzZXQuc2VlZCgxMjMpDQpkYXRhICU+JQ0KICBzYW1wbGVfZnJhYygpIC0+IGRhdGENCg0KaGVhZChkYXRhKQ0KDQpgYGANCg0KV2UgcmVtb3ZlIHRoZSBnZW5lIG5hbWVzIHNpbmNlIHdlJ3JlIG5vdCB1c2luZyB0aG9zZSBhbmQgdGhlbiBzcGxpdCB0aGUgcmVzdCBpbnRvIHRlc3QvdHJhaW5pbmcNCg0KYGBge3J9DQpkYXRhICU+JQ0KICBzZWxlY3QoLWdlbmUpICU+JQ0KICBpbml0aWFsX3NwbGl0KHByb3A9MC44KSAtPiBzcGxpdF9kYXRhDQoNCnNwbGl0X2RhdGENCmBgYA0KDQpTZXQgdGhlIG1haW4gb3B0aW9ucw0KPT09PT09PT09PT09PT09PT09PT0NCg0KYGBge3J9DQpudW1iZXJfb2ZfdHJlZXNfdG9fYnVpbGQgPSAxMDANCnJhbmRvbV9wcmVkaWN0b3JzX3Blcl9ub2RlID0gMjANCm1pbmltdW1fbWVhc3VyZXNfcGVyX25vZGUgPSA1DQpgYGANCg0KDQoNCg0KUmFuZG9tIEZvcmVzdA0KLS0tLS0tLS0tLS0tLQ0KDQpgYGB7cn0NCnJhbmRfZm9yZXN0KHRyZWVzPW51bWJlcl9vZl90cmVlc190b19idWlsZCwgbWluX249bWluaW11bV9tZWFzdXJlc19wZXJfbm9kZSwgbXRyeT1yYW5kb21fcHJlZGljdG9yc19wZXJfbm9kZSkgJT4lDQogIHNldF9tb2RlKCJjbGFzc2lmaWNhdGlvbiIpICU+JQ0KICBzZXRfZW5naW5lKCJyYW5nZXIiKSAtPiBtb2RlbA0KDQptb2RlbCAlPiUgdHJhbnNsYXRlKCkNCmBgYA0KDQojIyMgVHJhaW4gdGhlIG1vZGVsDQoNCmBgYHtyfQ0KbW9kZWwgJT4lDQogIGZpdChEZXZlbG9wbWVudCB+IC4sIGRhdGE9dHJhaW5pbmcoc3BsaXRfZGF0YSkpIC0+IG1vZGVsX2ZpdA0KDQptb2RlbF9maXQNCmBgYA0KDQojIyMgVGVzdCB0aGUgbW9kZWwNCg0KIyMjIyBPcmlnaW5hbCBEYXRhDQoNCmBgYHtyfQ0KbW9kZWxfZml0ICU+JQ0KICBwcmVkaWN0KG5ld19kYXRhPXRyYWluaW5nKHNwbGl0X2RhdGEpKSAlPiUNCiAgYmluZF9jb2xzKHRyYWluaW5nKHNwbGl0X2RhdGEpKSAlPiUNCiAgZ3JvdXBfYnkoLnByZWRfY2xhc3MsIERldmVsb3BtZW50KSAlPiUNCiAgY291bnQoKSANCmBgYA0KDQpgYGB7cn0NCm1vZGVsX2ZpdCAlPiUNCiAgcHJlZGljdChuZXdfZGF0YT10cmFpbmluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGJpbmRfY29scyh0cmFpbmluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGdyb3VwX2J5KC5wcmVkX2NsYXNzLCBEZXZlbG9wbWVudCkgJT4lDQogIGNvdW50KCkgJT4lDQogIG11dGF0ZSgNCiAgICBjb3JyZWN0ID0gLnByZWRfY2xhc3M9PURldmVsb3BtZW50DQogICkgJT4lDQogIGdyb3VwX2J5KGNvcnJlY3QpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbj1zdW0obikNCiAgKQ0KYGBgDQoNCiMjIyMgTmV3IERhdGENCg0KYGBge3J9DQptb2RlbF9maXQgJT4lDQogIHByZWRpY3QobmV3X2RhdGE9dGVzdGluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGJpbmRfY29scyh0ZXN0aW5nKHNwbGl0X2RhdGEpKSAlPiUNCiAgZ3JvdXBfYnkoLnByZWRfY2xhc3MsIERldmVsb3BtZW50KSAlPiUNCiAgY291bnQoKQ0KYGBgDQoNCmBgYHtyfQ0KbW9kZWxfZml0ICU+JQ0KICBwcmVkaWN0KG5ld19kYXRhPXRlc3Rpbmcoc3BsaXRfZGF0YSkpICU+JQ0KICBiaW5kX2NvbHModGVzdGluZyhzcGxpdF9kYXRhKSkgJT4lDQogIGdyb3VwX2J5KC5wcmVkX2NsYXNzLCBEZXZlbG9wbWVudCkgJT4lDQogIGNvdW50KCkgJT4lDQogIG11dGF0ZSgNCiAgICBjb3JyZWN0ID0gLnByZWRfY2xhc3M9PURldmVsb3BtZW50DQogICkgJT4lDQogIGdyb3VwX2J5KGNvcnJlY3QpICU+JQ0KICBzdW1tYXJpc2UoDQogICAgbj1zdW0obikNCiAgKQ0KYGBgDQo=