# Digital stress survey (spring 2017): setup, data import and first recodes.
#
# NOTE(review): this reads like an interactive scratch script. Some objects
# used below (`df_factor`, `df_treeFactor2`) are not created before their
# first use in this section; they are assumed to come from a later part of
# the script or from a saved workspace -- confirm before sourcing top-down.

# Packages ----
library(readxl) # to read in data
# NOTE(review): machine-specific working directory; kept because `here()`
# resolves the project root from it. Prefer an RStudio project / .here file.
setwd("~/SINTEF")
library(here)
library(tidyr)
library(ggplot2)
library(ggridges)
library(tidyverse)
library(rpivotTable)
library(rpart)
library(psych)
library(party)
library(partykit)
library(caret)

# Install sjlabelled only when it is missing, and before attaching it
# (the unconditional install used to run on every execution, after library()).
if (!requireNamespace("sjlabelled", quietly = TRUE)) {
  install.packages("sjlabelled")
}
library(sjlabelled)

# Help lookup is only useful at the console.
if (interactive()) {
  print(help("sjlabelled"))
}

# Data import ----
# Read the survey workbook before anything uses it.
dfName <- read_xlsx(here("Digitalt_stress_våren2017_verdier.xlsx"))

# NOTE(review): everything below reads `df`, which was never assigned (the
# bare name resolves to the function stats::df). Assuming `df` is meant to be
# the imported sheet -- confirm.
df <- dfName

# S_15D as a three-level label ----
df_recat2 <- df

# This below makes them 3 factors, not stressed, stressed and none...
# cut() intervals are right-closed: (0,3] -> "Not Stressed",
# (3,5] -> "stressed", (5,6] -> "None".
df_recat2$S_15DLabel <- cut(
  df_recat2$S_15D,
  breaks = c(0, 3, 5, 6),
  labels = c("Not Stressed", "stressed", "None")
)
summary(df_recat2$S_15DLabel)
str(df_recat2$S_15DLabel)

# Model formula: S_15DLabel explained by every other column.
# (A stray trailing comma used to make this line a syntax error.)
predictor_recat2 <- S_15DLabel ~ .

# Mutation ----
# Item columns S_09A ... S_09Q.
s09_items <- paste0("S_09", LETTERS[1:17])

# `S_09A & ... & S_09Q <= 4` only compared S_09Q with 4, because `<=` binds
# tighter than `&`; the other items were merely coerced to logical.
# if_all() applies the threshold to every item and combines the results
# with `&`.
#
# `==` / `|` are kept on purpose in the ifelse() recodes: they propagate NA,
# whereas `%in%` would silently put missing answers in the "else" category.
df_recat <- df %>%
  mutate(
    S_09Binary = if_else(
      if_all(all_of(s09_items), ~ .x <= 4),
      "Digital",
      "Not Digital"
    ),
    # NOTE(review): same rule and labels as S_09Binary; presumably a stricter
    # threshold was intended for "SuperDigi" -- confirm.
    S_09SuperDigi = if_else(
      if_all(all_of(s09_items), ~ .x <= 4),
      "Digital",
      "Not Digital"
    ),
    S_15DBinary = ifelse(
      S_15D == 5 | S_15D == 4,
      "stressed",
      "not stressed"
    ),
    S_06Binary = ifelse(
      S_06 == 1 | S_06 == 2 | S_06 == 3,
      "Leadership",
      "No Leader"
    )
  )
df_recat$S_06Binary <- as.factor(df_recat$S_06Binary)

# S_15Dcombined ----
# NOTE(review): nothing in this section creates `S_15Dcombined`; it is only
# converted when the imported data already carries that column.
if ("S_15Dcombined" %in% names(df_recat)) {
  print(summary(df_recat$S_15Dcombined))
  df_recat$S_15Dcombined <- as.factor(df_recat$S_15Dcombined)
}
# `df_factor` is built further down the script; inspect it only if a previous
# run left it in the workspace.
if (exists("df_factor")) {
  print(summary(df_factor$S_15Dcombined))
}

# Experimenting with turning S_15 into one thing
# (if revived: `a & b <= 4` has the same precedence pitfall as fixed above)
# df2 <- df
# df2$S_15E <- cut(df2$S_15E,c(0,1,2,3,4,5,6),labels=c("6","5","4","3","2","1"))
# str(df2$S_15E)
# df2$S_15E <- as.numeric(df2$S_15E)
# df_recat <- df2 %>% mutate(S_15Combined = if_else(S_15A & S_15B & S_15C & S_15D & S_15E <= 4, "Not Stressed","Stressed"))
# df_treeFactor2$S_15Combined <- as.factor(df_recat$S_15Combined)
# str(df_treeFactor2$S_15Combined)

# NOTE(review): `df_treeFactor2` is not created in this section.
df_treeFactor2$S_15ABinary <- NULL
# Remaining recodes, factor conversion for plotting, and NA handling.

# Drop further S_15 binary columns from the tree data.
# NOTE(review): `df_treeFactor2` is not created in this part of the script.
df_treeFactor2$S_15BBinary <- NULL
df_treeFactor2$S_15CBinary <- NULL
df_treeFactor2$S_15EBinary <- NULL
str(df_recat$S_06Binary)

# Pack the S_09 columns (except S_09R and its free-text field) into S_09full.
# The exclusions must sit inside one tidyselect expression; passed as separate
# arguments they were taken as across()'s `.fns`, which is an error.
# NOTE(review): na_if(x, NA) leaves the values unchanged. The recode further
# down turns 6 into NA, so `na_if(.x, 6)` may be what was meant -- confirm.
# starts_with() also matches derived character columns such as S_09Binary.
df_recat <- df_recat %>%
  mutate(
    S_09full = across(
      c(starts_with("S_09"), -S_09R, -s_09r_open),
      ~ na_if(.x, NA)
    )
  )

# Binary recodes ----
# `==` / `|` are kept on purpose: they propagate NA, whereas `%in%` would
# silently put missing answers in the "else" category.
# Note the lower-case `s_` prefix on s_15ABinary / s_15CBinary; the names are
# left unchanged because later code refers to them as spelled here.
df_recat <- df_recat %>%
  mutate(
    s_15ABinary = ifelse(
      S_15A == 5 | S_15A == 4,
      "Digital Work-Overload",
      "Digital Work-Relief"
    ),
    s_15CBinary = ifelse(
      S_15C == 5 | S_15C == 4,
      "Digital Time-Pressure",
      "Digital Time-Relief"
    ),
    S_15EBinary = ifelse(
      S_15E == 1 | S_15E == 2,
      "Digital Stressed Available",
      "Digital Positively Available"
    ),
    S_16Binary = ifelse(
      S_16 == 1 | S_16 == 2 | S_16 == 3,
      "Available",
      "Not Available"
    ),
    S_19Binary = ifelse(
      S_19 == 5 | S_19 == 4 | S_19 == 6,
      "No Involvement",
      "Implementation Involvement"
    ),
    S_23Binary = ifelse(
      S_23 == 1 | S_23 == 2 | S_23 == 3,
      "Digitally Supported",
      "Not Digitally Supported"
    ),
    S_28Binary = ifelse(
      S_28 == 1 | S_28 == 2 | S_28 == 3,
      "Digitally Trained",
      "Not Digitally Trained"
    ),
    S_29Binary = ifelse(
      S_29 == 1 | S_29 == 2 | S_29 == 3,
      "Digitally Supported",
      "Not Digitally Supported"
    ),
    # A stray comma (`S_30E==4,| S_30E==3`) made this recode a syntax error.
    S_30EBinary = ifelse(
      S_30E == 6 | S_30E == 5 | S_30E == 4 | S_30E == 3,
      "Technostress",
      "Not Stressed"
    ),
    S_30FBinary = ifelse(
      S_30F == 6 | S_30F == 5 | S_30F == 4 | S_30F == 3,
      "Stressed Available",
      "Not Stressed"
    ),
    S_30CBinary = ifelse(
      S_30C == 6 | S_30C == 5 | S_30C == 4 | S_30C == 3,
      "Stressed Mistake",
      "Not Stressed"
    ),
    # TODO: "Stressed Workload" also counts answer 2, unlike the other S_30
    # recodes. Check whether the training and validation sets were built the
    # same way; if not, consider including 2 for every S_30 item. Check the
    # questionnaire.
    S_30ABinary = ifelse(
      S_30A == 6 | S_30A == 5 | S_30A == 4 | S_30A == 3 | S_30A == 2,
      "Stressed Workload",
      "Not Stressed"
    ),
    UTD_kat = ifelse(UTD == 3 | UTD == 4, "Higher Education", "Other")
  )

# Factors for plotting ----
df_factor <- df_recat

# Same columns as were previously converted one by one.
factor_cols <- c(
  "UTD_kat", "S_06Binary", "S_09Binary", "s_15ABinary", "S_16Binary",
  "S_19Binary", "S_23Binary", "S_28Binary", "S_29Binary", "S_30CBinary",
  "S_30ABinary", "S_30FBinary"
)
# NOTE(review): `S_15Dcombined` is not created by the recodes above, so it is
# selected with any_of() and skipped when absent; all_of() still fails loudly
# if one of the other columns is missing.
df_factor <- df_factor %>%
  mutate(
    across(c(any_of("S_15Dcombined"), all_of(factor_cols)), as.factor)
  )
str(df_factor$UTD_kat)
str(df_recat$UTD_kat)

# Recode answer 6 to NA for items S_09A ... S_09Q ----
# These were pasted console lines (with `>` prompts), which do not parse.
# Runs after `df_factor` was copied, so only `df_recat` is affected.
s09_items <- paste0("S_09", LETTERS[1:17])
df_recat <- df_recat %>%
  mutate(across(all_of(s09_items), ~ replace(.x, .x == 6, NA)))

# NOTE(review): `datatree` is not created in this part of the script.
summary(datatree$S_09B)

# To use NA as factor in factor columns
# The previous pipe into `(datatree$... = fct_explicit_na(col, ...))` was
# malformed and passed base::col instead of the column.
# NOTE(review): fct_explicit_na() is deprecated in forcats >= 1.0.0 in favour
# of fct_na_value_to_level().
datatree$S_15Dcombined <- fct_explicit_na(
  datatree$S_15Dcombined,
  na_level = "None"
)

# Base-R equivalent, kept as a fallback; a no-op once the call above has run.
# union() avoids appending a second "None" level.
levels(datatree$S_15Dcombined) <- union(
  levels(datatree$S_15Dcombined),
  "None"
)
# Drop a user-defined `is.na` that would mask base::is.na, if one exists.
if (exists("is.na", envir = globalenv(), inherits = FALSE)) {
  remove("is.na", envir = globalenv())
}
datatree$S_15Dcombined[is.na(datatree$S_15Dcombined)] <- "None"