@sirishan wrote:
I am trying to run the code below. It executes successfully up to h2o.performance(regressionmodel), but I get an error when I call h2o.predict(). Kindly help me out here. The error message is:
Error: DistributedException from localhost/127.0.0.1:54321, caused by java.lang.ArrayIndexOutOfBoundsException: 60
# End-to-end script: read train/test CSVs, one-hot encode the categorical
# columns, fit a gaussian GLM with H2O and predict on the test rows.

# Clears every object in the global environment before starting.
rm(list = ls())

# Load data ----
train <- read.csv(file.choose())
test <- read.csv(file.choose())

# Placeholder target so that test has the same columns as train for rbind().
test$y <- 1
combi <- rbind(train, test)
head(combi)
str(combi)
names(train)
summary(train)

# Categorical attributes ----
cat_attr <- c("X0", "X1", "X2", "X3", "X4", "X5", "X6", "X8")
cat_data <- combi[, cat_attr]
names(cat_data)
head(cat_data)

# Numerical attributes ----
num_attr <- setdiff(names(train), cat_attr)
num_data <- combi[, num_attr]
head(num_data)
str(num_data)

# Separate categorical variables and binary/numeric and convert them into
# the appropriate type ----
# lapply() converts column by column; sapply() would go through a matrix and
# the resulting column types would depend on the R version.
cat_data[] <- lapply(cat_data, as.factor)
num_data[] <- lapply(num_data, as.numeric)

finaldata <- cbind(cat_data, num_data)
sum(is.na(finaldata))

# Dummy variables ----
library(dummies)
finaldata1 <- dummy.data.frame(data = finaldata, names = cat_attr)
finaldata1 <- data.frame(lapply(finaldata1, as.numeric))
names(finaldata1)
str(finaldata1)

finaldata1 <- finaldata1[, setdiff(names(finaldata1), "ID")]

# Split back into train and test ----
# combi was built as rbind(train, test), so the training rows come first and
# the test rows follow them. The test slice must therefore start at
# nrow(train) + 1; indexing from row 1 would select training rows again.
train_rows <- seq_len(nrow(train))
test_rows <- nrow(train) + seq_len(nrow(test))
train_a <- finaldata1[train_rows, , drop = FALSE]
test_a <- finaldata1[test_rows, , drop = FALSE]

# Drop the placeholder target from the frame used for scoring.
test_a <- test_a[, setdiff(names(test_a), "y"), drop = FALSE]

# H2O model ----
# Install h2o only when it is missing instead of on every run.
if (!requireNamespace("h2o", quietly = TRUE)) {
  install.packages("h2o")
}
library(h2o)

localh2o <- h2o.init(nthreads = -1)
trainh2o <- as.h2o(train_a)
testh2o <- as.h2o(test_a)
names(trainh2o)

regressionmodel <- h2o.glm(
  y = "y",
  x = setdiff(names(trainh2o), "y"),
  training_frame = trainh2o,
  family = "gaussian"
)
h2o.performance(regressionmodel)
predict.reg <- h2o.predict(regressionmodel, newdata = testh2o)
Posts: 1
Participants: 1