See: Description
$ java -jar h2o.jar
$ mkdir experiment
$ cd experiment
$ mv ~/Downloads/gbm_pojo_test.java .
$ curl http://localhost:54321/3/h2o-genmodel.jar > h2o-genmodel.jar
import java.io.*;import hex.genmodel.easy.RowData;import hex.genmodel.easy.EasyPredictModelWrapper;import hex.genmodel.easy.prediction.*;public class main { private static String modelClassName = "gbm_pojo_test"; public static void main(String[] args) throws Exception { hex.genmodel.GenModel rawModel; rawModel = (hex.genmodel.GenModel) Class.forName(modelClassName).newInstance(); EasyPredictModelWrapper model = new EasyPredictModelWrapper(rawModel); // // By default, unknown categorical levels throw PredictUnknownCategoricalLevelException. // Optionally configure the wrapper to treat unknown categorical levels as N/A instead // and strings that cannot be converted to numbers also to N/As: // // EasyPredictModelWrapper model = new EasyPredictModelWrapper( // new EasyPredictModelWrapper.Config() // .setModel(rawModel) // .setConvertUnknownCategoricalLevelsToNa(true) // .setConvertInvalidNumbersToNa(true) // ); RowData row = new RowData(); row.put("Year", "1987"); row.put("Month", "10"); row.put("DayofMonth", "14"); row.put("DayOfWeek", "3"); row.put("CRSDepTime", "730"); row.put("UniqueCarrier", "PS"); row.put("Origin", "SAN"); row.put("Dest", "SFO"); BinomialModelPrediction p = model.predictBinomial(row); System.out.println("Label (aka prediction) is flight departure delayed: " + p.label); System.out.print("Class probabilities: "); for (int i = 0; i < p.classProbabilities.length; i++) { if (i > 0) { System.out.print(","); } System.out.print(p.classProbabilities[i]); } System.out.println(""); }}
$ javac -cp h2o-genmodel.jar -J-Xmx2g -J-XX:MaxPermSize=128m gbm_pojo_test.java main.java

# Linux and OS X users
$ java -cp .:h2o-genmodel.jar main

# Windows users
$ java -cp .;h2o-genmodel.jar main

The following output displays:
Label (aka prediction) is flight departure delayed: YES
Class probabilities: 0.4790490513429604,0.5209509486570396
# Train a binomial GLM on the prostate data set bundled with the h2o R package,
# then download the trained model as a POJO.
library(h2o)
h2o.init()

# Locate the CSV shipped inside the installed h2o package and import it.
csv_path <- system.file("extdata", "prostate.csv", package = "h2o")
prostate <- h2o.importFile(csv_path)

# The response must be a factor for a binomial model.
prostate$CAPSULE <- as.factor(prostate$CAPSULE)

glm_fit <- h2o.glm(
  y = "CAPSULE",
  x = c("AGE", "RACE", "PSA", "GLEASON"),
  training_frame = prostate,
  family = "binomial"
)

h2o.download_pojo(glm_fit)
# Train a binomial GLM on the public prostate data set and download it as a POJO.
import h2o
from h2o.estimators.glm import H2OGeneralizedLinearEstimator

h2o.init()

data_url = "http://s3.amazonaws.com/h2o-public-test-data/smalldata/prostate/prostate.csv.zip"
prostate = h2o.import_file(data_url)

# The response column is converted to a categorical before training.
prostate['CAPSULE'] = prostate['CAPSULE'].asfactor()

glm = H2OGeneralizedLinearEstimator(family = "binomial")
glm.train(
    y = "CAPSULE",
    x = ["AGE", "RACE", "PSA", "GLEASON"],
    training_frame = prostate,
)

h2o.download_pojo(glm)
import org.apache.spark.h2o._
// NOTE(review): assumes `sc` is the SparkContext already in scope (e.g. in a Spark shell) — confirm.
val h2oContext = H2OContext.getOrCreate(sc)
import h2oContext._
import org.apache.spark.examples.h2o._
import java.io.File

// Load the airlines sample data into an H2OFrame.
val dataFile = "examples/smalldata/allyears2k_headers.csv.gz"
val airlinesData = new H2OFrame(new File(dataFile))

import _root_.hex.tree.gbm.GBM
import _root_.hex.tree.gbm.GBMModel.GBMParameters

// Configure a small GBM (5 trees, depth 2) predicting IsDepDelayed.
val gbmParams = new GBMParameters()
gbmParams._train = airlinesData
gbmParams._response_column = 'IsDepDelayed
gbmParams._model_id = water.Key.make("model")
gbmParams._ntrees = 5
gbmParams._max_depth = 2

// Train and block until the model is available.
val gbm = new GBM(gbmParams)
val model = gbm.trainModel.get

import water._
import _root_.hex._
import java.net.URI
import water.serial.ObjectTreeBinarySerializer

/**
 * Writes the Java (POJO) source of `model` to the file identified by `destination`.
 *
 * @param model       trained H2O model to export
 * @param destination file URI the POJO source is written to
 * @return the same `destination` URI, for chaining
 */
def exportPOJOModel(model: Model[_, _, _], destination: URI): URI = {
  // Build the writer before opening the file: if its construction fails,
  // no output stream has been opened yet, so nothing can leak.
  val writer = new model.JavaModelStreamWriter(false)
  val fos = new java.io.FileOutputStream(new File(destination))
  try {
    writer.writeTo(fos)
  } finally {
    fos.close()
  }
  destination
}

exportPOJOModel(model, new File("./GbmModel.java").toURI)
PredictCsv
class is used by the H2O test harness to make predictions on new data points.
download_mojo()
function saves the model as a zip file. You can unzip the file to view the options used to build the file along with each tree built in the model. Note that each tree file is saved as a binary file type.

library(h2o)
h2o.init(nthreads=-1)
path <- system.file("extdata", "prostate.csv", package="h2o")
h2o_df <- h2o.importFile(path)
h2o_df$CAPSULE <- as.factor(h2o_df$CAPSULE)
model <- h2o.gbm(y="CAPSULE", x=c("AGE", "RACE", "PSA", "GLEASON"), training_frame=h2o_df, distribution="bernoulli", ntrees=100, max_depth=4, learn_rate=0.1)
# Download the trained model as a MOJO zip; get_genmodel_jar=TRUE also requests
# h2o-genmodel.jar, which the later javac/java steps put on the classpath.
# The target directory matches the `experiment` directory used by the following steps.
modelfile <- h2o.download_mojo(model, path="~/experiment/", get_genmodel_jar=TRUE)

# `+` is not defined for character strings in R, so build the message with paste0().
print(paste0("Model saved to ", modelfile))

# Example output (as shown in the original documentation):
#   Model saved to /Users/user/GBM_model_R_1475248925871_74.zip
# Train a small Bernoulli GBM on the bundled prostate data set and download it as a MOJO.
import h2o
from h2o.estimators.gbm import H2OGradientBoostingEstimator

h2o.init()

prostate = h2o.load_dataset("prostate.csv")
# The response column is converted to a categorical before training.
prostate["CAPSULE"] = prostate["CAPSULE"].asfactor()

gbm = H2OGradientBoostingEstimator(
    distribution="bernoulli",
    ntrees=100,
    max_depth=4,
    learn_rate=0.1,
)
gbm.train(y="CAPSULE", x=["AGE", "RACE", "PSA", "GLEASON"], training_frame=prostate)

# Save the MOJO zip (and h2o-genmodel.jar) into the experiment directory.
mojo_path = gbm.download_mojo(path="~/experiment/", get_genmodel_jar=True)
print("Model saved to " + mojo_path)

# Example output (as shown in the original documentation):
#   Model saved to /Users/user/GBM_model_python_1475248925871_888.zip
$ cd experiment
import java.io.*; import hex.genmodel.easy.RowData; import hex.genmodel.easy.EasyPredictModelWrapper; import hex.genmodel.easy.prediction.*; import hex.genmodel.MojoModel; public class main { public static void main(String[] args) throws Exception { EasyPredictModelWrapper model = new EasyPredictModelWrapper(MojoModel.load("GBM_model_R_1475248925871_74.zip")); RowData row = new RowData(); row.put("AGE", "68"); row.put("RACE", "2"); row.put("DCAPS", "2"); row.put("VOL", "0"); row.put("GLEASON", "6"); BinomialModelPrediction p = model.predictBinomial(row); System.out.println("Has penetrated the prostatic capsule (1=yes; 0=no): " + p.label); System.out.print("Class probabilities: "); for (int i = 0; i < p.classProbabilities.length; i++) { if (i > 0) { System.out.print(","); } System.out.print(p.classProbabilities[i]); } System.out.println(""); } }

GBM and DRF return classProbabilities, but not all MOJOs will return a classProbabilities field. Refer to the ModelPrediction definition for each algorithm to find the correct field(s) to access. This is available in the H2O-3 GitHub repo at: https://github.com/h2oai/h2o-3/tree/master/h2o-genmodel/src/main/java/hex/genmodel/easy/prediction.

In addition to classProbabilities, in GBM and DRF you can also choose to generate the leafNodeAssignments field, which will show the decision path through each tree. Note that this may slow down the MOJO as it adds computation. Below is the Java code showing how to return the leaf node assignments:
import java.io.*; import hex.genmodel.easy.RowData; import hex.genmodel.easy.EasyPredictModelWrapper; import hex.genmodel.easy.prediction.*; import hex.genmodel.MojoModel; public class main { public static void main(String[] args) throws Exception { EasyPredictModelWrapper.Config config = new EasyPredictModelWrapper.Config().setModel(MojoModel.load("GBM_model_R_1475248925871_74.zip")).setEnableLeafAssignment(true); EasyPredictModelWrapper model = new EasyPredictModelWrapper(config); RowData row = new RowData(); row.put("AGE", "68"); row.put("RACE", "2"); row.put("DCAPS", "2"); row.put("VOL", "0"); row.put("GLEASON", "6"); BinomialModelPrediction p = model.predictBinomial(row); System.out.println("Has penetrated the prostatic capsule (1=yes; 0=no): " + p.label); System.out.print("Class probabilities: "); for (int i = 0; i < p.classProbabilities.length; i++) { if (i > 0) { System.out.print(","); } System.out.print(p.classProbabilities[i]); } System.out.println("Leaf node assighnments: "); for (int i=0; i < p.leafNodeAssignments; i++) { if (i > 0) { System.out.print.(p.leafNodeAssignments[i]); } } System.out.println(""); } }
$ javac -cp h2o-genmodel.jar -J-Xms2g -J-XX:MaxPermSize=128m main.java

# Linux and OS X users
$ java -cp .:h2o-genmodel.jar main

# Windows users
$ java -cp .;h2o-genmodel.jar main
Has penetrated the prostatic capsule (1=yes; 0=no): 0
Class probabilities: 0.8059929056296662,0.19400709437033375
# Train a small GBM on the airlines data, download it as a MOJO zip into the
# current working directory, then render one of its trees with PrintMojo.
library(h2o)
h2o.init()

df <- h2o.importFile("http://s3.amazonaws.com/h2o-public-test-data/smalldata/airlines/allyears2k_headers.zip")
model <- h2o.gbm(model_id = "model",
                 training_frame = df,
                 x = c("Year", "Month", "DayofMonth", "DayOfWeek", "UniqueCarrier"),
                 y = "IsDepDelayed",
                 max_depth = 3,
                 ntrees = 5)
h2o.download_mojo(model, getwd(), FALSE)

# Now download the latest stable h2o release from http://www.h2o.ai/download/
# and run the PrintMojo tool from the command line.
#
# (For MacOS: brew install graphviz)
#
# The following are shell commands, not R; run them in a terminal:
#
#   java -cp h2o.jar hex.genmodel.tools.PrintMojo --tree 0 -i model.zip -o model.gv -f 20 -d 3
#   dot -Tpng model.gv -o model.png
#   open model.png
<!-- Coordinates of the example project itself. -->
<groupId>ai.h2o</groupId>
<artifactId>xgboost-mojo-example</artifactId>
<version>1.0-SNAPSHOT</version>

<!-- Both H2O artifacts below are pinned to the same version (3.20.0.3). -->
<dependencies>
  <!-- XGBoost extension for h2o-genmodel (per its artifactId). -->
  <dependency>
    <groupId>ai.h2o</groupId>
    <artifactId>h2o-genmodel-ext-xgboost</artifactId>
    <version>3.20.0.3</version>
  </dependency>
  <!-- Core h2o-genmodel library. -->
  <dependency>
    <groupId>ai.h2o</groupId>
    <artifactId>h2o-genmodel</artifactId>
    <version>3.20.0.3</version>
  </dependency>
</dependencies>