I'd like to know whether it is possible to retrieve the decision path that a given observation follows through a regression tree in PySpark.
The model code is as follows:
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from pyspark.ml.classification import DecisionTreeClassifier, DecisionTreeClassificationModel, GBTClassifier,GBTClassificationModel
from pyspark.ml.regression import DecisionTreeRegressor, DecisionTreeRegressionModel
from pyspark.ml.evaluation import BinaryClassificationEvaluator, RegressionEvaluator
# Decision-tree regressor tuned through cross-validation.
# NOTE(review): `train` is not defined in this snippet; it is assumed to be a
# Spark DataFrame prepared elsewhere with the columns the estimator and
# evaluator expect -- confirm against the calling code.
dt = DecisionTreeRegressor(maxBins=10000, seed=0)

# Candidate models are scored by root-mean-squared error.
cv_dt_evaluator = RegressionEvaluator(metricName='rmse')

# The grid holds a single combination, so cross-validation only evaluates
# this one configuration.
dt_grid = (
    ParamGridBuilder()
    .addGrid(dt.minInstancesPerNode, [100])
    .addGrid(dt.maxDepth, [10])
    .build()
)

cv = CrossValidator(
    estimator=dt,
    estimatorParamMaps=dt_grid,
    evaluator=cv_dt_evaluator,
    parallelism=4,
    numFolds=4,
)

cv_model = cv.fit(train)

# The best model lives on the fitted CrossValidatorModel (`cv_model`); the
# original referenced an undefined name `clf`, which raised NameError.
clf_best = cv_model.bestModel
Thanks.