I have been trying to deploy a Spark ML model from the Serving page through the UI, but the deployment aborts after running for a long time. Is there any specific reason why this happens? I have also taken care of the dependencies, yet it still fails.
Dependency code block:
conda_env = {
    "dependencies": [
        "python=3.10.9",
        {
            "pip": ["xgboost", "pyspark==3.4.0", "pip<=21.2.4"],
        },
    ],
}
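For comparison, this is a minimal sketch of the fuller shape MLflow conda environments usually take and how such a dict is passed when logging the model. The name, channels, and mlflow entries are my additions for illustration, not from the original post:

import mlflow.spark

example_conda_env = {
    "name": "spark-xgb-env",          # illustrative name, assumption
    "channels": ["conda-forge"],      # assumption; conda-forge is a common default
    "dependencies": [
        "python=3.10.9",
        "pip<=21.2.4",
        {
            # pip packages that must be importable when the model is loaded for serving
            "pip": ["mlflow", "xgboost", "pyspark==3.4.0"],
        },
    ],
}

# The environment dict is supplied at logging time so the serving endpoint
# can rebuild the same environment.
# mlflow.spark.log_model(model, "model", conda_env=example_conda_env)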
@Kumaran Thank you for the reply.
The Random Forest model deployment finally succeeded, but the SparkXGBRegressor deployment is still failing.
Sharing the code snippet:
from xgboost.spark import SparkXGBRegressor
from pyspark.ml import Pipeline
from pyspark.ml.feature import VectorAssembler
from pyspark.ml.evaluation import RegressionEvaluator
import mlflow
import mlflow.spark

vec_assembler = VectorAssembler(inputCols=train_df.columns[1:], outputCol="features")
# rf = RandomForestRegressor(labelCol="price", maxBins=260, seed=42)
xgbr = SparkXGBRegressor(num_workers=1, label_col="price", missing=0.0)
pipeline = Pipeline(stages=[vec_assembler, xgbr])
regression_evaluator = RegressionEvaluator(predictionCol="prediction", labelCol="price")
regression_evaluator2 = RegressionEvaluator(predictionCol="prediction", labelCol="price", metricName="r2")

def objective_function(params):
    # Set the hyperparameters that we want to optimize
    max_depth = params["max_depth"]        # rf, xgb
    # num_trees = params["num_trees"]      # rf
    n_estimators = params["n_estimators"]  # xgb

    with mlflow.start_run():
        # estimator = pipeline.copy({rf.maxDepth: max_depth, rf.numTrees: num_trees})  # rf
        estimator = pipeline.copy({xgbr.max_depth: max_depth, xgbr.n_estimators: n_estimators})  # xgbr
        model = estimator.fit(train_df)
        pred_df = model.transform(test_df)
        rmse = regression_evaluator.evaluate(pred_df)
        # r2 = regression_evaluator2.evaluate(pred_df)
        mlflow.log_metric("rmse", rmse)
        # mlflow.spark.log_model(model, "model", conda_env=mlflow.spark.get_default_conda_env())
        mlflow.spark.log_model(model, "model", conda_env=conda_env)

    return rmse

from hyperopt import hp
import numpy as np

search_space = {
    "max_depth": hp.choice("max_depth", np.arange(5, 15, dtype=int)),
    "n_estimators": hp.choice("n_estimators", np.arange(70, 80, dtype=int)),
}

from hyperopt import fmin, tpe, Trials

# mlflow.pyspark.ml.autolog(log_models=True)
mlflow.xgboost.autolog(log_models=True)
# mlflow.fastai.autolog(log_models=False)

num_evals = 1
trials = Trials()
best_hyperparam = fmin(
    fn=objective_function,
    space=search_space,
    algo=tpe.suggest,
    max_evals=num_evals,
    trials=trials,
    rstate=np.random.default_rng(42),
)

# Retrain model on train & validation dataset and evaluate on test dataset
with mlflow.start_run():
    best_max_depth = best_hyperparam["max_depth"]        # rf, xgb
    best_n_estimators = best_hyperparam["n_estimators"]  # xgb
    estimator = pipeline.copy({xgbr.max_depth: best_max_depth, xgbr.n_estimators: best_n_estimators})  # xgb
    pipeline_model = estimator.fit(train_df.limit(188123))
    pred_df = pipeline_model.transform(test_df)
    rmse = regression_evaluator.evaluate(pred_df)

    # Log param and metrics for the final model
    mlflow.log_param("maxDepth", best_max_depth)
    mlflow.log_param("n_estimators", best_n_estimators)
    mlflow.log_metric("rmse", rmse)
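One thing worth checking for the SparkXGBRegressor case: the environment that the serving endpoint rebuilds must contain xgboost in addition to pyspark, otherwise loading the logged PipelineModel fails. Below is a minimal sketch of logging the final fitted pipeline with explicit pip requirements instead of the hand-written conda_env; the exact version pins are illustrative assumptions, not from the original post:

import mlflow
import mlflow.spark

with mlflow.start_run():
    # Log the fitted PipelineModel with an explicit pip environment so the
    # serving endpoint installs xgboost alongside pyspark when it loads the model.
    mlflow.spark.log_model(
        pipeline_model,
        "model",
        pip_requirements=[
            "pyspark==3.4.0",  # assumption: match the cluster's Spark version
            "xgboost",         # needed to deserialize the SparkXGBRegressor stage
        ],
    )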