You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Describe the feature you want to add to this project
I am trying to make a comparison with 240 timeseries of ML models like, linear reg, and some tree based models in the following code but the output are same for all of them
import pandas as pd
from pycaret.regression import *
from sklearn.model_selection import TimeSeriesSplit
import numpy as np
def fit_models(self):
unique_ids = self.df[self.unique_id_column].unique()
for model_id in self.model_ids:
self.models[model_id] = {}
for model_id in self.model_ids:
for uid in unique_ids:
df_filtered = self.df[self.df[self.unique_id_column] == uid]
df_filtered = df_filtered.drop(columns=[self.unique_id_column]) # Assuming no additional columns are needed for modeling
# Setup data once before training models
setup(data=df_filtered, target=self.target_column, verbose=False, session_id=123)
# Splitting data using the custom time series split
custom_cv = self.custom_cv_generator(df_filtered)
model = create_model(model_id)
tuned_model = tune_model(model, fold=custom_cv) # Optional: Hyperparameter tuning
self.models[model_id][uid] = tuned_model
def custom_cv_generator(self, df):
return TimeSeriesSplitCustom(n_splits=5, max_train_size=len(df) - self.horizon, test_size=self.horizon)
def predict(self, dataframe):
results = []
for uid in self.models[self.model_ids[0]].keys():
df_filtered = dataframe[dataframe[self.unique_id_column] == uid]
df_filtered = df_filtered.drop(columns=[self.unique_id_column]) # Exclude ID for prediction
last_rows = df_filtered.tail(self.horizon) # Get the last 'horizon' rows for each UID
forecasts = {'unique_id': [uid]*self.horizon}
for model_id in self.model_ids:
model = self.models[model_id][uid]
forecast = []
for i in range(self.horizon):
prediction = predict_model(model, data=last_rows.iloc[[i]].reset_index(drop=True))
forecast.append(prediction[self.target_column].values[0])
forecasts[model_id] = forecast
uid_results = pd.DataFrame(forecasts)
results.append(uid_results)
return pd.concat(results, ignore_index=True)
Describe your proposed solution
Is there something optimizing the code overall that needs to be set to false??
Describe alternatives you've considered, if relevant
I did not find any option to turn it off.
Additional context
No response
The text was updated successfully, but these errors were encountered:
Describe the feature you want to add to this project
I am trying to make a comparison with 240 timeseries of ML models like, linear reg, and some tree based models in the following code but the output are same for all of them
import pandas as pd
from pycaret.regression import *
from sklearn.model_selection import TimeSeriesSplit
import numpy as np
class TimeSeriesSplitCustom(TimeSeriesSplit):
def init(self, n_splits=5, max_train_size=None, test_size=1, min_train_size=1):
super().init(n_splits=n_splits, max_train_size=max_train_size)
self.test_size = test_size
self.min_train_size = min_train_size
class ModelTrainer:
def init(self, dataframe, unique_id_column, target_column, horizon):
self.df = dataframe
self.unique_id_column = unique_id_column
self.target_column = target_column
self.horizon = horizon
self.models = {}
self.model_ids = ['lr', 'rf', 'xgboost', 'lightgbm'] # Add more models as needed
Describe your proposed solution
Is there something optimizing the code overall that needs to be set to false??
Describe alternatives you've considered, if relevant
I did not find any option to turn it off.
Additional context
No response
The text was updated successfully, but these errors were encountered: