The Machine Learning Guide for Predictive Accuracy: Interpolation and Extrapolation – Towards Data Science
class ModelFitterAndVisualizer:
    """Fit a fixed set of regressors on 2-feature data and plot their surfaces.

    The class builds a list of models (symbolic regression, kernel methods,
    tree ensembles and voting/stacking combinations of them), fits each one on
    the training data, and renders a grid of 3D panels: the ground truth, the
    training points, and one predicted surface per model.
    """

    def __init__(self, X_train, y_train, y_truth, scaling=False, random_state=41):
        """
        Initialize the ModelFitterAndVisualizer class with training and testing data.

        Parameters:
        X_train (pd.DataFrame or np.ndarray): Training data features (two columns
            are plotted: column 0 as x0 and column 1 as x1)
        y_train (pd.Series or np.ndarray): Training data target
        y_truth (array-like): Ground truth for predictions. It is passed as the
            ``z`` values of a surface over the ``x0``/``x1`` meshgrid given to
            ``visualize_surface``, so it presumably has the meshgrid's shape.
        scaling (bool): Flag to indicate if scaling should be applied
        random_state (int): Seed for random number generation
        """
        self.X_train = X_train
        self.y_train = y_train
        self.y_truth = y_truth

        self.initialize_models(random_state)

        self.scaling = scaling
        # Set by fit_models() when scaling is enabled; visualize_surface() reuses
        # it so the prediction grid goes through the same transform as training.
        self.scaler_X = None

    # Initialize models
    # -----------------------------------------------------------------
    def initialize_models(self, random_state):
        """
        Initialize the models to be used for fitting and prediction.

        Populates ``self.models`` and ``self.titles``. ``self.titles`` has two
        leading entries ("Ground Truth", "Training Points") followed by one
        title per model, in the same order as ``self.models``.

        Parameters:
        random_state (int): Seed for random number generation
        """

        # Define kernel for GPR
        kernel = 1.0 * RBF(length_scale=1.0) + WhiteKernel(noise_level=1.0)

        # Define Ensemble Models Estimator
        # Decision Tree + Kernel Method
        estimators_rf_svr = [
            ('rf', RandomForestRegressor(n_estimators=30, random_state=random_state)),
            ('svr', SVR(kernel='rbf')),
        ]
        estimators_rf_gpr = [
            ('rf', RandomForestRegressor(n_estimators=30, random_state=random_state)),
            ('gpr', GaussianProcessRegressor(kernel=kernel, normalize_y=True, random_state=random_state))
        ]
        # Decision Trees
        estimators_rf_xgb = [
            ('rf', RandomForestRegressor(n_estimators=30, random_state=random_state)),
            ('xgb', xgb.XGBRegressor(random_state=random_state)),
        ]

        self.models = [
            SymbolicRegressor(random_state=random_state),
            SVR(kernel='rbf'),
            GaussianProcessRegressor(kernel=kernel, normalize_y=True, random_state=random_state),
            DecisionTreeRegressor(random_state=random_state),
            RandomForestRegressor(random_state=random_state),
            xgb.XGBRegressor(random_state=random_state),
            lgbm.LGBMRegressor(n_estimators=50, num_leaves=10, min_child_samples=3, random_state=random_state),
            VotingRegressor(estimators=estimators_rf_svr),
            StackingRegressor(estimators=estimators_rf_svr,
                              final_estimator=RandomForestRegressor(random_state=random_state)),
            VotingRegressor(estimators=estimators_rf_gpr),
            StackingRegressor(estimators=estimators_rf_gpr,
                              final_estimator=RandomForestRegressor(random_state=random_state)),
            VotingRegressor(estimators=estimators_rf_xgb),
            StackingRegressor(estimators=estimators_rf_xgb,
                              final_estimator=RandomForestRegressor(random_state=random_state)),
        ]

        # Define graph titles
        self.titles = [
            "Ground Truth", "Training Points",
            "SymbolicRegressor", "SVR", "GPR",
            "DecisionTree", "RForest", "XGBoost", "LGBM",
            "Vote_rf_svr", "Stack_rf_svr__rf",
            "Vote_rf_gpr", "Stack_rf_gpr__rf",
            "Vote_rf_xgb", "Stack_rf_xgb__rf",
        ]

    def fit_models(self):
        """
        Fit the models to the training data.

        When ``self.scaling`` is true the features are passed through a
        ``MinMaxScaler`` first; the fitted scaler is kept on ``self.scaler_X``
        so that later predictions can apply the same transform. Otherwise a
        copy of ``X_train`` is used unchanged and ``self.scaler_X`` is ``None``.
        The features actually used for fitting are stored on
        ``self.X_train_scaled`` in both cases.

        Returns:
        self: Instance of the class with fitted models
        """
        if self.scaling:
            self.scaler_X = MinMaxScaler()
            self.X_train_scaled = self.scaler_X.fit_transform(self.X_train)
        else:
            self.scaler_X = None
            self.X_train_scaled = self.X_train.copy()

        for model in self.models:
            model.fit(self.X_train_scaled, self.y_train)
        return self

    def visualize_surface(self, x0, x1, width=400, height=500,
                          num_panel_columns=5,
                          vertical_spacing=0.06, horizontal_spacing=0,
                          output=None, display=False, return_fig=False):
        """
        Visualize the prediction surface for each model.

        Panel 0 shows the ground truth, panel 1 the training points over a
        semi-transparent ground-truth surface, and every following panel the
        surface predicted by one model on the ``x0``/``x1`` grid.

        Parameters:
        x0 (np.ndarray): Meshgrid for feature 1
        x1 (np.ndarray): Meshgrid for feature 2
        width (int): Width of a single panel
        height (int): Height of a single panel
        num_panel_columns (int): Number of panels per row
        vertical_spacing (float): Vertical spacing passed to ``make_subplots``
        horizontal_spacing (float): Horizontal spacing passed to ``make_subplots``
        output (str): File path to save the plot as HTML
        display (bool): Flag to display the plot
        return_fig (bool): Flag to return the figure object

        Returns:
        The figure if ``return_fig`` is true, otherwise ``None``.
        """

        num_plots = len(self.models) + 2
        # Ceiling division: a partially filled last row still needs its own
        # row of subplot cells, otherwise trailing panels have nowhere to go.
        num_panel_rows = -(-num_plots // num_panel_columns)

        whole_width = width * num_panel_columns
        whole_height = height * num_panel_rows

        specs = [[{'type': 'surface'} for _ in range(num_panel_columns)]
                 for _ in range(num_panel_rows)]
        fig = make_subplots(rows=num_panel_rows, cols=num_panel_columns,
                            specs=specs, subplot_titles=self.titles,
                            vertical_spacing=vertical_spacing,
                            horizontal_spacing=horizontal_spacing)

        # Positional column access below must work for DataFrames as well as
        # ndarrays, so go through a plain array view of the training features.
        train_points = np.asarray(self.X_train)

        # Build the prediction grid once; if the models were fitted on scaled
        # features, the grid has to go through the same scaler.
        grid = np.c_[x0.ravel(), x1.ravel()]
        scaler = getattr(self, 'scaler_X', None)
        if scaler is not None:
            grid = scaler.transform(grid)

        for i, model in enumerate([None, None] + self.models):
            # Assign the subplot panels
            row = i // num_panel_columns + 1
            col = i % num_panel_columns + 1

            # Plot training points
            if i == 1:
                fig.add_trace(go.Scatter3d(x=train_points[:, 0], y=train_points[:, 1], z=self.y_train,
                                           mode='markers',
                                           marker=dict(size=2, color='darkslategray'),
                                           name='Training Data'),
                              row=row, col=col)

                surface = go.Surface(z=self.y_truth, x=x0, y=x1, showscale=False, opacity=.4)
                fig.add_trace(surface, row=row, col=col)

            # Plot predicted surface for each model and ground truth
            else:
                y_pred = self.y_truth if model is None else model.predict(grid).reshape(x0.shape)
                surface = go.Surface(z=y_pred, x=x0, y=x1, showscale=False)
                fig.add_trace(surface, row=row, col=col)

            fig.update_scenes(dict(
                xaxis_title='x0',
                yaxis_title='x1',
                zaxis_title='y',
            ), row=row, col=col)

        fig.update_layout(title='Model Predictions and Ground Truth',
                          width=whole_width, height=whole_height)

        # Change camera angle
        camera = dict(
            up=dict(x=0, y=0, z=1),
            center=dict(x=0, y=0, z=0),
            eye=dict(x=-1.25, y=-1.25, z=2)
        )
        fig.update_scenes(camera=camera)

        if display:
            fig.show()

        if output:
            fig.write_html(output)

        if return_fig:
            return fig
        return None
Read this article:
The Machine Learning Guide for Predictive Accuracy: Interpolation and Extrapolation - Towards Data Science
- Google is experimenting with machine learning-powered age-estimation tech in the US - TechCrunch - August 1st, 2025 [August 1st, 2025]
- Google Will Use Machine Learning to Estimate Users' Age and Block Them From Restricted Content and Ads - Adweek - August 1st, 2025 [August 1st, 2025]
- A thermodynamic approach to machine learning: How optimal transport theory can improve generative models - Tech Xplore - August 1st, 2025 [August 1st, 2025]
- Machine Learning Transforms Immunotherapy in Metastatic NSCLC - BIOENGINEER.ORG - August 1st, 2025 [August 1st, 2025]
- Clinical decision support for vestibular diagnosis: large-scale machine learning with lived experience coaching - Nature - August 1st, 2025 [August 1st, 2025]
- Graph theoretic and machine learning approaches in molecular property prediction of bladder cancer therapeutics - Nature - August 1st, 2025 [August 1st, 2025]
- Automotive Battery Management System Market Outlook Report 2025-2034 | AI and Machine Learning Transforming the BMS Technology Landscape - Yahoo.co - August 1st, 2025 [August 1st, 2025]
- Machine learning model predicts radiotherapy response in patients with nasopharyngeal carcinoma - News-Medical - August 1st, 2025 [August 1st, 2025]
- Google is experimenting with machine learning-powered age-estimation tech in the US - Yahoo Finance - August 1st, 2025 [August 1st, 2025]
- Identification and validation of an explainable machine learning model for vascular depression diagnosis in the older adults: a multicenter cohort... - August 1st, 2025 [August 1st, 2025]
- Machine learning-based high-benefit approach versus traditional high-risk approach in statin therapy: the Shizuoka Kokuho database study - Nature - August 1st, 2025 [August 1st, 2025]
- Investigating the Impact of the Stationarity Hypothesis on Heart Failure Detection using Deep Convolutional Scattering Networks and Machine Learning -... - August 1st, 2025 [August 1st, 2025]
- Predicting Sepsis with Machine Learning and Lab-on-a-Chip - Electropages - August 1st, 2025 [August 1st, 2025]
- Classification accuracy of pain intensity induced by leg blood flow restriction during walking using machine learning based on electroencephalography... - August 1st, 2025 [August 1st, 2025]
- Machine learning-based drug-drug interaction prediction: a critical review of models, limitations, and data challenges - Frontiers - August 1st, 2025 [August 1st, 2025]
- AI and Machine Learning - AI and geospatial companies join forces to map Africa - Smart Cities World - July 30th, 2025 [July 30th, 2025]
- Summer research project explores alternative machine learning framework - Mercer University - July 30th, 2025 [July 30th, 2025]
- Unveiling multiscale drivers of wind speed in Michigan using machine learning - Nature - July 30th, 2025 [July 30th, 2025]
- New machine learning tool reveals atomic structure of ultra-thin film materials - Phys.org - July 28th, 2025 [July 28th, 2025]
- Optimizing base fluid composition for PEMFC cooling: A machine learning approach to balance thermal and rheological performance - Nature - July 28th, 2025 [July 28th, 2025]
- Overview: Machine learning in the medical space - Scientist Live - July 28th, 2025 [July 28th, 2025]
- IMD develops a novel machine-learning-based tool to predict urban rainfall trends in India - Research Matters - July 28th, 2025 [July 28th, 2025]
- Unsupervised System 2 Thinking: The Next Leap in Machine Learning with Energy-Based Transformers - MarkTechPost - July 27th, 2025 [July 27th, 2025]
- A machine learning-based approach to predict depression in Chinese older adults with subjective cognitive decline: a longitudinal study - Nature - July 27th, 2025 [July 27th, 2025]
- Machine Learning Identifies Role of Impaired Purine Metabolism in Gout Pathogenesis - HCPLive - July 27th, 2025 [July 27th, 2025]
- Detection of breast cancer using machine learning and explainable artificial intelligence - Nature - July 27th, 2025 [July 27th, 2025]
- Investigation of key ferroptosis-associated genes and potential therapeutic drugs for asthma based on machine learning and regression models - Nature - July 27th, 2025 [July 27th, 2025]
- Predicting postoperative trauma-induced coagulopathy in patients with severe injuries by machine learning - Nature - July 27th, 2025 [July 27th, 2025]
- Machine learning based multi-stage intrusion detection system and feature selection ensemble security in cloud assisted vehicular ad hoc networks -... - July 27th, 2025 [July 27th, 2025]
- Comparative analysis of machine learning models for malaria detection using validated synthetic data: a cost-sensitive approach with clinical domain... - July 27th, 2025 [July 27th, 2025]
- Statistical modelling and forecasting of HIV and anti-retroviral therapy cases by time-series and machine learning models - Nature - July 27th, 2025 [July 27th, 2025]
- Seeing Through the Rust: How Machine Learning is Improving Corrosion Detection - Research Matters - July 27th, 2025 [July 27th, 2025]
- Machine-Learning Approach to Increase the Potency and Overcome the Hemolytic Toxicity of Gramicidin S - ACS Publications - July 24th, 2025 [July 24th, 2025]
- Machine learning-based academic performance prediction with explainability for enhanced decision-making in educational institutions - Nature - July 24th, 2025 [July 24th, 2025]
- Can External Validation Tools Improve Annotation Quality for LLM-as-a-Judge? - Apple Machine Learning Research - July 24th, 2025 [July 24th, 2025]
- How to use learning curves to evaluate the sample size for malaria prediction models developed using machine learning algorithms - Malaria Journal - July 24th, 2025 [July 24th, 2025]
- Development and validation of a dynamic early warning system with time-varying machine learning models for predicting hemodynamic instability in... - July 24th, 2025 [July 24th, 2025]
- Early and non-destructive prediction of the differentiation efficiency of human induced pluripotent stem cells using imaging and machine learning -... - July 24th, 2025 [July 24th, 2025]
- Algorithmica Reports 35% Return in First Fiscal Year, Driven by Machine Learning Trading Technology - PR Newswire - July 24th, 2025 [July 24th, 2025]
- New research using machine learning further links increase in earthquakes, quake intensity, in Raton Basin to wastewater injections - The... - July 24th, 2025 [July 24th, 2025]
- Early modern text transcription revolutionized by ethical machine learning tools - Archaeology News Online Magazine - July 22nd, 2025 [July 22nd, 2025]
- Role of Artificial Intelligence and Machine Learning in Conservative Dentistry and Endodontics: A Review - Cureus - July 22nd, 2025 [July 22nd, 2025]
- NTT Researchers Advance AI and Machine Learning Accuracy, Security and Cost Effectiveness at ICML 2025 - Business Wire - July 22nd, 2025 [July 22nd, 2025]
- Exploring Phase Stability and Transport Properties of Emerging Thermoelectric Materials: Machine Learning and Experimental Insights - ACS Publications - July 22nd, 2025 [July 22nd, 2025]
- Google expands Ad Manager partner guidelines with machine learning restrictions - PPC Land - July 22nd, 2025 [July 22nd, 2025]
- Leveraging Generative AI into Wargaming and Machine Learning to Shape War Termination Scenarios in Ukraine - oodaloop.com - July 22nd, 2025 [July 22nd, 2025]
- Predictive AI Too Hard To Use? GenAI Makes It Easy - Machine Learning Week 2025 - July 22nd, 2025 [July 22nd, 2025]
- Wheat is becoming more climate-resilient through nature-based plant breeding and machine learning - Phys.org - July 22nd, 2025 [July 22nd, 2025]
- Machine learning enhanced ultra-high vacuum system for predicting field emission performance in graphene reinforced aluminium based metal matrix... - July 22nd, 2025 [July 22nd, 2025]
- Machine learning-guided evolution of pyrrolysyl-tRNA synthetase for improved incorporation efficiency of diverse noncanonical amino acids - Nature - July 22nd, 2025 [July 22nd, 2025]
- Dietary intervention optimized using machine learning could lower risk of dementia - Medical Xpress - July 20th, 2025 [July 20th, 2025]
- Application of machine learning algorithms and SHAP explanations to predict fertility preference among reproductive women in Somalia - Nature - July 20th, 2025 [July 20th, 2025]
- From Reactive to Predictive: Forecasting Network Congestion with Machine Learning and INT - Towards Data Science - July 20th, 2025 [July 20th, 2025]
- Artificial intelligence and machine learning in the development of vaccines and immunotherapeutics—yesterday, today, and tomorrow - Frontiers - July 20th, 2025 [July 20th, 2025]
- How Machine Learning is Revolutionizing Threat Detection for Businesses in Real-Time - Eye On Annapolis - July 20th, 2025 [July 20th, 2025]
- Identification of clinical diagnostic and immune cell infiltration characteristics of acute myocardial infarction with machine learning approach -... - July 20th, 2025 [July 20th, 2025]
- Predicting the mechanical performance of industrial waste incorporated sustainable concrete using hybrid machine learning modeling and parametric... - July 20th, 2025 [July 20th, 2025]
- Integrative multi-omics and machine learning reveal critical functions of proliferating cells in prognosis and personalized treatment of lung... - July 20th, 2025 [July 20th, 2025]
- Systematic measurement and machine learning-based profile characterization of community noise in a medium-large city in the United States - Nature - July 20th, 2025 [July 20th, 2025]
- Prediction of birthweight with early and mid-pregnancy antenatal markers utilising machine learning and explainable artificial intelligence - Nature - July 20th, 2025 [July 20th, 2025]
- A comprehensive machine learning for high throughput Tuberculosis sequence analysis, functional annotation, and visualization - Nature - July 20th, 2025 [July 20th, 2025]
- AI and Machine Learning Skills Are Make or Break for Developers: 71% of Tech Leaders Won't Hire Without Them - The National Law Review - July 20th, 2025 [July 20th, 2025]
- Quality-of-life scale machine learning approach to predict immunotherapy response in patients with advanced non-small cell lung cancer - Frontiers - July 20th, 2025 [July 20th, 2025]
- Inversion and validation of soil water-holding capacity in a wild fruit forest, using hyperspectral technology combined with machine learning - Nature - July 20th, 2025 [July 20th, 2025]
- Machine Learning in Drug Discovery Market to Witness Exponential Growth: Key Players, $250M Eli Lilly Deal & Regional Insights for 2025-2034 -... - July 18th, 2025 [July 18th, 2025]
- Automated seafood freshness detection and preservation analysis using machine learning and paper-based pH sensors - Nature - July 18th, 2025 [July 18th, 2025]
- Do You Know What It Means To Train a Machine Learning Model? - LSU - July 18th, 2025 [July 18th, 2025]
- Establishment of an interpretable MRI radiomics-based machine learning model capable of predicting axillary lymph node metastasis in invasive breast... - July 18th, 2025 [July 18th, 2025]
- A Machine Learning-Reconstructed Dataset of River Discharge, Temperature, and Heat Flux into the Arctic Ocean - Nature - July 18th, 2025 [July 18th, 2025]
- Leveraging computational linguistics and machine learning for detection of ultra-high risk of mental health disorders in youths | Schizophrenia -... - July 18th, 2025 [July 18th, 2025]
- Development and validation of machine learning-based diagnostic models using blood transcriptomics for early childhood diabetes prediction - Frontiers - July 18th, 2025 [July 18th, 2025]
- Fatigue and stamina prediction of athletic person on track using thermal facial biomarkers and optimized machine learning algorithm - Nature - July 18th, 2025 [July 18th, 2025]
- Identifying the crucial oncogenic mechanisms of DDX56 based on a machine learning-based integration model of RNA-binding proteins - Nature - July 18th, 2025 [July 18th, 2025]
- AI and Machine Learning Skills Are Make or Break for Developers: 71% of Tech Leaders Won't Hire Without Them - Yahoo Finance - July 18th, 2025 [July 18th, 2025]
- Developing an explainable machine learning and fog computing-based visual rating scale for the prediction of dementia progression - Nature - July 18th, 2025 [July 18th, 2025]
- Prognosis of air quality index and air pollution using machine learning techniques - Nature - July 18th, 2025 [July 18th, 2025]
- Integrating vision transformer-based deep learning model with kernel extreme learning machine for non-invasive diagnosis of neonatal jaundice using... - July 18th, 2025 [July 18th, 2025]
- PlayStation 6 Likely to Feature 24 GB RAM for Advanced Ray Tracing and Machine Learning Without Raising Costs - Wccftech - July 18th, 2025 [July 18th, 2025]
- Machine Learning-Assisted Iterative Screening for Efficient Detection of Drug Discovery Starting Points - ACS Publications - July 16th, 2025 [July 16th, 2025]
- 2025 IT Camp on AI & Machine Learning for Beginners to be held August 5 - Southeastern Oklahoma State University - July 16th, 2025 [July 16th, 2025]