Sales Forecasting with SQL Server ML Services

This section provides hands-on code samples and guidance for building and deploying sophisticated sales forecasting models directly within SQL Server using Machine Learning Services. Leverage the power of R and Python within your database to predict future sales trends, optimize inventory, and make data-driven business decisions.

Why Sales Forecasting with ML Services?

Integrating sales forecasting into SQL Server ML Services offers several key advantages:

  • Data Locality: Keep your data and models together, reducing data movement and improving security.
  • Performance: Leverage the power of SQL Server for data manipulation and model execution.
  • Scalability: Scale your forecasting solutions as your data grows.
  • Unified Workflow: Manage data preparation, model training, and prediction within a single database environment.

Sample Scenarios and Code

Below are some common sales forecasting scenarios with sample scripts to get you started.

Basic Time Series Forecasting (ARIMA)

Implement a standard ARIMA model for time series forecasting using R. This sample focuses on identifying trends, seasonality, and noise in historical sales data.

View Details & Code

Advanced Forecasting with Prophet

Utilize the Prophet library (developed by Facebook) for robust forecasting, especially effective with data containing strong seasonal effects and missing values.

View Details & Code

Forecasting with External Regressors

Incorporate external factors such as promotions, holidays, or economic indicators into your forecasting models to improve accuracy.

View Details & Code

Deploying a Forecasting Model

Learn how to save your trained forecasting model and deploy it as a stored procedure for on-demand predictions within SQL Server.

View Details & Code

ARIMA Forecasting Sample

This sample demonstrates how to use the forecast package in R to build an ARIMA model.


-- Prerequisites: Ensure SQL Server Machine Learning Services (R) is installed and that 'external scripts enabled' is set to 1 (via sp_configure).
-- You'll need a table with historical sales data, e.g., SalesData(SaleDate DATE, SalesAmount DECIMAL)

-- Stored Procedure to train and predict using ARIMA
CREATE PROCEDURE dbo.sp_ForecastSalesARIMA
    @TrainingDataSize INT = 200 -- Number of historical data points to use for training
AS
BEGIN
    -- Purpose: fit an ARIMA model (forecast::auto.arima) on the most recent
    --          @TrainingDataSize rows of SalesData and return a 12-step forecast.
    -- Parameters:
    --   @TrainingDataSize  number of most recent SalesData rows used for training.
    -- Result set: one row per forecast month with the point forecast and the
    --             80% / 95% prediction interval bounds.
    SET NOCOUNT ON;

    -- The training rows are selected by SQL Server and handed to R as InputDataSet.
    -- The external script has no database connection object of its own, and a
    -- T-SQL parameter placed inside an R string literal is never substituted, so
    -- the query must be supplied through @input_data_1 (where @params are bound).
    DECLARE @InputQuery NVARCHAR(MAX);
    SET @InputQuery = N'
    SELECT SaleDate, SalesAmount
    FROM SalesData
    ORDER BY SaleDate DESC
    OFFSET 0 ROWS FETCH NEXT @TrainingDataSize ROWS ONLY';

    -- R script for ARIMA forecasting
    DECLARE @RScript NVARCHAR(MAX);
    SET @RScript = N'
    library(forecast);

    # Training rows arrive newest-first; convert the date column and sort oldest-first
    sales_data <- InputDataSet;
    if (nrow(sales_data) == 0) stop("No rows returned from SalesData; cannot fit an ARIMA model.");
    sales_data$SaleDate <- as.Date(sales_data$SaleDate);
    sales_data <- sales_data[order(sales_data$SaleDate), ]; # Ensure chronological order

    # Create a time series object
    sales_ts <- ts(sales_data$SalesAmount, frequency = 12); # Assuming monthly data

    # Fit an ARIMA model
    arima_model <- auto.arima(sales_ts);

    # Forecast next 12 months; levels are stated explicitly so the "80%" and "95%" columns exist
    forecast_result <- forecast(arima_model, h = 12, level = c(80, 95));

    # The script result must be assigned to OutputDataSet (a top-level return() is invalid in R).
    # as.numeric() strips the ts class so every column is a plain numeric vector.
    OutputDataSet <- data.frame(
        Date = seq(max(sales_data$SaleDate), by = "month", length.out = 13)[-1],
        ForecastSales = as.numeric(forecast_result$mean),
        Lo80 = as.numeric(forecast_result$lower[, "80%"]),
        Hi80 = as.numeric(forecast_result$upper[, "80%"]),
        Lo95 = as.numeric(forecast_result$lower[, "95%"]),
        Hi95 = as.numeric(forecast_result$upper[, "95%"])
    );
    ';

    -- Execute the R script using sp_execute_external_script
    EXEC sp_execute_external_script
        @language = N'R',
        @script = @RScript,
        @input_data_1 = @InputQuery,
        @params = N'@TrainingDataSize INT',
        @TrainingDataSize = @TrainingDataSize
    WITH RESULT SETS ((
        ForecastDate DATE,
        ForecastAmount DECIMAL(18, 2),
        Lower80Confidence DECIMAL(18, 2),
        Upper80Confidence DECIMAL(18, 2),
        Lower95Confidence DECIMAL(18, 2),
        Upper95Confidence DECIMAL(18, 2)
    ));
END;
GO

-- Example Usage:
-- EXEC dbo.sp_ForecastSalesARIMA @TrainingDataSize = 100;
                    

Prophet Forecasting Sample

This sample uses the prophet library in R for more flexible forecasting.


-- Prerequisites: Ensure R services are installed and enabled.
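-- Ensure the 'prophet' R package is installed in the R library used by the SQL Server instance (running install.packages("prophet") in a separate desktop R installation is not sufficient).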

-- Stored Procedure for Prophet forecasting
CREATE PROCEDURE dbo.sp_ForecastSalesProphet
    @TrainingDataSize INT = 200
AS
BEGIN
    -- Purpose: fit a Prophet model on the most recent @TrainingDataSize rows of
    --          SalesData and return forecasts for the following 12 months.
    -- Parameters:
    --   @TrainingDataSize  number of most recent SalesData rows used for training.
    -- Result set: one row per future month with the point forecast and the
    --             lower / upper bounds of the uncertainty interval.
    SET NOCOUNT ON;

    -- Training rows are supplied through @input_data_1 and appear in R as
    -- InputDataSet. DESC + FETCH keeps the most recent rows (as the ARIMA sample
    -- does); ASC would train on the oldest rows and forecast an already-known period.
    DECLARE @InputQuery NVARCHAR(MAX);
    SET @InputQuery = N'
    SELECT SaleDate, SalesAmount
    FROM SalesData
    ORDER BY SaleDate DESC
    OFFSET 0 ROWS FETCH NEXT @TrainingDataSize ROWS ONLY';

    DECLARE @RScript NVARCHAR(MAX);
    SET @RScript = N'
    library(prophet);

    # Prophet requires columns named ds and y
    sales_data <- data.frame(
        ds = as.Date(InputDataSet$SaleDate),
        y = InputDataSet$SalesAmount
    );
    if (nrow(sales_data) < 2) stop("At least two rows of sales history are required to fit a Prophet model.");
    sales_data <- sales_data[order(sales_data$ds), ];

    # prophet() has no monthly.seasonality argument; a monthly component is
    # registered with add_seasonality() before the model is fitted.
    prophet_model <- prophet(daily.seasonality = FALSE, weekly.seasonality = TRUE);
    prophet_model <- add_seasonality(prophet_model, name = "monthly", period = 30.5, fourier.order = 5);
    prophet_model <- fit.prophet(prophet_model, sales_data);

    # Create future dates dataframe for prediction (next 12 months)
    future_dates <- make_future_dataframe(prophet_model, periods = 12, freq = "month");

    # Make predictions
    forecast_result <- predict(prophet_model, future_dates);

    # Extract relevant columns; ds is converted to Date so that it can be
    # compared with the Date-typed training dates below.
    forecast_df <- data.frame(
        Date = as.Date(forecast_result$ds),
        ForecastSales = forecast_result$yhat,
        LowerConfidence = forecast_result$yhat_lower,
        UpperConfidence = forecast_result$yhat_upper
    );

    # Return only future predictions; the result must be assigned to OutputDataSet
    OutputDataSet <- forecast_df[forecast_df$Date > max(sales_data$ds), ];
    ';

    EXEC sp_execute_external_script
        @language = N'R',
        @script = @RScript,
        @input_data_1 = @InputQuery,
        @params = N'@TrainingDataSize INT',
        @TrainingDataSize = @TrainingDataSize
    WITH RESULT SETS ((
        ForecastDate DATE,
        ForecastAmount DECIMAL(18, 2),
        LowerConfidence DECIMAL(18, 2),
        UpperConfidence DECIMAL(18, 2)
    ));
END;
GO

-- Example Usage:
-- EXEC dbo.sp_ForecastSalesProphet @TrainingDataSize = 150;
                    

Forecasting with External Regressors

This example shows how to add promotional event data to improve forecast accuracy using Prophet.


-- Prerequisites: R services, Prophet package.
-- You'll need a table for promotional events: Promotions(PromoDate DATE, IsPromotion BIT)

-- Stored Procedure for Prophet with regressors
CREATE PROCEDURE dbo.sp_ForecastSalesProphetWithRegressors
    @TrainingDataSize INT = 200
AS
BEGIN
    -- Purpose: fit a Prophet model with a promotion regressor on the most recent
    --          @TrainingDataSize rows of SalesData (joined to Promotions) and
    --          return forecasts for the following 12 months.
    -- Parameters:
    --   @TrainingDataSize  number of most recent sales rows used for training.
    -- Result set: one row per future month with the point forecast and the
    --             lower / upper bounds of the uncertainty interval.
    SET NOCOUNT ON;

    -- The external script receives a single input data set, so the promotion flag
    -- is joined in SQL rather than fetched by a second query inside R.
    -- LEFT JOIN + COALESCE: sales dates without a Promotions row get promo = 0.
    -- NOTE(review): assumes at most one Promotions row per PromoDate; duplicates
    -- would duplicate sales rows - confirm against the table definition.
    DECLARE @InputQuery NVARCHAR(MAX);
    SET @InputQuery = N'
    SELECT
        s.SaleDate,
        s.SalesAmount,
        COALESCE(CAST(p.IsPromotion AS INT), 0) AS IsPromotion
    FROM SalesData AS s
    LEFT JOIN Promotions AS p
        ON p.PromoDate = s.SaleDate
    ORDER BY s.SaleDate DESC
    OFFSET 0 ROWS FETCH NEXT @TrainingDataSize ROWS ONLY';

    DECLARE @RScript NVARCHAR(MAX);
    SET @RScript = N'
    library(prophet);

    # Prophet requires columns named ds and y; promo is the extra regressor
    full_data <- data.frame(
        ds = as.Date(InputDataSet$SaleDate),
        y = InputDataSet$SalesAmount,
        promo = as.numeric(InputDataSet$IsPromotion)
    );
    if (nrow(full_data) < 2) stop("At least two rows of sales history are required to fit a Prophet model.");
    full_data <- full_data[order(full_data$ds), ];

    # Regressors and custom seasonalities must be added BEFORE fitting, so the
    # model is created without data, configured, and only then fitted.
    prophet_model <- prophet(daily.seasonality = FALSE, weekly.seasonality = TRUE);
    prophet_model <- add_seasonality(prophet_model, name = "monthly", period = 30.5, fourier.order = 5);
    prophet_model <- add_regressor(prophet_model, "promo");
    prophet_model <- fit.prophet(prophet_model, full_data);

    # Create future dataframe and add future regressor values (if known)
    future_dates <- make_future_dataframe(prophet_model, periods = 12, freq = "month");

    # Example: assume no promotions in the future forecast period for simplicity
    # In practice, you would populate this based on planned promotions.
    future_promo <- data.frame(ds = future_dates$ds, promo = 0);
    forecast_result <- predict(prophet_model, future_promo);

    # Extract relevant columns; ds is converted to Date so that it can be
    # compared with the Date-typed training dates below.
    forecast_df <- data.frame(
        Date = as.Date(forecast_result$ds),
        ForecastSales = forecast_result$yhat,
        LowerConfidence = forecast_result$yhat_lower,
        UpperConfidence = forecast_result$yhat_upper
    );

    # Return only future predictions; the result must be assigned to OutputDataSet
    OutputDataSet <- forecast_df[forecast_df$Date > max(full_data$ds), ];
    ';

    EXEC sp_execute_external_script
        @language = N'R',
        @script = @RScript,
        @input_data_1 = @InputQuery,
        @params = N'@TrainingDataSize INT',
        @TrainingDataSize = @TrainingDataSize
    WITH RESULT SETS ((
        ForecastDate DATE,
        ForecastAmount DECIMAL(18, 2),
        LowerConfidence DECIMAL(18, 2),
        UpperConfidence DECIMAL(18, 2)
    ));
END;
GO

-- Example Usage:
-- EXEC dbo.sp_ForecastSalesProphetWithRegressors @TrainingDataSize = 150;
                    

Deploying a Forecasting Model

Saving your trained model and calling it from SQL is crucial for production use. This example shows how to save a Prophet model using R.


-- Prerequisites: R services, Prophet package.
-- You will need a method to store the serialized model, e.g., a file path or a VARBINARY column in a table.
-- For simplicity, this example assumes saving to a file path that the external R runtime launched by SQL Server is able to read and write.

-- Stored Procedure to train, save, and predict
CREATE PROCEDURE dbo.sp_ForecastSalesDeploy
    @ModelSavePath NVARCHAR(255) = 'C:\MLModels\SalesProphetModel.rds', -- Path to save the model
    @TrainingDataSize INT = 200
AS
BEGIN
    -- Purpose: fit a Prophet model on the most recent @TrainingDataSize rows of
    --          SalesData, serialize it to @ModelSavePath with saveRDS, and return
    --          forecasts for the following 12 months.
    -- Parameters:
    --   @ModelSavePath     file path the R script writes the fitted model to.
    --   @TrainingDataSize  number of most recent SalesData rows used for training.
    -- Result set: one row per future month with the point forecast and the
    --             lower / upper bounds of the uncertainty interval.
    SET NOCOUNT ON;

    -- Training rows are supplied through @input_data_1 and appear in R as
    -- InputDataSet; DESC + FETCH keeps the most recent rows.
    DECLARE @InputQuery NVARCHAR(MAX);
    SET @InputQuery = N'
    SELECT SaleDate, SalesAmount
    FROM SalesData
    ORDER BY SaleDate DESC
    OFFSET 0 ROWS FETCH NEXT @TrainingDataSize ROWS ONLY';

    DECLARE @RScript NVARCHAR(MAX);
    SET @RScript = N'
    library(prophet);

    # Prophet requires columns named ds and y
    sales_data <- data.frame(
        ds = as.Date(InputDataSet$SaleDate),
        y = InputDataSet$SalesAmount
    );
    if (nrow(sales_data) < 2) stop("At least two rows of sales history are required to fit a Prophet model.");
    sales_data <- sales_data[order(sales_data$ds), ];

    # prophet() has no monthly.seasonality argument; a monthly component is
    # registered with add_seasonality() before the model is fitted.
    prophet_model <- prophet(daily.seasonality = FALSE, weekly.seasonality = TRUE);
    prophet_model <- add_seasonality(prophet_model, name = "monthly", period = 30.5, fourier.order = 5);
    prophet_model <- fit.prophet(prophet_model, sales_data);

    # Save the trained model. Script parameters are exposed to R under their
    # name WITHOUT the leading @, hence ModelSavePath.
    saveRDS(prophet_model, file = ModelSavePath);

    # Create future dates dataframe for prediction (next 12 months)
    future_dates <- make_future_dataframe(prophet_model, periods = 12, freq = "month");

    # Make predictions
    forecast_result <- predict(prophet_model, future_dates);

    # Extract relevant columns; ds is converted to Date so that it can be
    # compared with the Date-typed training dates below.
    forecast_df <- data.frame(
        Date = as.Date(forecast_result$ds),
        ForecastSales = forecast_result$yhat,
        LowerConfidence = forecast_result$yhat_lower,
        UpperConfidence = forecast_result$yhat_upper
    );

    # Return only future predictions; the result must be assigned to OutputDataSet
    OutputDataSet <- forecast_df[forecast_df$Date > max(sales_data$ds), ];
    ';

    -- TRY/CATCH ensures the success message below is printed only when the
    -- external script (including saveRDS) completed without error.
    BEGIN TRY
        EXEC sp_execute_external_script
            @language = N'R',
            @script = @RScript,
            @input_data_1 = @InputQuery,
            @params = N'@ModelSavePath NVARCHAR(255), @TrainingDataSize INT',
            @ModelSavePath = @ModelSavePath,
            @TrainingDataSize = @TrainingDataSize
        WITH RESULT SETS ((
            ForecastDate DATE,
            ForecastAmount DECIMAL(18, 2),
            LowerConfidence DECIMAL(18, 2),
            UpperConfidence DECIMAL(18, 2)
        ));

        PRINT N'Model saved successfully to: ' + @ModelSavePath;
    END TRY
    BEGIN CATCH
        THROW;
    END CATCH;
END;
GO

-- To predict using the saved model (requires a separate procedure or script)
-- Example of a prediction stored procedure (simplified)
CREATE PROCEDURE dbo.sp_PredictSalesFromModel
    @ModelLoadPath NVARCHAR(255),
    @PredictMonths INT = 12
AS
BEGIN
    -- Purpose: load a Prophet model previously saved with saveRDS and return
    --          forecasts for the @PredictMonths months after the end of the
    --          model training history.
    -- Parameters:
    --   @ModelLoadPath  file path of the serialized model (.rds).
    --   @PredictMonths  number of future months to forecast; must be >= 1.
    -- Result set: one row per future month with the point forecast and the
    --             lower / upper bounds of the uncertainty interval.
    SET NOCOUNT ON;

    IF @PredictMonths IS NULL OR @PredictMonths < 1
    BEGIN
        RAISERROR(N'@PredictMonths must be a positive integer.', 16, 1);
        RETURN;
    END;

    DECLARE @RScript NVARCHAR(MAX);
    SET @RScript = N'
    library(prophet);

    # Script parameters are exposed to R under their name WITHOUT the leading @.
    if (!file.exists(ModelLoadPath)) stop(paste("Model file not found:", ModelLoadPath));

    # Load the saved model.
    # NOTE(review): readRDS deserializes arbitrary R objects; only load files from trusted locations.
    prophet_model <- readRDS(file = ModelLoadPath);

    # Create future dates dataframe for prediction
    future_dates <- make_future_dataframe(prophet_model, periods = PredictMonths, freq = "month");

    # Make predictions
    forecast_result <- predict(prophet_model, future_dates);

    # Extract relevant columns; ds is converted to Date for the comparison below
    forecast_df <- data.frame(
        Date = as.Date(forecast_result$ds),
        ForecastSales = forecast_result$yhat,
        LowerConfidence = forecast_result$yhat_lower,
        UpperConfidence = forecast_result$yhat_upper
    );

    # Return only the future predictions. The fitted model keeps its training
    # data in $history, so the last training date is taken from there instead
    # of using the current system date, which is unrelated to the training data.
    last_training_date <- max(as.Date(prophet_model$history$ds));
    OutputDataSet <- forecast_df[forecast_df$Date > last_training_date, ];
    ';

    EXEC sp_execute_external_script
        @language = N'R',
        @script = @RScript,
        @params = N'@ModelLoadPath NVARCHAR(255), @PredictMonths INT',
        @ModelLoadPath = @ModelLoadPath,
        @PredictMonths = @PredictMonths
    WITH RESULT SETS ((
        ForecastDate DATE,
        ForecastAmount DECIMAL(18, 2),
        LowerConfidence DECIMAL(18, 2),
        UpperConfidence DECIMAL(18, 2)
    ));
END;
GO

-- Example Usage:
-- First, train and save the model:
-- EXEC dbo.sp_ForecastSalesDeploy @ModelSavePath = 'C:\MLModels\SalesProphetModel.rds', @TrainingDataSize = 150;
-- Then, use the saved model for predictions:
-- EXEC dbo.sp_PredictSalesFromModel @ModelLoadPath = 'C:\MLModels\SalesProphetModel.rds', @PredictMonths = 6;