Time Series Forecasting: From ARIMA to Neural Networks
Time series forecasting is crucial in many domains, from stock prices to weather prediction. Let's explore both classical statistical methods and modern deep learning approaches.
Understanding Time Series Components
A time series can be decomposed into:

$$Y_t = T_t + S_t + C_t + I_t$$

Where:
- $T_t$ = Trend component
- $S_t$ = Seasonal component
- $C_t$ = Cyclical component
- $I_t$ = Irregular (noise) component

(A multiplicative form, $Y_t = T_t \times S_t \times C_t \times I_t$, is also common when seasonal swings grow with the level of the series.)
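As a concrete illustration, here is a minimal sketch that estimates trend, seasonal, and residual components with statsmodels' `seasonal_decompose`. The toy series and its parameters are invented for demonstration, and note that `seasonal_decompose` folds any cyclical component into the trend and residual terms:

```python
import numpy as np
import pandas as pd
from statsmodels.tsa.seasonal import seasonal_decompose

# Toy daily series: upward trend + weekly seasonality + noise
idx = pd.date_range('2023-01-01', periods=180, freq='D')
values = (np.linspace(50, 80, 180)
          + 5 * np.sin(2 * np.pi * np.arange(180) / 7)
          + np.random.normal(0, 1, 180))
demo = pd.Series(values, index=idx)

# Additive decomposition with a weekly period
result = seasonal_decompose(demo, model='additive', period=7)
print(result.trend.dropna().head())
print(result.seasonal.head())
print(result.resid.dropna().head())
```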
ARIMA Models
ARIMA(p,d,q) combines three components:
- AR(p): Autoregression of order p
- I(d): Integration (differencing) of order d
- MA(q): Moving average of order q
Mathematical Foundation
AR(p) model:

$$X_t = c + \sum_{i=1}^{p} \phi_i X_{t-i} + \varepsilon_t$$

MA(q) model:

$$X_t = \mu + \varepsilon_t + \sum_{i=1}^{q} \theta_i \varepsilon_{t-i}$$

ARIMA(p,d,q):

$$\left(1 - \sum_{i=1}^{p} \phi_i L^i\right)(1 - L)^d X_t = c + \left(1 + \sum_{i=1}^{q} \theta_i L^i\right)\varepsilon_t$$

Where $L$ is the lag operator, defined by $L X_t = X_{t-1}$.
Implementation
```python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf


class ARIMAForecaster:
    def __init__(self, order=(1, 1, 1)):
        self.order = order
        self.model = None
        self.fitted_model = None

    def check_stationarity(self, ts):
        """Check if the time series is stationary using the ADF test."""
        result = adfuller(ts)
        print(f'ADF Statistic: {result[0]:.6f}')
        print(f'p-value: {result[1]:.6f}')
        print('Critical Values:')
        for key, value in result[4].items():
            print(f'\t{key}: {value:.3f}')
        if result[1] <= 0.05:
            print("Series is stationary")
        else:
            print("Series is not stationary")
        return result[1] <= 0.05

    def difference_series(self, ts, d=1):
        """Apply differencing d times to make the series stationary."""
        for _ in range(d):
            ts = ts.diff().dropna()
        return ts

    def find_optimal_order(self, ts, max_p=5, max_q=5):
        """Find the optimal (p, q) by minimizing AIC over a grid."""
        from itertools import product

        best_aic = float('inf')
        best_order = None
        for p, q in product(range(max_p + 1), range(max_q + 1)):
            try:
                model = ARIMA(ts, order=(p, self.order[1], q))
                fitted = model.fit()
                if fitted.aic < best_aic:
                    best_aic = fitted.aic
                    best_order = (p, self.order[1], q)
            except Exception:
                continue
        print(f"Best order: {best_order}, AIC: {best_aic:.2f}")
        return best_order

    def fit(self, ts):
        """Fit the ARIMA model to the time series."""
        self.model = ARIMA(ts, order=self.order)
        self.fitted_model = self.model.fit()
        return self

    def forecast(self, steps=10):
        """Generate point forecasts and confidence intervals."""
        forecast = self.fitted_model.forecast(steps=steps)
        conf_int = self.fitted_model.get_forecast(steps=steps).conf_int()
        return forecast, conf_int

    def plot_diagnostics(self):
        """Plot residual diagnostics for the fitted model."""
        self.fitted_model.plot_diagnostics(figsize=(15, 12))
        plt.show()


# Example usage: generate a sample series with trend, seasonality, and noise
np.random.seed(42)
dates = pd.date_range('2020-01-01', periods=365, freq='D')
trend = np.linspace(100, 200, 365)
seasonal = 10 * np.sin(2 * np.pi * np.arange(365) / 365.25 * 4)
noise = np.random.normal(0, 5, 365)
ts = pd.Series(trend + seasonal + noise, index=dates)

# Fit ARIMA model
forecaster = ARIMAForecaster(order=(2, 1, 2))
forecaster.fit(ts)

# Generate forecasts
forecast, conf_int = forecaster.forecast(steps=30)
```
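To visualize the result, here is a short plotting sketch, assuming the `ts`, `forecast`, and `conf_int` objects from the block above (statsmodels returns `conf_int` as a two-column DataFrame of lower and upper bounds):

```python
import matplotlib.pyplot as plt

# Plot the observed series, the 30-step forecast, and its confidence band
fig, ax = plt.subplots(figsize=(12, 5))
ts.plot(ax=ax, label='Observed')
forecast.plot(ax=ax, label='ARIMA forecast', color='C1')
ax.fill_between(conf_int.index,
                conf_int.iloc[:, 0], conf_int.iloc[:, 1],
                color='C1', alpha=0.2, label='95% interval')
ax.legend()
plt.show()
```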
Modern Approaches: LSTM Networks
Long Short-Term Memory (LSTM) networks maintain an internal cell state regulated by learned gates, which lets them capture complex, non-linear temporal patterns that linear models like ARIMA miss.
Implementation with TensorFlow
```python
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler


class LSTMForecaster:
    def __init__(self, lookback=60, lstm_units=50):
        self.lookback = lookback
        self.lstm_units = lstm_units
        self.model = None
        self.scaler = MinMaxScaler()

    def prepare_data(self, ts):
        """Scale the series and build sliding windows for supervised training."""
        scaled_data = self.scaler.fit_transform(ts.values.reshape(-1, 1))

        X, y = [], []
        for i in range(self.lookback, len(scaled_data)):
            X.append(scaled_data[i - self.lookback:i, 0])
            y.append(scaled_data[i, 0])
        return np.array(X), np.array(y)

    def build_model(self):
        """Build the stacked LSTM architecture."""
        self.model = Sequential([
            LSTM(self.lstm_units, return_sequences=True,
                 input_shape=(self.lookback, 1)),
            Dropout(0.2),
            LSTM(self.lstm_units, return_sequences=False),
            Dropout(0.2),
            Dense(25),
            Dense(1)
        ])
        self.model.compile(optimizer='adam', loss='mse')
        return self.model

    def fit(self, ts, epochs=50, batch_size=32, validation_split=0.2, verbose=1):
        """Train the LSTM model."""
        X, y = self.prepare_data(ts)
        X = X.reshape((X.shape[0], X.shape[1], 1))

        if self.model is None:
            self.build_model()

        history = self.model.fit(
            X, y,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            verbose=verbose
        )
        return history

    def predict(self, ts, steps=30):
        """Generate multi-step forecasts by feeding predictions back in."""
        # Use the last `lookback` points as the seed sequence
        last_sequence = ts[-self.lookback:].values.reshape(-1, 1)
        current_sequence = self.scaler.transform(last_sequence)

        predictions = []
        for _ in range(steps):
            # Predict the next value
            next_pred = self.model.predict(
                current_sequence.reshape(1, self.lookback, 1),
                verbose=0
            )
            predictions.append(next_pred[0, 0])

            # Slide the window forward and append the new prediction
            current_sequence = np.roll(current_sequence, -1, axis=0)
            current_sequence[-1, 0] = next_pred[0, 0]

        # Map predictions back to the original scale
        predictions = np.array(predictions).reshape(-1, 1)
        return self.scaler.inverse_transform(predictions).flatten()


# Usage
lstm_forecaster = LSTMForecaster(lookback=60, lstm_units=50)
history = lstm_forecaster.fit(ts, epochs=50)
lstm_forecast = lstm_forecaster.predict(ts, steps=30)
```
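One design caveat worth noting: `predict` forecasts recursively, feeding each prediction back in as input for the next step. Errors therefore compound over the horizon, which is why long multi-step LSTM forecasts often degrade and why direct multi-output architectures are sometimes preferred.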
Transformer Models for Time Series
Recent advances apply the attention mechanism from Transformers, letting the model weigh every past time step directly when making a prediction:
```python
import tensorflow as tf
from tensorflow.keras.layers import MultiHeadAttention, LayerNormalization


class TimeSeriesTransformer:
    def __init__(self, seq_len=60, d_model=64, num_heads=8):
        self.seq_len = seq_len
        self.d_model = d_model
        self.num_heads = num_heads

    def build_model(self):
        """Build a single-block Transformer encoder for time series."""
        inputs = tf.keras.Input(shape=(self.seq_len, 1))

        # Project inputs to d_model dimensions; a full implementation
        # would also add positional encodings at this point
        x = tf.keras.layers.Dense(self.d_model)(inputs)

        # Multi-head self-attention over the sequence
        attention_output = MultiHeadAttention(
            num_heads=self.num_heads,
            key_dim=self.d_model
        )(x, x)

        # Add & Norm (residual connection)
        x = LayerNormalization()(x + attention_output)

        # Position-wise feed-forward network
        ff_output = tf.keras.layers.Dense(self.d_model * 4, activation='relu')(x)
        ff_output = tf.keras.layers.Dense(self.d_model)(ff_output)

        # Add & Norm
        x = LayerNormalization()(x + ff_output)

        # Pool over the sequence and predict the next value
        x = tf.keras.layers.GlobalAveragePooling1D()(x)
        outputs = tf.keras.layers.Dense(1)(x)

        model = tf.keras.Model(inputs, outputs)
        model.compile(optimizer='adam', loss='mse')
        return model
```
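One possible way to train it, reusing the scaling and windowing from `LSTMForecaster` above purely for convenience (this pairing is an illustration, not a prescribed API):

```python
# Build the model and prepare windowed training data
transformer = TimeSeriesTransformer(seq_len=60, d_model=64, num_heads=8)
model = transformer.build_model()

prep = LSTMForecaster(lookback=60)   # reuse its scaling/windowing only
X, y = prep.prepare_data(ts)
X = X.reshape((X.shape[0], X.shape[1], 1))

model.fit(X, y, epochs=20, batch_size=32, validation_split=0.2)
```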
Model Comparison Framework
```python
from sklearn.metrics import mean_absolute_error, mean_squared_error


def compare_forecasting_models(ts, test_size=30):
    """Compare different forecasting approaches on a hold-out test set."""
    # Split data
    train = ts[:-test_size]
    test = ts[-test_size:]
    results = {}

    # ARIMA
    arima = ARIMAForecaster(order=(2, 1, 2))
    arima.fit(train)
    arima_forecast, _ = arima.forecast(steps=test_size)

    # LSTM
    lstm = LSTMForecaster(lookback=60)
    lstm.fit(train, epochs=50, verbose=0)
    lstm_forecast = lstm.predict(train, steps=test_size)

    # Calculate metrics
    models = {
        'ARIMA': arima_forecast,
        'LSTM': lstm_forecast
    }
    for name, forecast in models.items():
        mae = mean_absolute_error(test, forecast)
        rmse = np.sqrt(mean_squared_error(test, forecast))
        results[name] = {'MAE': mae, 'RMSE': rmse}

    return results


# Usage
comparison_results = compare_forecasting_models(ts, test_size=30)
for model, metrics in comparison_results.items():
    print(f"{model}: MAE={metrics['MAE']:.2f}, RMSE={metrics['RMSE']:.2f}")
```
Choosing the Right Approach
| Method | Best For | Limitations |
|--------|----------|-------------|
| ARIMA | Linear patterns, small datasets | Cannot capture non-linear patterns |
| LSTM | Non-linear patterns, medium datasets | Requires lots of data |
| Transformer | Complex patterns, large datasets | Computationally expensive |
Advanced Topics
1. Seasonal ARIMA (SARIMA)
For data with seasonal patterns, SARIMA extends ARIMA with seasonal terms, written $\text{SARIMA}(p,d,q)(P,D,Q)_s$: $(P,D,Q)$ mirror the non-seasonal orders at the seasonal lag, and $s$ is the season length (e.g., 12 for monthly data with yearly seasonality).
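A minimal sketch with statsmodels' `SARIMAX` (the seasonal period of 12 here assumes monthly data; choose `s` to match your data's actual seasonality):

```python
from statsmodels.tsa.statespace.sarimax import SARIMAX

# SARIMA(1,1,1)(1,1,1,12): non-seasonal and seasonal AR/I/MA terms;
# s=12 assumes monthly data with yearly seasonality
sarima = SARIMAX(ts, order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
sarima_fit = sarima.fit(disp=False)
sarima_forecast = sarima_fit.forecast(steps=30)
```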
2. Prophet by Facebook
Great for business time series with holidays and seasonality:
```python
from prophet import Prophet

# Prophet expects a dataframe with 'ds' (dates) and 'y' (values) columns
df_prophet = pd.DataFrame({'ds': ts.index, 'y': ts.values})

model = Prophet()
model.fit(df_prophet)

# Forecast 30 periods into the future
future = model.make_future_dataframe(periods=30)
forecast = model.predict(future)
model.plot(forecast)
```
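If holiday effects matter, Prophet can also pull in a built-in country calendar before fitting; a small sketch (this assumes the `holidays` package is installed):

```python
# Add US holiday effects before fitting
model_holidays = Prophet()
model_holidays.add_country_holidays(country_name='US')
model_holidays.fit(df_prophet)
```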
3. Ensemble Methods
Combine multiple forecasting models:
```python
def ensemble_forecast(forecasts, weights=None):
    """Weighted ensemble of forecasts."""
    if weights is None:
        # Default: equal weight for every model
        weights = np.ones(len(forecasts)) / len(forecasts)
    return np.average(forecasts, axis=0, weights=weights)


# Combine ARIMA and LSTM forecasts (assumes both are
# aligned arrays covering the same forecast horizon)
ensemble_pred = ensemble_forecast([arima_forecast, lstm_forecast])
```
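Equal weights are a sensible default, but the weights can also be derived from validation error; a sketch with hypothetical MAE values:

```python
# Weight each model inversely to its validation MAE
# (these MAE values are hypothetical, for illustration only)
maes = np.array([4.2, 3.1])  # e.g. ARIMA, LSTM
weights = (1 / maes) / (1 / maes).sum()

weighted_pred = ensemble_forecast([arima_forecast, lstm_forecast],
                                  weights=weights)
```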
Conclusion
Time series forecasting requires understanding both the data characteristics and model assumptions. Classical methods like ARIMA work well for linear patterns, while neural networks excel with complex, non-linear relationships.
The key is to start simple, understand your data, and gradually increase model complexity as needed.