File size: 5,360 Bytes
ba898b9
 
 
 
0dc2bb2
 
ba898b9
 
0dc2bb2
 
 
 
 
 
ba898b9
0dc2bb2
ba898b9
 
 
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
0dc2bb2
ba898b9
 
0dc2bb2
ba898b9
 
0dc2bb2
 
ba898b9
0dc2bb2
 
ba898b9
0dc2bb2
ba898b9
0dc2bb2
ba898b9
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
 
0dc2bb2
 
ba898b9
0dc2bb2
 
 
ba898b9
0dc2bb2
 
 
 
ba898b9
0dc2bb2
 
 
ba898b9
 
 
 
 
0dc2bb2
ba898b9
0dc2bb2
 
ba898b9
0dc2bb2
ba898b9
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
 
0dc2bb2
ba898b9
 
 
0dc2bb2
ba898b9
0dc2bb2
ba898b9
 
 
 
 
0dc2bb2
ba898b9
 
 
 
 
0dc2bb2
 
ba898b9
0dc2bb2
ba898b9
 
0dc2bb2
 
ba898b9
 
 
 
 
 
 
0dc2bb2
 
ba898b9
0dc2bb2
 
 
ba898b9
 
0dc2bb2
 
 
 
 
ba898b9
 
0dc2bb2
 
 
ba898b9
 
0dc2bb2
ba898b9
 
 
0dc2bb2
ba898b9
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import torch
from chronos import ChronosPipeline

class TimeSeriesForecaster:
    """Wrap a Chronos pipeline for forecasting a single numeric series.

    The instance remembers the most recently preprocessed series
    (``original_series``) and its trailing context window (``context``) so
    that ``visualize_forecast`` can draw the history next to the forecast.
    """

    def __init__(self, model_name="amazon/chronos-t5-small"):
        """Load the pretrained pipeline identified by *model_name*.

        The pipeline is requested on CUDA with bfloat16 weights when a GPU is
        available, otherwise on the CPU with float32 weights.
        """
        use_cuda = torch.cuda.is_available()
        self.pipeline = ChronosPipeline.from_pretrained(
            model_name,
            device_map="cuda" if use_cuda else "cpu",
            torch_dtype=torch.bfloat16 if use_cuda else torch.float32,
        )
        self.original_series = None
        self.context = None

    def preprocess_data(self, df, date_column, value_column, context_length=30, prediction_length=7):
        """Extract a chronologically ordered series and its context window.

        Args:
            df: DataFrame holding at least ``date_column`` and
                ``value_column``. It is not modified.
            date_column: Name of the column with values parseable by
                ``pd.to_datetime``.
            value_column: Name of the column with the observations.
            context_length: Number of trailing observations kept as context.
            prediction_length: Unused here; kept for interface compatibility.

        Returns:
            A ``(context, context_length)`` tuple where ``context`` is a
            float32 tensor of the last ``context_length`` observations.

        Raises:
            ValueError: If the selected series contains no rows.
        """
        # Work on a copy so the caller's DataFrame is never mutated.
        frame = df.copy()

        # Parse dates BEFORE sorting: sorting the raw strings would order
        # them lexicographically (e.g. "01/02/2021" before "02/01/2020").
        frame[date_column] = pd.to_datetime(frame[date_column])

        # A stable sort keeps rows with identical timestamps in file order.
        frame = frame.sort_values(by=date_column, kind="mergesort")

        series = frame.set_index(date_column)[value_column]
        if len(series) == 0:
            raise ValueError("The selected series is empty; nothing to forecast.")

        self.original_series = series.values

        # Only the trailing window is handed to the model as context.
        self.context = torch.tensor(
            self.original_series[-context_length:], dtype=torch.float32
        )

        return self.context, context_length

    def forecast(self, context, prediction_length=7, num_samples=100):
        """Return the pipeline's sampled forecasts for *context*.

        Args:
            context: Context tensor, e.g. the one from ``preprocess_data``.
            prediction_length: Number of future steps to predict.
            num_samples: Number of sample paths requested from the pipeline.

        Returns:
            Whatever ``self.pipeline.predict`` returns, unchanged.
        """
        return self.pipeline.predict(context, prediction_length, num_samples=num_samples)

    def visualize_forecast(self, context, forecasts):
        """Plot the stored history together with the forecast band.

        Args:
            context: Unused; kept for interface compatibility.
            forecasts: Indexable whose first element is a tensor of sample
                paths; assumed to be laid out as (samples, horizon) since
                quantiles are taken along axis 0 -- confirm against the
                pipeline's output.

        Returns:
            The ``matplotlib.pyplot`` module, whose current figure holds
            the plot.

        Raises:
            ValueError: If ``preprocess_data`` has not been called yet.
        """
        if self.original_series is None:
            raise ValueError(
                "No series loaded; call preprocess_data() before visualize_forecast()."
            )
        history = self.original_series

        # Compute the statistics first so a failure here does not leave an
        # empty figure registered with pyplot.
        samples = forecasts[0].detach().cpu().numpy()
        low, median, high = np.quantile(samples, [0.1, 0.5, 0.9], axis=0)

        plt.figure(figsize=(12, 6))

        # Historical observations on a plain integer time axis.
        plt.plot(range(len(history)), history, label='Historical Data', color='blue')

        # The forecast continues the integer axis right after the history.
        forecast_index = range(len(history), len(history) + len(median))
        plt.plot(forecast_index, median, color='red', label='Median Forecast')
        plt.fill_between(forecast_index, low, high, color='red', alpha=0.3, label='80% Prediction Interval')

        plt.title('Time Series Forecasting with Amazon Chronos')
        plt.xlabel('Time Index')
        plt.ylabel('Value')
        plt.legend()

        return plt

def main():
    """Render the Streamlit UI and run a forecast on an uploaded CSV.

    The sidebar collects the CSV file, the date/value column choice and the
    context/prediction lengths. When the button is pressed, the forecast is
    computed, plotted and summarised in a table. Any failure during
    forecasting is reported in the page instead of raising.
    """
    st.title('🕰️ Time Series Forecasting with Amazon Chronos')

    # Sidebar for upload and configuration
    st.sidebar.header('Forecast Settings')

    uploaded_file = st.sidebar.file_uploader(
        "Upload CSV File",
        type=['csv'],
        help="Ensure CSV file has date and numeric columns"
    )

    # Nothing to configure until a file has been provided.
    if uploaded_file is None:
        return

    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read the CSV file: {e}")
        return

    # A date column and a separate value column are both required; without
    # this check the value selectbox would have no options to offer.
    if len(df.columns) < 2:
        st.error("The CSV file must contain at least two columns: a date column and a value column.")
        return

    date_column = st.sidebar.selectbox(
        'Select Date Column',
        options=df.columns
    )
    value_column = st.sidebar.selectbox(
        'Select Value Column',
        options=[col for col in df.columns if col != date_column]
    )

    # Prediction parameters
    context_length = st.sidebar.slider(
        'Context Length',
        min_value=10,
        max_value=100,
        value=30
    )
    prediction_length = st.sidebar.slider(
        'Prediction Length',
        min_value=1,
        max_value=30,
        value=7
    )

    if not st.sidebar.button('Perform Forecast'):
        return

    try:
        forecaster = TimeSeriesForecaster()

        context, _ = forecaster.preprocess_data(
            df,
            date_column,
            value_column,
            context_length,
            prediction_length
        )

        forecasts = forecaster.forecast(context, prediction_length)

        st.subheader('Forecast Visualization')
        # Use a distinct local name so the imported pyplot module stays
        # reachable as `plt` inside this function.
        rendered = forecaster.visualize_forecast(context, forecasts)
        try:
            st.pyplot(rendered)
        finally:
            # pyplot keeps every figure alive until it is closed; release
            # the one just drawn so repeated runs do not accumulate figures.
            plt.close()

        # Summary statistics over the sample paths (axis 0).
        forecast_np = forecasts[0].detach().cpu().numpy()
        prediction_df = pd.DataFrame({
            'Mean Forecast': forecast_np.mean(axis=0),
            'Lower Bound (10%)': np.percentile(forecast_np, 10, axis=0),
            'Upper Bound (90%)': np.percentile(forecast_np, 90, axis=0)
        })

        st.subheader('Forecast Details')
        st.dataframe(prediction_df)

    except Exception as e:
        # Top-level UI boundary: surface the problem to the user.
        st.error(f"An error occurred: {str(e)}")

# Script entry point: run the app only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
    main()