# JPMC - Task 1
import os
cwd = os.getcwd()
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from datetime import date, timedelta
# Load the price history from natgas_R.csv; the 'Dates' column is parsed into
# datetimes so it can be used directly as the x-axis.
df = pd.read_csv('natgas_R.csv', parse_dates=['Dates'])
prices = df['Prices'].values
dates = df['Dates'].values

# Draw the raw series before showing the figure: calling plt.show() with
# nothing plotted displays no data, yet the analysis below reasons "from the plot".
plt.plot(dates, prices, '-')
plt.show()
# From the plot, we can see that the prices have a seasonal cycle of around a year,
# but trend upwards.
# We can do a linear regression to get the trend, and then fit a sine function to the
# variation within each year.
# First we need the dates in terms of days from the start, to make it easier to
# interpolate later.
# Build the list of month-end dates from start_date through end_date
# (inclusive), then express each one as a number of days since start_date.
start_date = date(2020, 10, 31)
end_date = date(2024, 9, 30)
months = []
year = start_date.year
month = start_date.month + 1
if month > 12:
    # A December start would otherwise ask for month 13; roll into January.
    month = 1
    year += 1
while True:
    # The last day of a month is the day before the 1st of the following month.
    current = date(year, month, 1) + timedelta(days=-1)
    months.append(current)
    if current.month == end_date.month and current.year == end_date.year:
        break
    # Advance one month; `% 12` maps December to 0, which `or 12` restores.
    month = ((month + 1) % 12) or 12
    if month == 1:
        year += 1

# Day offsets of every month-end relative to start_date (first entry is 0).
days_from_start = [(day - start_date).days for day in months]
# Simple regression for the trend fits the model y = Ax + B. The estimator for the
# slope is \hat{A} = \frac{\sum (x_i - \bar{x})(y_i - \bar{y})}{\sum (x_i - \bar{x})^2},
# and that for the intercept is \hat{B} = \bar{y} - \hat{A} \bar{x}.
# Fit the linear trend of price against elapsed time.
# assumes days_from_start holds one day offset per entry of prices — TODO confirm
# NOTE(review): simple_regression is not defined in the visible part of this
# file; presumably it returns (slope, intercept) for y = slope * x + intercept.
time = np.array(days_from_start)
slope, intercept = simple_regression(time, prices)
# From this plot we see that the linear trend has been captured. Now to fit the
# intra-year variation.
# Given that natural gas is used more in winter and less in summer, we can guess the
# period of the price movements to be about a year, or 12 months.
# Therefore we have a model y = A sin(kt + z) with a known frequency k. Rewriting
# y = A cos(z) sin(kt) + A sin(z) cos(kt),
# we can use bilinear regression, with no intercept, to solve for u = A cos(z), w = A sin(z).
# We now recover the original amplitude and phase shift as
# A = \sqrt{slope1^2 + slope2^2}, z = \tan^{-1}(slope2 / slope1).
# Convert the two fitted coefficients to polar form: amplitude is their
# Euclidean norm, and shift is the quadrant-aware angle atan2(slope2, slope1).
# NOTE(review): slope1 and slope2 are not defined in the visible part of this
# file — presumably the coefficients of sin(kt) and cos(kt) respectively; confirm.
amplitude = np.sqrt(slope1 ** 2 + slope2 ** 2)
shift = np.arctan2(slope2, slope1)
# Label the current axes in one call, add the legend, and display the figure.
plt.gca().set(
    xlabel='Date',
    ylabel='Price',
    title='Natural Gas Prices',
)
plt.legend()
plt.show()