0% found this document useful (0 votes)
14 views

JPMC - Task 1

JPMC Quant Research

Uploaded by

Pradeep gandhe
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
14 views

JPMC - Task 1

JPMC Quant Research

Uploaded by

Pradeep gandhe
Copyright
© © All Rights Reserved
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 4

Task 1 – Example Answer with Python Code

import os

cwd = os.getcwd()

print("Current working directory: {0}".format(cwd))

print ("os.getcwd() returns an object of type


{0}".format(type(cwd)))

# copy the filepath


os.chdir ("________")

# let's jump into task 1

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from datetime import date, timedelta

date_time = ["10-2020", "11-2020", "12-2020"]


date_time = pd.to_datetime(date_time)
data = [1, 2, 3]

df = pd.read_csv('natgas_R.csv', parse_dates=['Dates'])
prices = df['Prices'].values
dates = df['Dates'].values

# plot prices against dates


fig, ax = plt.subplots()
ax.plot_date(dates, prices, '-')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('Natural Gas Prices')
ax.tick_params(axis='x', rotation=45)

plt.show()

# From the plot - we can see the prices have a natural frequency of around a year, but
trend upwards.
# We can do a linear regression to get the trend, and then fit a sin function to the
variation in each year.

# First we need the dates in terms of days from the start, to make it easier to interpolate
later.
start_date = date(2020,10,31)
end_date = date(2024,9,30)
months = []
year = start_date.year
month = start_date.month + 1
while True:
current = date(year, month, 1) + timedelta(days=-1)
months.append(current)
if current.month == end_date.month and current.year ==
end_date.year:
break
else:
month = ((month + 1) % 12) or 12
if month == 1:
year += 1

days_from_start = [(day - start_date ).days for day in months]

# Simple regression for the trend will fit to a model y = Ax + B. The estimator for the
slope is given by \hat{A} = \frac{\sum (x_i - \bar{x})(y_i - \bar{y})}{\sum (x_i - \
bar{x})^2},
# and that for the intercept by \hat{B} = \bar{y} - hat{A} * \xbar

def simple_regression(x, y):


xbar = np.mean(x)
ybar = np.mean(y)
slope = np.sum((x - xbar) * (y - ybar))/ np.sum((x -
xbar)**2)
intercept = ybar - slope*xbar
return slope, intercept

time = np.array(days_from_start)
slope, intercept = simple_regression(time, prices)

# Plot linear trend


plt.plot(time, prices)
plt.plot(time, time * slope + intercept)
plt.xlabel('Days from start date')
plt.ylabel('Price')
plt.title('Linear Trend of Monthly Input Prices')
plt.show()
print(slope, intercept)

# From this plot we see the linear trend has been captured. Now to fit the intra-year
variation.
# Given that natural gas is used more in winter, and less in summer, we can guess the
frequency of the price movements to be about a year, or 12 months.
# Therefore we have a model y = Asin( kt + z ) with a known frequency.Rewriting y =
Acos(z)sin(kt) + Asin(z)cos(kt),
# we can use bilinear regression, with no intercept, to solve for u = Acos(z), w = Asin(z)

sin_prices = prices - (time * slope + intercept)


sin_time = np.sin(time * 2 * np.pi / (365))
cos_time = np.cos(time * 2 * np.pi / (365))

def bilinear_regression(y, x1, x2):


# Bilinear regression without an intercept amounts to projection onto the x-vectors
slope1 = np.sum(y * x1) / np.sum(x1 ** 2)
slope2 = np.sum(y * x2) / np.sum(x2 ** 2)
return(slope1, slope2)

slope1, slope2 = bilinear_regression(sin_prices, sin_time,


cos_time)

# We now recover the original amplitude and phase shift as A = slope1 ** 2 + slope2 **
2, z = tan^{-1}(slope2/slope1)
amplitude = np.sqrt(slope1 ** 2 + slope2 ** 2)
shift = np.arctan2(slope2, slope1)

# Plot smoothed estimate of full dataset


plt.plot(time, amplitude * np.sin(time * 2 * np.pi / 365 +
shift))
plt.plot(time, sin_prices)
plt.title('Smoothed Estimate of Monthly Input Prices')

# Define the interpolation/extrapolation function


def interpolate(date):
days = (date - pd.Timestamp(start_date)).days
if days in days_from_start:
# Exact match found in the data
return prices[days_from_start.index(days)]
else:
# Interpolate/extrapolate using the sin/cos model
return amplitude * np.sin(days * 2 * np.pi / 365 +
shift) + days * slope + intercept

# Create a range of continuous dates from start date to end date


continuous_dates =
pd.date_range(start=pd.Timestamp(start_date),
end=pd.Timestamp(end_date), freq='D')

# Plot the smoothed estimate of the full dataset using interpolation


plt.plot(continuous_dates, [interpolate(date) for date in
continuous_dates], label='Smoothed Estimate')

# Fit the monthly input prices to the sine curve


x = np.array(days_from_start)
y = np.array(prices)
fit_amplitude = np.sqrt(slope1 ** 2 + slope2 ** 2)
fit_shift = np.arctan2(slope2, slope1)
fit_slope, fit_intercept = simple_regression(x, y -
fit_amplitude * np.sin(x * 2 * np.pi / 365 + fit_shift))
plt.plot(dates, y, 'o', label='Monthly Input Prices')
plt.plot(continuous_dates, fit_amplitude *
np.sin((continuous_dates - pd.Timestamp(start_date)).days * 2
* np.pi / 365 + fit_shift) + (continuous_dates -
pd.Timestamp(start_date)).days * fit_slope + fit_intercept,
label='Fit to Sine Curve')

plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Natural Gas Prices')
plt.legend()
plt.show()

You might also like