0% found this document useful (0 votes)
17 views

JPMC - Task 1

JPMC Quant Research

Uploaded by

Pradeep gandhe
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
17 views

JPMC - Task 1

JPMC Quant Research

Uploaded by

Pradeep gandhe
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 4

Task 1 – Example Answer with Python Code

import os

# Show where the interpreter is currently running from, and what type
# os.getcwd() hands back.
cwd = os.getcwd()

print(f"Current working directory: {cwd}")
print(f"os.getcwd() returns an object of type {type(cwd)}")

# copy the filepath
# NOTE(review): "________" is a placeholder and must be replaced before
# running; presumably it should be the folder holding the CSV that the
# script reads next -- confirm.
os.chdir("________")

# let's jump into task 1

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from datetime import date, timedelta

# Small scratch example: month-year strings parsed into pandas timestamps.
# Neither `date_time` nor `data` is referenced again in the visible script.
date_time = pd.to_datetime(["10-2020", "11-2020", "12-2020"])
data = [1, 2, 3]

# Load the price history; the 'Dates' column is parsed to datetimes on read.
df = pd.read_csv('natgas_R.csv', parse_dates=['Dates'])
dates = df['Dates'].to_numpy()
prices = df['Prices'].to_numpy()

# Plot prices against dates.
# Axes.plot accepts datetime x-values directly, so it is used in place of
# the older Axes.plot_date helper, which recent Matplotlib releases deprecate.
fig, ax = plt.subplots()
ax.plot(dates, prices, '-')
ax.set_xlabel('Date')
ax.set_ylabel('Price')
ax.set_title('Natural Gas Prices')
ax.tick_params(axis='x', rotation=45)

plt.show()

# From the plot we can see that prices follow a seasonal cycle of roughly one year
# while trending upwards overall.
# We can fit a linear regression to capture the trend, and then fit a sine function
# to the variation within each year.

# First we need the dates expressed as days from the start date, which makes it
# easier to interpolate later.
# Build the list of month-end dates from start_date's month through
# end_date's month (inclusive), then express each as days since start_date.
start_date = date(2020, 10, 31)
end_date = date(2024, 9, 30)

months = []
year, month = start_date.year, start_date.month
# Comparing (year, month) tuples makes the loop terminate even if end_date
# is earlier than start_date (yielding an empty list).
while (year, month) <= (end_date.year, end_date.month):
    # Step to the following month, rolling the year over after December.
    if month == 12:
        year, month = year + 1, 1
    else:
        month += 1
    # The day before the 1st of the following month is the last day of the
    # month being recorded, whatever its length (28/29/30/31).
    current = date(year, month, 1) - timedelta(days=1)
    months.append(current)

days_from_start = [(day - start_date).days for day in months]

# Simple regression for the trend fits the model y = Ax + B. The estimator for the slope is
# \hat{A} = \frac{\sum_i (x_i - \bar{x})(y_i - \bar{y})}{\sum_i (x_i - \bar{x})^2},
# and the estimator for the intercept is \hat{B} = \bar{y} - \hat{A} \bar{x}.

def simple_regression(x, y):
    """Fit ``y = slope * x + intercept`` by ordinary least squares.

    Parameters
    ----------
    x, y : array-like
        Equal-length numeric sequences (lists or NumPy arrays).

    Returns
    -------
    tuple
        ``(slope, intercept)`` of the fitted line.

    Notes
    -----
    If every ``x`` value is identical the slope denominator is zero and the
    result is NaN (NumPy emits a runtime warning rather than raising).
    """
    # Coerce so that plain Python lists support the element-wise arithmetic.
    x = np.asarray(x, dtype=float)
    y = np.asarray(y, dtype=float)
    xbar = np.mean(x)
    ybar = np.mean(y)
    x_centered = x - xbar
    slope = np.sum(x_centered * (y - ybar)) / np.sum(x_centered ** 2)
    intercept = ybar - slope * xbar
    return slope, intercept

# Regress price on elapsed days to obtain the linear trend.
time = np.asarray(days_from_start)
slope, intercept = simple_regression(time, prices)

# Plot linear trend
trend_line = slope * time + intercept
plt.plot(time, prices)
plt.plot(time, trend_line)
plt.xlabel('Days from start date')
plt.ylabel('Price')
plt.title('Linear Trend of Monthly Input Prices')
plt.show()
print(slope, intercept)

# From this plot we see the linear trend has been captured. Now to fit the intra-year variation.
# Given that natural gas is used more in winter and less in summer, we can guess the period
# of the price movements to be about a year, or 12 months.
# Therefore we have a model y = A sin(kt + z) with a known frequency k. Rewriting it as
# y = A cos(z) sin(kt) + A sin(z) cos(kt),
# we can use bilinear regression, with no intercept, to solve for u = A cos(z), w = A sin(z).

# Subtract the fitted linear trend so only the residual variation remains.
sin_prices = prices - (intercept + slope * time)

# Sine and cosine regressors evaluated at each sample, using a 365-day period.
phase = time * 2 * np.pi / (365)
sin_time = np.sin(phase)
cos_time = np.cos(phase)

def bilinear_regression(y, x1, x2):
    """Least-squares fit of ``y = slope1 * x1 + slope2 * x2`` (no intercept).

    Parameters
    ----------
    y : array-like
        Response values.
    x1, x2 : array-like
        The two regressors, each the same length as ``y``.

    Returns
    -------
    tuple
        ``(slope1, slope2)``, the coefficients of ``x1`` and ``x2``.

    Notes
    -----
    Projecting ``y`` onto ``x1`` and ``x2`` separately only gives the
    least-squares coefficients when the two regressors are orthogonal.
    Sampled sine and cosine columns are in general not exactly orthogonal,
    so the coefficients are solved for jointly here.
    """
    y = np.asarray(y, dtype=float)
    design = np.column_stack(
        (np.asarray(x1, dtype=float), np.asarray(x2, dtype=float))
    )
    # lstsq solves the joint problem and also copes with collinear
    # regressors (returning the minimum-norm solution).
    coefficients = np.linalg.lstsq(design, y, rcond=None)[0]
    slope1, slope2 = coefficients
    return (slope1, slope2)

# Coefficients of the sine and cosine regressors in the detrended prices.
slope1, slope2 = bilinear_regression(sin_prices, sin_time, cos_time)

# Recover the amplitude and phase shift of A sin(kt + z) from
# slope1 = A cos(z) and slope2 = A sin(z):
#   A = sqrt(slope1 ** 2 + slope2 ** 2),  z = atan2(slope2, slope1)
amplitude = np.sqrt(slope1 ** 2 + slope2 ** 2)
shift = np.arctan2(slope2, slope1)

# Draw the fitted sine wave together with the detrended prices.
seasonal_fit = amplitude * np.sin(time * 2 * np.pi / 365 + shift)
plt.plot(time, seasonal_fit)
plt.plot(time, sin_prices)
plt.title('Smoothed Estimate of Monthly Input Prices')

# Define the interpolation/extrapolation function


def interpolate(date):
    """Return the price estimate for ``date``.

    Parameters
    ----------
    date : pandas.Timestamp or anything ``pandas.Timestamp`` accepts
        The day to price (e.g. a ``datetime.date`` or an ISO date string).

    Returns
    -------
    The observed price when ``date`` falls exactly on one of the sampled
    days in ``days_from_start``; otherwise the value of the fitted model
    ``amplitude * sin(2*pi*days/365 + shift) + days * slope + intercept``,
    which is defined for days inside and outside the sampled range.
    """
    # Whole days elapsed since the module-level start_date.
    days = (pd.Timestamp(date) - pd.Timestamp(start_date)).days
    try:
        # Exact match found in the data (single scan of the list).
        return prices[days_from_start.index(days)]
    except ValueError:
        # list.index raises ValueError when the day is not a sample point.
        pass
    # Interpolate/extrapolate using the sine-plus-trend model.
    return (
        amplitude * np.sin(days * 2 * np.pi / 365 + shift)
        + days * slope
        + intercept
    )

# Create a range of continuous (daily) dates from start date to end date
continuous_dates = pd.date_range(
    start=pd.Timestamp(start_date),
    end=pd.Timestamp(end_date),
    freq='D',
)

# Plot the smoothed estimate of the full dataset using interpolation
plt.plot(
    continuous_dates,
    [interpolate(day) for day in continuous_dates],
    label='Smoothed Estimate',
)

# Refit the linear trend on the monthly prices after subtracting the fitted
# sine term, then draw the samples and the resulting sine-plus-trend curve.
x = np.array(days_from_start)
y = np.array(prices)
fit_amplitude = np.sqrt(slope1 ** 2 + slope2 ** 2)
fit_shift = np.arctan2(slope2, slope1)
seasonal_at_samples = fit_amplitude * np.sin(x * 2 * np.pi / 365 + fit_shift)
fit_slope, fit_intercept = simple_regression(x, y - seasonal_at_samples)

# Days elapsed since start_date for every date on the daily grid.
elapsed_days = (continuous_dates - pd.Timestamp(start_date)).days
fitted_curve = (
    fit_amplitude * np.sin(elapsed_days * 2 * np.pi / 365 + fit_shift)
    + elapsed_days * fit_slope
    + fit_intercept
)

plt.plot(dates, y, 'o', label='Monthly Input Prices')
plt.plot(continuous_dates, fitted_curve, label='Fit to Sine Curve')

plt.xlabel('Date')
plt.ylabel('Price')
plt.title('Natural Gas Prices')
plt.legend()
plt.show()

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy