pip install pandas
pip install pandas-datareader
First, we need to import the packages and modules.
import pandas as pd
from pandas_datareader import data
import matplotlib.pyplot as plt
from random import randint
import numpy as np
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
Now we define a function which sets the company we want to look at and which has as parameters the starting and the ending dates.
def get_data(start_date, end_date, symbol='NFLX'):
    """Download Yahoo price history for one ticker between two dates.

    Args:
        start_date: First date of the requested range, e.g. '2020-01-01'.
        end_date: Last date of the requested range, e.g. '2020-10-01'.
        symbol: Ticker symbol to download. Defaults to 'NFLX', the company
            this file analyses.

    Returns:
        Whatever ``data.get_data_yahoo`` returns for the request. The rest
        of this file uses it as a DataFrame with a date index and an
        'Adj Close' column.
    """
    # ``data`` is the pandas_datareader module imported at the top of the file.
    return data.get_data_yahoo(symbol, start_date, end_date)
Now we define a function which calculates the ordinary least squares (OLS) regression line y = a*x + b and returns either the fitted values of that line or only the two parameters a (slope) and b (intercept).
def OLS(df, k=1):
    """Fit an ordinary-least-squares line to the 'Adj Close' column of ``df``.

    The rows are numbered 0, 1, 2, ... in order and used as the x values;
    the 'Adj Close' values are the y values. The fitted line is
    ``y = a*x + b``. The equation is printed with both parameters rounded
    to two decimals.

    Args:
        df: DataFrame with an 'Adj Close' column and at least two rows.
        k: Selects what is returned. 0 returns the parameters ``(a, b)``;
            1 (the default) returns the fitted y values.

    Returns:
        ``(a, b)`` (slope, intercept) when ``k == 0``; a list with one
        fitted value per row of ``df`` when ``k == 1``.

    Raises:
        ValueError: If ``k`` is neither 0 nor 1, or if ``df`` has fewer
            than two rows (the slope is undefined in that case).
    """
    # Reject an unknown selector up front instead of silently returning None.
    if k not in (0, 1):
        raise ValueError("k must be 0 (return a, b) or 1 (return fitted values)")

    x_real = np.arange(len(df.index))
    y_real = np.asarray(df['Adj Close'], dtype=float)
    if len(x_real) < 2:
        raise ValueError("at least two observations are needed to fit a line")

    x_bar = x_real.mean()
    y_bar = y_real.mean()

    # Slope = sum((x - x_bar) * (y - y_bar)) / sum((x - x_bar)**2).
    dx = x_real - x_bar
    dy = y_real - y_bar
    a = np.sum(dx * dy) / np.sum(dx * dx)
    # The fitted line passes through the point of means (x_bar, y_bar).
    b = y_bar - a * x_bar

    print("Ordinary least square equation: y = {0:.2f}x+{1:.2f}".format(a, b))

    if k == 0:
        return a, b
    return (a * x_real + b).tolist()
We now need to load our data and compute the appropriate linear equation for it.
# Download the price history for the period that will be fitted and plotted.
df = get_data(start_date='2020-01-01', end_date='2020-10-01')
# k=1 asks OLS for the fitted y values of the regression line.
eq = OLS(df, k=1)
Finally, we can plot the data with the appropriate OLS line. Since the stock market is closed on weekends and on public holidays, there are missing days. That is why we first need to define x as an ordinary array and then relate the dates later.
# Use the row position (0, 1, 2, ...) as x so that the gaps left by
# non-trading days do not appear on the axis.
x = np.arange(len(df.index))
fig, ax = plt.subplots(figsize=(12, 6))
# x holds integer positions, not dates, so draw plain 'o' markers with
# ax.plot rather than ax.plot_date, which would treat x as dates.
ax.plot(x, df['Adj Close'], 'o')
fig.suptitle('NFLX')
ax.set_xlabel('Date')
ax.set_ylabel("Adj Close")
# Overlay the fitted OLS line.
ax.plot(x, eq)
# Label the x axis with the dates. Here it is every 14 trading days.
xt = np.arange(0, len(df.index), step=14)
xl = df.index[xt].date
ax.set_xticks(xt, minor=False)
ax.set_xticklabels(xl, minor=False, rotation=45)
plt.show()
If we now want to predict the stock price for 1 December 2020, we can use our OLS equation to calculate the predicted price. However, we first need to convert the date into an integer. Since the stock market is closed on Saturdays and Sundays, as well as on public holidays, we need to find out which integer corresponds to 1 December 2020.
# Download everything up to the target date; the entry count reported by
# info() is the number of rows in that range.
df = get_data(start_date='2020-01-01', end_date='2020-12-01')
df.info()
We see that we have 232 entries from the start of the year until the first December 2020. So by using our OLS equation, we get the following predicted stock price:
# Fit the line on the data up to 30 September 2020 only.
df = get_data('2020-01-01','2020-09-30')
a, b = OLS(df, 0)
# OLS numbers the rows 0, 1, 2, ..., so the last of the 232 entries between
# 1 January and 1 December 2020 has x = 232 - 1 = 231, not 232.
n_entries_until_target = 232
y = a * (n_entries_until_target - 1) + b
y
However, if we look at the actual stock price on 1 December 2020, we see that the price is 503.73 (see Yahoo Finance), so we have an error of approximately 13%. This error occurs because the relationship between the dependent and the independent variable is not truly linear. However, the OLS equation helps us to get a direction of the stock prices in the near future.
# Repeat the fit and the plot on the longer period ending 28 November 2020.
df = get_data('2020-01-01','2020-11-28')
eq = OLS(df, 1)
# Use the row position (0, 1, 2, ...) as x so that the gaps left by
# non-trading days do not appear on the axis.
x = np.arange(len(df.index))
fig, ax = plt.subplots(figsize=(12, 6))
# x holds integer positions, not dates, so draw plain 'o' markers with
# ax.plot rather than ax.plot_date, which would treat x as dates.
ax.plot(x, df['Adj Close'], 'o')
fig.suptitle('NFLX')
ax.set_xlabel('Date')
ax.set_ylabel("Adj Close")
# Overlay the fitted OLS line.
ax.plot(x, eq)
# Label the x axis with the dates. Here it is every 14 trading days.
xt = np.arange(0, len(df.index), step=14)
xl = df.index[xt].date
ax.set_xticks(xt, minor=False)
ax.set_xticklabels(xl, minor=False, rotation=45)
plt.show()