import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.dpi'] = 120
plt.rcParams['figure.figsize'] = [10,5]
plt.rcParams['axes.grid'] = True
import quandl as qdl
from pandas.plotting import register_matplotlib_converters
#plt.rcParams.keys()
register_matplotlib_converters()
#warnings.warn(msg, FutureWarning)
data = qdl.get("WSE/WIG30")
plt.plot(data['High'])
plt.show()
Wig_on_high = data['High']
Low, High = np.min(Wig_on_high.array), np.max(Wig_on_high.array)
print(Low, High)
Wig_on_high.index
from scipy.stats import gaussian_kde
smoothed = gaussian_kde(Wig_on_high.array) # create an object containing the data
val = np.linspace(Low-100, High+100, 1000)
plt.hist(Wig_on_high, bins=np.linspace(Low, High, 40), density='True', label='generated dist.', facecolor='green', alpha=0.25, edgecolor='black', linewidth=1.2)
smoothed_val = smoothed(val)
plt.plot(val, smoothed_val)
plt.show()
Value below which there is $\alpha$ data out of the whole data.
$$VaR_\alpha: \int_0^{VaR_\alpha} \rho(x)dx = \alpha$$where $\rho(x)$ is a distribution. For normal distribution
$$VaR_\alpha={\displaystyle \mu +\sigma {\sqrt {2}}\operatorname {erf} ^{-1}(2\alpha-1)}$$normalization = np.sum(smoothed_val)
y = smoothed_val/(normalization)
cum_y = np.cumsum(y)
plt.plot(val, cum_y)
plt.plot(val, y/np.max(y))
level = 10/100
risk = len(val[cum_y<level]) # index of level risk
VaR = val[risk] # value at risk
print(f"VaR({level*100:.1f}%) = {VaR:.2f}")
plt.plot(val, cum_y)
plt.plot(val, y/np.max(y))
plt.fill_between(val[0:risk], 0, y[0:risk]/np.max(y), color='red', alpha = 0.20)
plt.plot(val[0:risk], y[0:risk]/np.max(y), 'r')
Calculate VaR direcly from the bins from the histogram. Write a function which iteratively adjusts number and bin positions for given $\alpha$
Calculate the daily VaR rom WIG data (use histogram for daily relative differences $\displaystyle \frac{y_{t}-y_{t-1}}{y_{t-1}}$.
quad
to integrate and fsolve
to solve appropriate equation). Fast Fourier Transform (FFT):
$${\begin{matrix}X_{k}&=&\sum \limits _{m=0}^{N/2-1}x_{2m}e^{-{\frac {2\pi i}{N}}(2m)k}+\sum \limits _{m=0}^{N/2-1}x_{2m+1}e^{-{\frac {2\pi i}{N}}(2m+1)k}\end{matrix}} = F_k(X_0, X_2,\ldots)+e^{-2\pi ik/N}F_k(X_1, X_3,\ldots)$$But also
$${\begin{matrix}X_{k+N/2}&=&\sum \limits _{m=0}^{N/2-1}x_{2m}e^{-{\frac {2\pi i}{N}}(2m)k}+\sum \limits _{m=0}^{N/2-1}x_{2m+1}e^{-{\frac {2\pi i}{N}}(2m+1)k}\end{matrix}} = F_k(X_0, X_2,\ldots)-e^{-2\pi ik/N}F_k(X_1, X_3,\ldots)$$Instead of 2 sums over $N$ variables we need to calculate two sums over $N/2$ vars.
It can be done recursively reducing complexity $\mathcal{O}(N^2)\to\mathcal{O}(N\log_2N)$
There are algorithms which are generalized but works best for $2^n3^m5^k$
def power_spectrum(signal, T):
N = len(signal)
fft = np.fft.rfft(signal)
power_spect = np.abs(fft)*2/N
power_spect[0] /= 2
fft_freqs = np.linspace(0, np.pi/(t[1]-t[0]), len(power_spect), endpoint=False)
return fft_freqs, power_spect
N = 1001
T = 2*np.pi
t = np.linspace(0, T, N, endpoint=True)
y=np.empty([N, 4])
y[:,0]= 1+np.sin(10*t)+0.5*np.cos(20*t)
y[:,1] = np.cos(30.5*t)
y[:,2] = np.cos(40*t)*np.exp(-0.5*t)
y[:, 3] = np.cos(1005*t)
y_fft = np.empty([N//2+1, 4])
for n in range(4):
freqs, y_fft[:,n] = power_spectrum(y[:,n], T)
for n in range(4):
freqs, y_fft[:,n] = power_spectrum(y[:,n], T)
plt.fill_between(freqs, y_fft[:,n], step="mid", alpha=0.2, color='C'+str(n))
plt.step(freqs, y_fft[:,n], where='mid', color='C'+str(n))
plt.xlim(0, 50)
#plt.step(fft_freqs, power_spectrum, where='mid', color='black')
plt.xlabel('Frequency')
plt.ylabel('Power')
plt.legend(['matching', 'inacurate', 'damped', 'aliased'])
plt.show()
For $x_n=\exp(2\pi ik'n/N)$ we obtain:
$$X_{k}=\sum _{n=0}^{N-1}\exp\left[\frac{2\pi i(k'-k)n}{N}\right]=\sum _{n=0}^{N-1}q^n=\frac{1-q^N}{1-q}$$If $k'\in \mathbb{N}$ then $q^N=1$ and $X_k=\delta_{kk'}$
if $k'\notin \mathbb{N}$ then $\displaystyle |X_k|\sim \frac{1}{k-k'}$
if $k'>N$ than $x_n$ is aliased $k'\to k'\mod{N}$
For better visual properties sometimes the data are multiplied by a window
$$x_n\to w_nx_n$$The most popular windows are:
import scipy.signal as sg
N = 1000
n = np.arange(N)
for k, window in enumerate([sg.blackman, sg.triang, sg.parzen, sg.hamming]):
plt.subplot(2, 2, k+1)
plt.plot(n, window(N), label=window.__name__, color='C'+str(k))
plt.legend()
Y = np.cos(100.5*t)+np.sin(200.1632546*t)+np.sin(300*t)
freqs, Y_fft = power_spectrum(Y, T)
window = sg.blackman(Y.size)
Yw = Y*window
freqs, Yw_fft = power_spectrum(Yw, T)
plt.semilogy(freqs, Y_fft)
plt.semilogy(freqs, Yw_fft)
plt.legend(['pure FFT', 'FFT through a window'])
from scipy.stats import norm
def brownian_motion(σ=2, T=10, N=1000):
t = np.linspace(0, T, N)
dt = t[1]-t[0]
r = norm.rvs(size=t.shape, scale=σ**2*dt)
x = np.cumsum(r) # used cumulative sum instead of loop
return x
M = 10000
brown = brownian_motion(N=M)
s = np.arange(M)
plt.plot(s, brown)
freqs, brown_fft = power_spectrum(brown, T)
plt.loglog(freqs[2:], brown_fft[2:], 'brown')
a=plt.loglog(freqs[2:], 2e-1/freqs[2:], 'green')
a = plt.plot(Wig_on_high, 'b', lw = 0.8)
fft = np.fft.rfft(Wig_on_high)
fft[20:] = 0 # we leave only first 20 frequencies - low frequency filter
ifft = np.fft.irfft(fft)
a = plt.plot(Wig_on_high.index, ifft, 'r')
t = np.linspace(0, 2*np.pi, 10000)
sig = np.sin(100*t*(1+t*t)) + np.cos(2000*t*(1+0.1*np.sin(t)))
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(sig, Fs=10000, noverlap=128)
# Better frequency resolution
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(sig, Fs=10000, noverlap=128, NFFT=1024)
# Better time resolution
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(sig, Fs=10000, noverlap=16, NFFT=32)
plt.subplot(2,1,1)
plt.plot(Wig_on_high, 'b')
plt.legend(['non stationary'])
plt.subplot(2,1,2)
plt.plot(Wig_on_high.index[1:], np.diff(Wig_on_high.array), 'r')
plt.legend(['~ stationary'])
Smoothing - better to see the trends
Simple moving average (window size = $M$)
$$Y_i = \frac{1}{M}\sum_{j=i-M}^i X_j$$
More general weighted averages
$$Y_i = \frac{1}{M}\sum_{j=i-M}^i w_jX_j$$
with different types of windows (gaussian, triangular, Parzen, triangular etc)
Exponential smoothing
$$Y_i = \alpha X_i+(1-\alpha)Y_{i-1}$$
from sklearn.metrics import mean_absolute_error
def plot_moving_average(series, window, plot_intervals=False, scale=1.96):
rolling_mean = series.rolling(window=window).mean()
plt.figure(figsize=(12,6))
plt.title('Moving average\n window size = {}'.format(window))
plt.plot(rolling_mean, 'g', label='Rolling mean trend')
#Plot confidence intervals for smoothed values
if plot_intervals:
mae = mean_absolute_error(series[window:], rolling_mean[window:])
deviation = np.std(series[window:] - rolling_mean[window:])
lower_bound = rolling_mean - (mae + scale * deviation)
upper_bound = rolling_mean + (mae + scale * deviation)
plt.plot(upper_bound, 'r--', label='Upper bound / Lower bound')
plt.plot(lower_bound, 'r--')
plt.plot(series[window:], label='Actual values')
plt.legend(loc='best')
plot_moving_average(data.High, 30)
plot_moving_average(data.High, 90, plot_intervals=True)
### Tripple crossover method
def tripple_crossover_plot(series):
r20 = series.rolling(window=20, center=False).mean()
r50 = series.rolling(window=50, center=False).mean()
r200 = series.rolling(window=200, center=False).mean()
plt.plot(series, 'black', lw=0.8)
plt.plot(r20); plt.plot(r50); plt.plot(r200)
plt.legend(['real data', '20 day SMA', '50 day SMA', '200 day SMA'])
plt.show()
tripple_crossover_plot(Wig_on_high)
import pandas as pd
plt.plot(Wig_on_high, 'b', linewidth=0.8, alpha=0.5)
y1 = pd.DataFrame.rolling(Wig_on_high, window=30, center=True).mean()
y2 = pd.DataFrame.rolling(Wig_on_high, window=60, center=True, win_type='triang').mean()
plt.plot(y1, 'r')
plt.plot(y2, 'g')
exp1 = Wig_on_high.ewm(span=20, adjust=False).mean()
exp2 = Wig_on_high.ewm(span=50, adjust=False).mean()
plt.plot(Wig_on_high, 'b', linewidth=0.5, alpha=0.5)
plt.plot(exp1)
plt.plot(exp2)
a= plt.legend(['real', '20-day', '50-day'])
Continuous version: $${\displaystyle R_{ff}(\tau )=\int _{-\infty }^{\infty }f(t+\tau ){\overline {f(t)}}\,{\rm {d}}t=\int _{-\infty }^{\infty }f(t){\overline {f(t-\tau )}}\,{\rm {d}}t}$$
from statsmodels.graphics.tsaplots import plot_acf
def auto_example1():
Y = np.cos(4*t)
fig, ax = plt.subplots(figsize=(10, 2.5))
plt.figure(figsize=(10,2.5))
a = plt.plot(t,Y)
plot_acf(Y, lags=Y.size-1, ax=ax)
auto_example1()
# nonstationary process
a = plot_acf(Wig_on_high, lags=Wig_on_high.size-1)
# stationary process
y = np.diff(Wig_on_high)
a = plot_acf(y, lags=y.size-1)
Let us consider a model: $y_{t}=\rho y_{t-1}+u_{t}\,$ where $\rho$ is a certain constant and $u_t$ is a random variable.
The model generates a non-stationary series if $\rho=1$ (there exists a s unit root)
Test for unit root:
Augmented Dickey–Fuller test $$\Delta y_t = \alpha + \beta t + \gamma y_{t-1} + \delta_1 \Delta y_{t-1} + \cdots + \delta_{p-1} \Delta y_{t-p+1} + u_t, $$
import statsmodels
from statsmodels.tsa.stattools import adfuller
help(adfuller)
res = adfuller(Wig_on_high)
print(res)
if res[1]>0.05:
print("The series is not stationary") # we cannot reject that there is a unit root
else:
print("The series is stationary")
res = adfuller(np.diff(Wig_on_high))
print(res)
if res[1]>0.05:
print("The series is not stationary") # we cannot reject that there is a unit root
else:
print("The series is stationary")
Calculate VaR direcly from the bins from the histogram. Write a function which iteratively adjusts number and bin positions for given $\alpha$
Calculate the daily VaR rom WIG data (use histogram for daily relative differences $\displaystyle \frac{y_{t}-y_{t-1}}{y_{t-1}}$.
Write a function calculating VaR for distribution given by analytic function (use quad
to integrate and fsolve
to solve appropriate equation).
Check if WIG is a brownian motion.