import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.dpi'] = 100
plt.rcParams['figure.figsize'] = [10,5]
plt.rcParams['axes.grid'] = True
#import quandl as qdl
from pandas.plotting import register_matplotlib_converters
#plt.rcParams.keys()
import datetime
import pandas_datareader.data as web
import pandas as pd
start, end = datetime.datetime(1971, 12, 30), datetime.datetime(2019, 12, 5)
#tickers = ["^DJI", "^IXIC", "^GSPC", "^STOXX50E", "^N225", "^GDAXI"]
tickers = ["^IXIC", "^GSPC","^N225"]
asset_universe = pd.DataFrame([web.DataReader(ticker, 'yahoo', start,
end).loc[:, 'Adj Close'] for ticker in tickers],
index=tickers).T.fillna(method='ffill')
asset_universe = asset_universe/asset_universe.iloc[0, :]
a = plt.plot(asset_universe)
plt.legend(tickers)
Returns: $$r_i=\frac{p_i-p_j}{p_j}\,,\qquad j=i-1$$ Usually the prices are log-normally distributed Log returns: $$1+r_i=\frac{p_i}{p_j}=\exp\left[\log\left(\frac{p_i}{p_j}\right)\right]$$ Accumulation: $$(1+r_1)(1+r_2)(1+r_3)\ldots=\prod_{i=1}(1+r_i)$$ But better: $$\log(1+r_1)+\log(1+r_2)+\log(1+r_3)\ldots=\sum_{i=1}\log(1+r_i)$$
def show_assets(n):
data = asset_universe[tickers[n]]
time = data.index[1:]
df = np.diff(data)
log_returns = np.log(1+np.diff(data)/data[0:-1])
var = np.var(df)
mean = np.mean(df)
print(f'analysis for {tickers[n]}: {mean:.6f} ± {np.sqrt(var):.6f}')
plt.figure(figsize=(10, 6), dpi=100)
plt.subplot(2, 2, 1)
plt.plot(time, df, 'b')
plt.subplot(2, 2, 2)
plt.hist(df, bins=np.linspace(-0.25, 0.25, 50), density='True', color='C1')
t = np.linspace(-0.25, 0.25, 500)
y = 1/np.sqrt(2*np.pi*var)*np.exp(-(t-mean)**2/(2*var))
plt.plot(t, y, 'black')
plt.subplot(2, 2, 3)
plt.plot(time, log_returns, 'g')
plt.subplot(2, 2, 4)
plt.hist(log_returns, bins=np.linspace(-0.05, 0.05, 50), density='True', color='C3')
var = np.var(log_returns)
mean = np.mean(log_returns)
t = np.linspace(-0.05, 0.05, 500)
y = 1/np.sqrt(2*np.pi*var)*np.exp(-(t-mean)**2/(2*var))
plt.plot(t, y, 'black')
plt.show()
print(log_returns.describe())
show_assets(0)
def power_spectrum(signal, dt):
N = len(signal)
fft = np.fft.rfft(signal)
power_spect = np.abs(fft)*2/N
power_spect[0] /= 2
fft_freqs = np.linspace(0, np.pi/(dt), len(power_spect), endpoint=False)
return fft_freqs, power_spect
def Fouriers(n):
plt.subplot(2, 1, 1)
data = asset_universe[tickers[n]]
freqs, Y_fft = power_spectrum(data, 1)
plt.semilogy(freqs, Y_fft)
plt.loglog(freqs, Y_fft)
plt.subplot(2, 1, 2)
log_returns = np.log(1+np.diff(data)/data[0:-1])
freqs, Y_fft = power_spectrum(log_returns, 1)
plt.semilogy(freqs, Y_fft, color='C2')
Fouriers(0)
def Spectrograms(n):
plt.subplot(2, 2, 1)
data = asset_universe[tickers[n]]
plt.plot(data)
plt.subplot(2, 2, 3)
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(data, Fs=2*np.pi, noverlap=196)
log_returns = np.log(1+np.diff(data)/data[0:-1])
plt.subplot(2, 2, 2)
plt.plot(log_returns, color='r')
plt.subplot(2, 2, 4)
powerSpectrum, freqenciesFound, time, imageAxis = plt.specgram(log_returns, Fs=2*np.pi, noverlap=196)
Spectrograms(0)
import pywt
print(pywt.families(short=False))
wavlist = pywt.wavelist()
print(*wavlist)
def show_wavelets():
plt.figure(figsize=[12, 4], dpi=90)
wavlist = pywt.wavelist(kind='continuous')
for n, wavlet_type in enumerate(wavlist[-7:-1]):
plt.subplot(2,3, n+1)
wavelet = pywt.ContinuousWavelet(wavlet_type)
psi, x = wavelet.wavefun(level=10)
plt.plot(x,psi, color='C'+str(n))
plt.legend([wavlet_type])
Mexican hat 'mexh'(Rickon wavelet):
$\displaystyle \psi(t) = \frac{2}{\sqrt{3} \sqrt[4]{\pi}} \exp^{-\frac{t^2}{2}}\left( 1 - t^2 \right)$
Morlet wavelet 'morl': $\displaystyle \psi(t) = \exp^{-\frac{t^2}{2}} \cos(5t)$ - Fourier transform with gaussian window
print(*pywt.wavelist(kind='continuous'))
show_wavelets()
print(*pywt.wavelist(kind='discrete'))
def show_wavelets_discrete():
plt.figure(figsize=[12, 4], dpi=90)
wavlist = pywt.wavelist(kind='discrete')
for n, wavlet_type in enumerate(['db1' , 'db2', 'db6', 'coif1', 'haar', 'sym4']):
plt.subplot(2,3, n+1)
wavelet = pywt.Wavelet(wavlet_type)
phi, psi, x = wavelet.wavefun(level=9)
plt.plot(x,psi, color='C'+str(2*n))
plt.plot(x,phi, color='C'+str(2*n+1))
plt.legend([wavlet_type + ' $\phi$', wavlet_type + ' $\psi$'])
show_wavelets_discrete()
def show_signal():
x = np.linspace(0, 1, 1024)
y = np.empty_like(x)
y = np.sin(16*x*(1+4*x))
#plt.subplot(211)
plt.xlim(0, 1)
plt.plot(x,y)
#plt.subplot(212)
plt.ylim(-2.5, 1)
wavelet = pywt.ContinuousWavelet('mexh')
psi, x = wavelet.wavefun(level=10)
plt.plot((x)/16+0.075, psi-1.8, 'C1')
plt.plot((x)/32+0.36, psi-1.8, 'C2')
plt.plot((x)/64+0.836, psi-1.8, 'C3')
plt.xlim(0, 1)
show_signal()
Continuous wavelet transform (nice plots) $${\displaystyle X(a,b)={\frac {1}{\sqrt{a}}}\int _{-\infty }^{\infty }x(t){\overline {\psi }}\left({\frac {t-b}{a}}\right)\,dt}$$
Usually (for discrete wavelet transform) $\Delta t\Delta \omega>\pi$ $$\psi_{m,n}(t) = \frac{1}{\sqrt{2^m}}\psi\left(\frac{t-n2^m}{2^m}\right)$$
import pywt
import numpy as np
import matplotlib.pyplot as plt
nsamples =1024
fig, ax = plt.subplots(1,1)
x = np.linspace(0, 2*np.pi, nsamples, endpoint=False)
y = np.sin(4*x*(1+x)) + np.sin(4*x) + np.random.random(x.size)
nscales=9*nsamples/16
n = np.arange(0, nscales)
scales = 2**(n/nscales * np.log2(nsamples))
coef, freqs=pywt.cwt(y, scales, 'morl', sampling_period=1)
plt.imshow(coef, extent=[0, 2*np.pi, 0, nscales], cmap='jet', aspect='auto')
fr = np.array([1, 4, 10, 20, 50, 100, 200, 500])
n = nscales*np.log2(fr)/np.log2(nsamples)
ax.set_yticks(n)
ax.set_yticklabels(fr)
plt.show()
def mywavletplot(sig, T, Vmin=None, Vmax=None, labels=False, wavelet_type='mexh'):
nsamples = len(sig)
nscales=9*nsamples/16
n = np.arange(1, nscales)
scales = 2**(n/nscales * np.log2(nsamples))
coef, freqs=pywt.cwt(sig, scales, wavelet_type)
freqs /= T
# ax = plt.subplots(2,2)
plt.subplot(2,2,1)
t = np.linspace(0, T, nsamples)
plt.plot(t, sig)
#plt.matshow(coef) # doctest: +SKIP
if labels==True:
fr = np.array([1, 4, 10, 20, 50, 100, 200, 500])
n = nscales*np.log2(fr)/np.log2(nsamples)
ax = plt.subplot(2,2,3)
plt.imshow(coef, extent=[0, T, 1, nscales], cmap='jet', aspect='auto', vmin=Vmin, vmax=Vmax)
if labels==True:
ax.set_yticks(n)
ax.set_yticklabels(fr)
ax = plt.subplot(2,2,2)
coef2 = coef**2
plt.imshow(coef2, extent=[0, T, 1, nscales], cmap='viridis', aspect='auto')
if labels==True:
ax.set_yticks(n)
ax.set_yticklabels(fr)
#plt.colorbar()
ff = np.log(1/scales)# 2*np.pi/freqs/nwidths
ax = plt.subplot(2,2,4)
max = np.max(coef2)
levels = [0.0, 0.1, 0.25, 0.5, 0.8, 1]
contour = plt.contour(t, ff, coef2/max, levels, colors='k')
contour_filled = plt.contourf(t, ff, coef2/max, levels, cmap='jet')
plt.show()
mywavletplot(y, 2*np.pi, wavelet_type = 'morl', labels=True)
sig2 = asset_universe[tickers[1]][::40]
len(sig2)
mywavletplot(sig2, 1)
data = asset_universe[tickers[1]][::40]
log_returns = np.log(1+np.diff(data)/data[0:-1])
mywavletplot(log_returns, 1)
data = asset_universe[tickers[2]][::40]
log_returns = np.log(1+np.diff(data)/data[0:-1])
mywavletplot(log_returns, 1)
def example_wavelet_smooth(data, smooth_level=7, max_level=9):
wavelet_type='db6'
DWTcoeffs = pywt.wavedec(data,wavelet_type,mode='symmetric', level=max_level, axis=-1)
for wave in DWTcoeffs[-smooth_level:]:
wave[:] = np.zeros_like(wave)
filtered_data_dwt=pywt.waverec(DWTcoeffs,wavelet_type,mode='symmetric',axis=-1)
plt.subplot(2, 1, 1)
plt.plot(data,label='original data', color='blue')
plt.legend(['original'])
plt.subplot(2, 1, 2)
plt.plot(data.index, filtered_data_dwt, color='red', label='filtered data (DWT)')
plt.legend(['filtered (DWT)'])
#asset_universe[tickers[1]].describe()
data = asset_universe[tickers[0]]
example_wavelet_smooth(data, smooth_level=7)
def example_wavelet_components(data, max_level=9):
wavelet_type='db6'
DWTcoeffs = pywt.wavedec(data,wavelet_type,mode='symmetric', level=max_level, axis=-1)
plt.figure(figsize=[12, 4], dpi=90)
plt.subplot(2, 3, 1)
plt.plot(data,label='original data', color='blue')
plt.legend(['original'])
for n, component in enumerate(list(range(4))+[max_level]):
plt.subplot(2, 3, 2+n)
Newcoeffs = DWTcoeffs.copy()
for lev in range(max_level+1):
if(lev!=component):
Newcoeffs[lev] = np.zeros_like(Newcoeffs[lev])
filtered_data_dwt=pywt.waverec(Newcoeffs, wavelet_type, mode='symmetric',axis=-1)
plt.plot(data.index, filtered_data_dwt, color='C'+str(component+1), label='filtered data (DWT)')
plt.legend([f'component {component}'])
print(f" component {component} has a time scale length {DWTcoeffs[component].size} DT = {len(data)/DWTcoeffs[component].size:.1f}")
example_wavelet_components(data, max_level=12)
Very often we need to solve the algebraic system of equations: $$A\vec x=\vec b\qquad A\in\mathbb{R}^{M\times N},\; \vec x\in \mathbb{R}^N,\; \vec b\in \mathbb{R}^M\qquad M,\; N\in \mathbb{N}$$ Unless stated otherwise we will focus on square matrices $M=N$.
If $A$ is invertible the system always has a unique solution which can be found via Gauss method.
Unfortunatelly Gauss ellimination method requires lots of subtractions which can lead to catastrophic cancellations.
Fortunatelly there exists a standard method which, in principle, should always give a solution if $A$ is invertible.
The method is called $LU$ decomposition, wich in some sense is similar to Gauss ellimination method but numeically more stable.
We rewrite the matrix $A=LU$, where $L$ and $U$ are lower and upper triangular matrices.
$LU$ decomposition is quite time $\color{red}{\mathcal{O}(N^3)}$ and memory $\mathcal{O}(N^2)$ consuming.
Therefore for certain problems (special symmetries of $A$ or other properties) it is always crucial to use other methods:
We rewrite $A=LU$ (or more precisely $A=PUL$ or $A=LUP$, where $P$ is a perturbation matrix), where where $L$ and $U$ are lower and upper triangular matrices. Why?
$$Ax=b\;\Rightarrow\;LUx=b\;\Rightarrow\;\begin{cases}
Ly&=b\\
Ux&=y
\end{cases}$$
Instead of a single equation we have two equations, but equations wich are easier to solve.
Forward substitution $$\begin{bmatrix}l_{11}&0&0\\l_{21}&l_{22}&0\\l_{31}&l_{32}&l_{33}\end{bmatrix} \begin{bmatrix}y_1\\y_2\\y_3\end{bmatrix}= \begin{bmatrix}b_1\\b_2\\b_3\end{bmatrix} $$
$$y_1=\frac{b_1}{l_{11}},\qquad y_2=\frac{b_2-l_{21}y_1}{l_{22}},\qquad y_3=\frac{b_3-l_{31}y_2-l_{32}y_2}{l_{33}}$$Backward substitution $$ \begin{bmatrix}u_{11}&u_{12}&u_{13}\\0&u_{22}&u_{23}\\0&0&u_{33}\end{bmatrix} \begin{bmatrix}x_1\\x_2\\x_3\end{bmatrix}= \begin{bmatrix}y_1\\y_2\\y_3\end{bmatrix} $$
$$x_3=\frac{y_3}{u_{33}},\qquad x_2=\frac{y_2-u_{23}x_3}{u_{22}},\qquad x_1=\frac{y_1-u_{12}x_2-u_{13}x_3}{u_{11}}$$Note that diagnal elements $u_{ii}$ $l_{ii}$ cannot be equal to $0$.
The solution of $Ly=b$ and $Ux=y$ costs $\mathcal{O}(N^2)$ calculations.
How to decompose $A$ into a product of two matrices?
Is it unique? No, there are two basic algorythms
$$\begin{bmatrix} a_{11}&a_{12}&\cdots &a_{1n}\\a_{21}&a_{22}&\cdots &a_{2n}\\\vdots &\vdots &\ddots &\vdots \\a_{n1}&a_{n2}&\cdots &a_{nn}\end{bmatrix}= \begin{bmatrix}1&0&\cdots &0\\l_{21}&1&\cdots &0\\\vdots &\vdots &\ddots &0\\l_{n1}&l_{n2}&\cdots &1\end{bmatrix}\cdot \begin{bmatrix}u_{11}&u_{12}&\cdots &u_{1n}\\0&u_{22}&\cdots &u_{2n}\\\vdots &\vdots &\ddots &\vdots \\0&0&\cdots &u_{nn}\end{bmatrix}$$
In general $$ u_{ij}=a_{ij}-\sum _{k=1}^{i-1}l_{ik}u_{kj}\qquad i\leq j\leq n$$
$$ l_{ji}={\frac {1}{u_{ii}}}\left(a_{ji}-\sum _{k=1}^{i-1}l_{jk}u_{ki}\right) \qquad i< j\leq n$$We have to find $N^2$ elements.
Calculation of those elements require sums of $N/2$ elements in average, hence the complexity $\mathcal{O}(N^3)$.
Problem: we have no guarantee that $u_{ii}\neq 0$. The algorithm has to be modified (pivoting partial (sufficient) or full): we exchange the current row with the row which has in the current column the largest modulus.
Row replecements can be represented as multiploication by a perturbation matrix $P$ which has $N$ elements equal to 1 and $N(N-1)$ equal to zero (never store the full matrix nor use full multiplcation).
import pprint as pp
import scipy as sc
import numpy as np
import scipy.linalg as lg # SciPy Linear Algebra Library
def lin_alg_example():
A = np.matrix(np.random.rand(3,3))
b = np.matrix(np.random.rand(3,1))
P, L, U = lg.lu(A)
print ("A:"); pp.pprint(A)
print ("b:"); pp.pprint(b)
print ("P:"); pp.pprint(P)
print ("L:"); pp.pprint(L)
print ("U:"); pp.pprint(U)
return A, b
A, b = lin_alg_example()
x = lg.solve(A, b)
print ("x:") ; pp.pprint(x)
print ("test:") ; pp.pprint(A*x)
def tridiagExample(N, printout=True):
A=np.matrix(np.zeros([N,N]))
for n in range(N):
A[n,n] = -2
for n in range(N-1):
A[n+1,n] = A[n,n+1] = 1
if printout==True:
print(A)
P, L, U = lg.lu(A)
print ("A:"); pp.pprint(A)
# print ("P:"); pp.pprint(P)
print ("nonzero elements of L:"); pp.pprint(np.int8((L!=0)))
print ("nonzero elements of U:"); pp.pprint(np.int8((U!=0)))
return A
tridiagExample(5)
A=tridiagExample(100,False)
Discretization gives $$y_n''\approx \frac{y_{n-1}-2y_n+y_{n+1}}{h^2}\,,\qquad x_n =-1+hn\,,\qquad h = \frac{2}{N-1}=\text{const}$$
Tridiagoanal matrix with equations elementsfor $0<n<N-1$
$$D_{n*}=\left(0, 0, \ldots, \frac{1}{h^2}, -\frac{2}{h^2}, \frac{1}{h^2}, 0, \ldots, 0, 0\right)\,,\quad b_n=6x_n$$plus two more (boundary) equations
$$D_{0, *}=(1, 0, 0,\ldots)\,,\qquad b_0=0$$$$D_{N-1, *}=(\ldots, 0, 0, 1)\,,\qquad b_{N-1}=2$$import scipy.sparse as sp # provides sparse matrices
import scipy.sparse.linalg as sl
def init_vectors(N, y1, y2):
x = np.linspace(-1, 1, N); # grid points
b = 6.*x; # example function
h = x[1]-x[0] # grid spaceing
u = np.ones(N)/(h*h)
data = np.array([u, -2*u, u]) # array Nx3 to fill the matrix
data[1,0] = data[1,-1] = 1 # elements for boundary conditions
data[2,1] = data[0, -2] = 0
b[0] = y1
b[-1] = y2
diags = np.array([-1, 0, 1]) # which diagonals to fill
D_sparse = sp.spdiags(data, diags, N, N, format='csc') # sparse 5-diagonal matrix
return h, x, b, D_sparse, data
import time
N=3000
h, x, b, D2, data = init_vectors(N, -1, 2)
A = D2.todense()
t = np.zeros(4)
t[0] = time.time() ; z1 = np.linalg.solve(A, b) ;
t[1] = time.time() ; z2 = sl.spsolve(D2, b);
t[2] = time.time() ; z3 = lg.solve_banded((1, 1), data[-1::-1], b); t[3] = time.time()
T = np.diff(t)
index = ['full solve', 'sparse', 'banded']
columns = ['time [ms]', 'relative time']
time_data = np.array([1000*T, T/np.min(T)]).transpose()
pandata = pd.DataFrame(time_data, index=index, columns=columns)
styles = [dict(selector="tr", props=[("font-size", "200%")]) ]
pandata.style.set_table_styles(styles).format({'time [ms]': "{: 10.3f}", 'relative time': '{: 10.2f}'})
plt.plot(x,z2)
plt.plot(x,z3)
plt.show()
N=20
h, x, b, D2, data = init_vectors(N, 2, -1)
A = D2.todense()
z2 = sl.spsolve(D2, b)
print(z2)
print(D2*z2-b)
p = (b[0]+b[-1])/2
s = p-1-b[0]
z3 = x**3+s*x+p
print(D2*z3-b)
print(A[-1, :])