%Formulario
%Formulario
%linspace command
%linspace(X1, X2, n) returns a row vector of n equally spaced points going
%from X1 to X2: X1 is the first argument, X2 the second and n the third
%(when n is omitted, 100 points are generated)
%X1 and X2 are always the first and the last element of the result
x = linspace(0, 1, 10)
%"size" function
%the first output (the number of rows of A) is stored in the variable
%n_rows, the second output (the number of columns of A) in n_cols
[n_rows, n_cols] = size(A)
%"sum" function
sum(A)    %column-wise sum: adds up the elements of each column of A
sum(A, 2) %row-wise sum: adds up the elements of each row of A
%plots
plot(x, y, 'ok')  %scatter plot: black circular markers, no line
plot(x, y, '-b')  %line plot: solid blue line
plot(x, y, '-or') %solid red line with circular markers on the points
%draw the plot inside figure window 1 and give it a title
figure(1)
plot(x, y, 'ok')
title('blabla')
%"diff" function
%column vector (the transpose operator must be the plain apostrophe ')
x = [1,2,3,4,5]'
diff(x) %element k of the result is x(k+1) - x(k): the first component is the
%difference between the 2nd and the 1st component of x, and so on
%(the result therefore has one element fewer than x)
%logical indexing
n = sum(x == 1) %how many elements of x are equal to 1?
mean(price(price > 1)) %mean price, conditional on the price being > 1
%components of z taken at the positions where the element of z is < 0 AND
%the corresponding element of w is >= 2
x = z(z < 0 & w >= 2)
%components of z taken at the positions where the element of z is < 0 OR
%the corresponding element of w is >= 2
y = z(z < 0 | w >= 2)
%if statement
%NOTE(review): no if/else example follows this heading here -- confirm
%whether one is missing
u = rand %one draw from the uniform distribution on the unit interval
%"cumsum" function
a = (1:10).'
cumsum(a) %element j is the sum of a(1), a(2), ..., a(j)
%hence the first element of cumsum(a) coincides with the first element of a
%strcmp command
%the string compare command compares each row of the column T.Gender with
%'F': we obtain a logical vector (1 if the comparison is true, 0 if it is
%false)
strcmp(T.Gender, 'F') %1 for females, 0 for males
%ages of females (we select the age every time there's a female)
T.Age(strcmp(T.Gender, 'F'))
%prctile command
%how many smokers have a Systolic pressure larger than the 90th percentile
%(the value of the Systolic pressure below which 90% of the sample lies)?
perc90 = prctile(T.Systolic,90)
%both conditions must hold at the same position: pressure above the
%percentile AND smoker flag equal to 1; sum counts the matches
sum((T.Systolic > perc90) & (T.Smoker==1))
%"sort" function
x = randn(100, 1)
sort(x) %components of x arranged in increasing order: the first component
%of sort(x) is min(x), the last component is max(x)
%"find" function
find(x == min(x)) %position(s) of x at which the condition is true
%if Y = km travelled and X = cost of the trip, what is the estimated cost
%of a trip of 600 km?
%(the fitted line Y = beta_0_hat + beta_1_hat*X is solved for X at Y = 600)
cost = (600 - beta_0_hat) / beta_1_hat
%CAPM
%The CAPM is a model we use to estimate the effect of a market variation
%on a stock. Therefore, we regress the return of the stock in excess of
%the risk-free rate on the return of the market in excess of the
%risk-free rate.
Y = r_stock - rf %log-returns of the stock in excess wrt the risk-free rate
X = r_mkt - rf %log-returns of the market in excess wrt the risk-free rate
T = length(Y) %sample size
%beta_1 is the "beta of the stock", which expresses the sensitivity of the
%stock to a market variation (the higher the beta, the higher the
%sensitivity)
%if beta_1 > 1, the shocks are amplified
%if beta_1 < 1, the shocks are dampened
%OLS estimators
%slope: sample covariance of X and Y divided by the sample variance of X
%(the normalising constants cancel out in the ratio)
beta_1_hat = sum((X-mean(X)).*(Y-mean(Y)))/sum((X-mean(X)).^2) %cov(X,Y)/var(X)
%intercept: the fitted line passes through the point (mean(X), mean(Y))
beta_0_hat = mean(Y) - beta_1_hat*mean(X)
%we reject the null in favor of the alternative with significance level 5%
%if abs(t) > 1.96 (rejection rule for a two-sided test)
%NOTE(review): t_stat_beta_1 is not computed in this section; it must
%already be in the workspace -- confirm where it is defined
abs(t_stat_beta_1)>1.96 %1 = null rejected, 0 = null not rejected
%or if p-value < 0.05
%by symmetry of the standard normal, normcdf(-|t|) = 1 - normcdf(|t|);
%evaluating the tail directly avoids the cancellation that turns
%1 - normcdf(|t|) into exactly 0 when |t| is large
p_value_beta_1_hat = 2*normcdf(-abs(t_stat_beta_1))
%OLS estimators (multiple regression, matrix form)
X = [ones(length(Y),1),X1,X2,X3] %first column of ones = constant term
T = length(Y) %sample size
K = size(X,2) - 1 %number of regressors, constant excluded (3 here)
%solve the normal equations (X'X)*beta = X'Y with the backslash operator
%instead of forming the inverse explicitly
beta_ols = (X'*X)\(X'*Y)
Yhat = X*beta_ols %fitted values
res = Y - Yhat %residuals
SSR = sum(res.^2) %sum of squared residuals
%residual variance estimate; T-K-1 = observations minus estimated
%coefficients (K slopes + the constant)
sigma2_eps_hat = SSR/(T-K-1)
%variance-covariance matrix of beta_ols: here the inverse itself is needed
var_covar = sigma2_eps_hat*inv(X'*X)
std_errors = sqrt(diag(var_covar)) %standard errors = sqrt of the diagonal
t_stat = beta_ols./std_errors %t-statistics for H_0: coefficient = 0
%Collinearity
X = [ones(5,1), X1, X2, X3, X4]
%if one of the regressors is a linear combination of another regressor...
det(X'*X) %...has a very small value, so...
(X'*X)^(-1) %...Matlab calculates the inverse but gives us a warning
%VIF (variance inflation factor)
X = [ones(length(Y),1),X1,X2,X3,X4]
%compute the VIF for each regressor: the number of regressors is read from
%X (its columns minus the constant), so it is 4 here
n_reg = size(X,2) - 1
VIF = NaN(n_reg,1)
for j = 1:n_reg
    %the new regressand is the j-th regressor, which in the matrix X is not
    %the column j but the column j+1 (the first column is the constant!)
    Y_new = X(:,j+1)
    %the new regressors are the remaining columns (constant included)
    X_new = X
    X_new(:,j+1) = [] %we remove the column corresponding to the j-th
    %regressor, which is our new dependent variable
    %auxiliary regression, solved with backslash rather than with an
    %explicit inverse
    beta_ols_new = (X_new'*X_new)\(X_new'*Y_new)
    Yhat_new = X_new*beta_ols_new
    res_new = Y_new - Yhat_new
    SSR_new = sum((res_new).^2)
    SST_new = sum((Y_new - mean(Y_new)).^2)
    R2 = 1 - (SSR_new/SST_new) %R2 > 0.9: multicollinearity
    VIF(j) = 1/(1 - R2) %VIF > 10: multicollinearity
end
%F-test
%unrestricted model
T = length(Y)
X_U = [ones(T,1), X1, X2, X3, X4]
result_U = my_ols_routine(Y, X_U)
result_U.tstat
SSR_U = result_U.SSR
% H_0: FI = CD
% H_1: FI ≠ CD
K = 3
%reading H_0 as equality of the coefficients on FI and CD: regressing Y on
%FI and FI+CD (instead of FI and CD) makes the coefficient on FI equal to
%the difference between the two original coefficients, so H_0 can be
%checked with the t-stat of that single coefficient
X_new = [ones(T,1), FI, FI+CD, PP]
result_new = my_ols_routine(Y, X_new)
result_new.tstat
%if the coefficient associated to FI has a t-stat larger than 1.96 in
%absolute value (two-sided alternative), we reject the null hypothesis,
%so FI ≠ CD
%LR test
% Unrestricted model: BD = beta_0 + beta_1*PF + beta_2*PA + beta_3*Wkg
Y = BD
T = length(Y)
%design matrix: constant, then PF, PA and Wkg in the order of the model above
X = [ones(T,1), PF, PA, Wkg]
result_U = my_ols_routine(Y, X)
result_U.beta_ols
result_U.tstat
%sum of squared residuals of the unrestricted model, kept for the test
SSR_U = result_U.SSR
%gradient of f(B)
%d(B'*A*B)/dB = 2*A*B
grad = 2*A*B
%where
% - A is an n x n symmetric matrix (A' = A)
% - B is the n x 1 vector of the betas
% - f(B) = B'*A*B
% grad is the column vector made of the derivatives of f(B) wrt each
% component of B (beta_1 and beta_2 when n = 2)