In [1]: import numpy as np
In [2]: import statsmodels.api as sm
Create some data
In [3]: nsample = 50  # number of in-sample observations
In [4]: sig = 0.25  # scale factor applied to the standard-normal noise
In [5]: x1 = np.linspace(0, 20, num=nsample)
In [6]: X = np.column_stack((x1, np.sin(x1), (x1 - 5)**2, np.ones(nsample)))  # columns: linear, sine, shifted quadratic, constant
In [7]: beta = [0.5, 0.5, -0.02, 5.]  # true coefficients, one per column of X
In [8]: y_true = X.dot(beta)  # noise-free response
In [9]: y = y_true + sig * np.random.normal(size=nsample)  # no seed is set in this session, so the draw differs between runs
Set up and estimate the model
In [10]: olsmod = sm.OLS(y, X)
In [11]: olsres = olsmod.fit()
In [12]: print olsres.params
[ 0.50898072 0.51916098 -0.02158601 4.99563796]
In [13]: print olsres.bse
[ 0.01141961 0.04489185 0.00100265 0.07404521]
In-sample prediction
In [14]: ypred = olsres.predict(exog=X)  # predictions on the same design matrix the model was fitted with
Create a new sample of explanatory variables Xnew, predict and plot
In [15]: x1n = np.linspace(20.5, 25, 10)  # 10 new points beyond the fitted range [0, 20]
In [16]: Xnew = np.c_[x1n, np.sin(x1n), (x1n-5)**2, np.ones(x1n.size)]  # same column layout as X; constant column sized from x1n
In [17]: ynewpred = olsres.predict(Xnew) # predict out of sample
In [18]: print(ypred)  # in-sample predictions; call form of print runs on both Python 2 and 3
[ 4.4559876 4.95431237 5.41158849 5.79952214 6.1000306
6.30821316 6.43315634 6.49644096 6.52859648 6.56408511
6.63563975 6.76888609 6.97813236 7.26401852 7.61341116
8.00156122 8.39617026 8.76269694 9.07003211 9.29560991
9.42911534 9.47417891 9.44778022 9.37745824 9.2967867
9.2398581 9.23568231 9.30342042 9.44923656 9.66528521
9.93100013 10.2164734 10.48736917 10.71056495 10.85959353
10.91899149 10.8868398 10.77507906 10.60754778 10.41606569
10.2352059 10.09661463 10.02381182 10.02832687 10.10780439
10.24639119 10.41734091 10.58740837 10.72231167 10.79236459]
In [19]: import matplotlib.pyplot as plt
In [20]: plt.figure()  # open a new figure for the comparison plot
Out[20]: <matplotlib.figure.Figure at 0x4356a68c>
In [21]: plt.plot(x1, y, 'o', x1, y_true, 'b-')  # noisy observations as circle markers, noise-free curve as a blue line
Out[21]:
[<matplotlib.lines.Line2D at 0x43908fec>,
<matplotlib.lines.Line2D at 0x4390f3cc>]
In [22]: plt.plot(np.hstack((x1, x1n)), np.hstack((ypred, ynewpred)),'r')  # in-sample fit followed by the out-of-sample predictions, drawn as one red line
Out[22]: [<matplotlib.lines.Line2D at 0x4390f90c>]
In [23]: plt.title('OLS prediction, blue: true and data, fitted/predicted values:red')  # the title doubles as the colour legend
Out[23]: <matplotlib.text.Text at 0xd68520c>