linear regression using NumPy and SciPy
test of a simple linear regression in pure Python, and using NumPy's polyfit(). Additionally a linear regression in a higher dimensional space.
by XEmacs 2 years, 3 months ago and tagged with: NumPy python linear-regression
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
import numpy import scipy import scipy.optimize NUMBER_POINTS = 40 def model_values(x_w, y_w): """Returns model values for a world value tuple.""" x_m = 140 * (0.005 * (x_w / 25.5 + 2) ** 2 - 0.007 * (y_w / 25.5 + 1.5) ** 2 + 1) y_m = 140 * ((0.05 * x_w / 25.5 + 0.25) ** 2 + 0.15 * y_w / 25.5) return x_m, y_m def randomised_model_values(x_w, y_w): return numpy.random.normal(0, 1e1, 2) + model_values(x_w, y_w) class LinearRegression(object): def __init__(self, data_points): self.data_points = data_points self._a, self._b = None, None self._solve() def _solve(self): number = len(self.data_points) sumXX, sumXY, sumX, sumY = 0.0, 0.0, 0.0, 0.0 for x, y in self.data_points: sumXY += x * y sumXX += x * x sumX += x sumY += y self._a = (number * sumXY - sumX * sumY) / (number * sumXX - sumX * sumX) self._b = (sumY - self._a * sumX) / number def predict(self, x): return self._a * x + self._b class LinearRegression2(object): def __init__(self, data_points): self.data_points = data_points self._a, self._b = None, None self._solve() def _solve(self): data_points = numpy.array(self.data_points) x_values = data_points[:, 0] y_values = data_points[:, 1] self._a, self._b = numpy.polyfit(x_values, y_values, 1.0) def predict(self, x): return self._a * x + self._b class SpacialRegression(object): def __init__(self, data_points): self.data_points = data_points self.dimensions = len(data_points[0][0]) self._a, self._b = None, None self._vector = numpy.ones(self.dimensions ** 2 + self.dimensions) self._solve() def _vector2ab(self, vector): a, b = vector[:-self.dimensions], vector[-self.dimensions:] a.shape = self.dimensions, self.dimensions return a, b def _ab2vector(self, a, b): vector = numpy.zeros(self.dimensions ** 2 + self.dimensions) vector[:-self.dimensions] = a.flatten() vector[-self.dimensions:] = b return vector def _rmse(self, vector): a, b = self._vector2ab(vector) rmse = 0.0 for x, y in self.data_points: y_estimate = numpy.dot(a, x) + b delta = numpy.linalg.norm(y - y_estimate) rmse += delta * delta return rmse def _solve(self): vector, fopt, _, _, _, _ = scipy.optimize.fmin_powell(self._rmse, self._vector, xtol=0.000001, ftol=0.000001, disp=False, full_output=True) print '###', numpy.sqrt(fopt / len(self.data_points)) self._a, self._b = self._vector2ab(vector) def predict(self, x): return numpy.dot(self._a, x) + self._b def main(): expected = model_values(0.0, 0.0) deltas = [] for i in range(20): data_points_x = numpy.abs(numpy.random.normal(0.0, 64, size=(NUMBER_POINTS, 2))) data_points = [(x, randomised_model_values(*x)) for x in data_points_x] regressor = SpacialRegression(data_points) predicted = numpy.array(regressor.predict([0.0, 0.0])) delta = numpy.linalg.norm(predicted - expected) print 'delta =', delta deltas.append(delta) deltas = numpy.array(deltas) print ' average delta =', deltas.mean() print ' std. dev. =', deltas.std() def main_linear(): data_points = [(1.0, 1.2), (2.0, 2.8), (3.0, 5.1), (4.0, 6.8)] regressor = LinearRegression2(data_points) predicted = numpy.array(regressor.predict(1.0)) print 'predicted =', predicted if __name__ == '__main__': main() |

Currently 0 comments