linear regression using NumPy and SciPy

test of a simple linear regression in pure Python, and using NumPy's polyfit(). Additionally a linear regression in a higher dimensional space.

by XEmacs 2 years, 3 months ago and tagged with: NumPy python linear-regression
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import numpy
import scipy
import scipy.optimize

NUMBER_POINTS = 40

def model_values(x_w, y_w):
    """Returns model values for a world value tuple."""
    x_m = 140 * (0.005 * (x_w / 25.5 + 2) ** 2 - 0.007 * (y_w / 25.5 + 1.5) ** 2 + 1)
    y_m = 140 * ((0.05 * x_w / 25.5 + 0.25) ** 2 + 0.15 * y_w / 25.5)
    return x_m, y_m

def randomised_model_values(x_w, y_w):
    return numpy.random.normal(0, 1e1, 2) + model_values(x_w, y_w)


class LinearRegression(object):
    def __init__(self, data_points):
        self.data_points = data_points
        self._a, self._b = None, None
        self._solve()

    def _solve(self):
        number = len(self.data_points)
        sumXX, sumXY, sumX, sumY = 0.0, 0.0, 0.0, 0.0
        for x, y in self.data_points:
            sumXY += x * y
            sumXX += x * x
            sumX += x
            sumY += y
        self._a = (number * sumXY - sumX * sumY) / (number * sumXX - sumX * sumX)
        self._b = (sumY - self._a * sumX) / number

    def predict(self, x):
        return self._a * x + self._b


class LinearRegression2(object):
    def __init__(self, data_points):
        self.data_points = data_points
        self._a, self._b = None, None
        self._solve()


    def _solve(self):
        data_points = numpy.array(self.data_points)
        x_values = data_points[:, 0]
        y_values = data_points[:, 1]
        self._a, self._b = numpy.polyfit(x_values, y_values, 1.0)


    def predict(self, x):
        return self._a * x + self._b


class SpacialRegression(object):
    def __init__(self, data_points):
        self.data_points = data_points
        self.dimensions = len(data_points[0][0])
        self._a, self._b = None, None
        self._vector = numpy.ones(self.dimensions ** 2 + self.dimensions)
        self._solve()


    def _vector2ab(self, vector):
        a, b = vector[:-self.dimensions], vector[-self.dimensions:]
        a.shape = self.dimensions, self.dimensions
        return a, b


    def _ab2vector(self, a, b):
        vector = numpy.zeros(self.dimensions ** 2 + self.dimensions)
        vector[:-self.dimensions] = a.flatten()
        vector[-self.dimensions:] = b
        return vector


    def _rmse(self, vector):
        a, b = self._vector2ab(vector)
        rmse = 0.0
        for x, y in self.data_points:
            y_estimate = numpy.dot(a, x) + b
            delta = numpy.linalg.norm(y - y_estimate)
            rmse += delta * delta
        return rmse


    def _solve(self):
        vector, fopt, _, _, _, _ = scipy.optimize.fmin_powell(self._rmse, self._vector,
                                            xtol=0.000001, ftol=0.000001,
                                            disp=False, full_output=True)
        print '###', numpy.sqrt(fopt / len(self.data_points))
        self._a, self._b = self._vector2ab(vector)


    def predict(self, x):
        return numpy.dot(self._a, x) + self._b



def main():
    expected = model_values(0.0, 0.0)
    deltas = []
    for i in range(20):
        data_points_x = numpy.abs(numpy.random.normal(0.0, 64, size=(NUMBER_POINTS, 2)))
        data_points = [(x, randomised_model_values(*x)) for x in data_points_x]

        regressor = SpacialRegression(data_points)
        predicted = numpy.array(regressor.predict([0.0, 0.0]))
        delta = numpy.linalg.norm(predicted - expected)
        print 'delta =', delta
        deltas.append(delta)
    deltas = numpy.array(deltas)
    print '  average delta =', deltas.mean()
    print '  std. dev. =', deltas.std()


def main_linear():
    data_points = [(1.0, 1.2), (2.0, 2.8), (3.0, 5.1), (4.0, 6.8)]
    regressor = LinearRegression2(data_points)
    predicted = numpy.array(regressor.predict(1.0))
    print 'predicted =', predicted


if __name__ == '__main__':
    main()

Currently 0 comments

To post a comment, you must login.