```source("http://macosa.dima.unige.it/r.R")    # If I have not already loaded the library
---------- ---------- ---------- ---------- ---------- ---------- ---------- ----------
S 12 Regression. Splines. Curve fitting. Moving average (or mean).

Linear, quadratic and cubic regression:  the linear function with graph for a given
point (p,q) and the generic linear, quadratic and cubic functions that "best"
approximate some experimental data (x,y).  Commands:

regression(x,y,p,q), regression1(x,y), regression2(x,y), regression3(x,y)

If the points are 2, 3 or 4 regression1, regression2 and regression3 find exactly the
curve.
We also see how splines (approximating function graphs or curves) can be calculated.

For a more complete method of studyng linear regression see here.

If the "x" are too large [for example x=c(2007,2012,…)] an error message may appear.
In this case you can modify x (x-2000) and find the regression rB (of x-2000), and
then modify it [take r(x) = rB(x-2000)]

x = c(-0.2, 1.3, 1.7,  3.2,  4.1,  4.6)
y = c(9.1, 17.5, 20.7, 25.4, 35.7, 51.2)
Plane(-1,6,-10,70);  POINT(x,y,1)
regression(x,y, 0,0); POINT(0,0,"blue")
# 9.829 * x
f = function(x) 9.829 * x; graph(f,-1,6, "red")
regression1(x,y)
# 7.628 * x + 7.911
g = function(x) 7.628 * x + 7.911; graph(g,-1,6, "blue")
regression2(x,y)
# 1.37 * x^2 + 1.38 * x + 11.2
h = function(x) 1.37 * x^2 + 1.38 * x + 11.2; graph(h,-1,6, "violet")
regression3(x,y)
# 1.21 * x^3 + -6.3 * x^2 + 11.9 * x + 11.6
k = function(x) 1.21 * x^3 + -6.3 * x^2 + 11.9 * x + 11.6; graph(k,-1,6, "green")
POINT(0,0,"blue"); POINT(x,y,1)  # On the right, the curve that goes exactly for (-1,-10),(0,-1),(1,-2),(2,5)

Plane(-2,3,-20,20)
POINT(-1,-10, 1); POINT(0,-1,"red"); POINT(1,-2,"green"); POINT(2,5,"blue")
regression3( c(-1,0,1,2), c(-10,-1,-2,5) )
# 3 * x^3 + -5 * x^2 + 1 * x + -1
f = function(x) 3 * x^3 + -5 * x^2 + 1 * x + -1
graph(f,-2,3, 6)
POINT(-1,-10, 1); POINT(0,-1,"red"); POINT(1,-2,"green"); POINT(2,5,"blue")

# The cubic spline that interpolates the points x,y (see): x = c(-0.2, 1.3, 1.7,  3.2,  4.1,  4.6)
y = c(9.1, 17.5, 20.7, 25.4, 35.7, 51.2)
Plane(-1,6,-10,70);  POINT(x,y,1); L=spline(x,y,1000)

# I calculated and put in L the 1000 points of the spline passing through the
# points x,y

polyline(L\$x,L\$y,"red")                              #  I traced it in red

# The "extended" (blue) curve that passes by the same points, lowered by 10.

POINT(x,y-10,1); L1=spline(x,y-10,1000, xmin=-1,xmax=5)
polyline(L1\$x,L1\$y, "blue")

# If I want to find a value of y I can do, for example:
L=spline(x,y-10,1e6, xmin=-1,xmax=5); Yspline(0.5,L)
#  1.383594
POINT(0.5,1.383594, "red")
# Yspline(0.5,L) [and Xspline(0.5,L)] product the coordinate of the point with x=0.5 of
# the spline L  [I must use less the 1e8 points; in the example the number is 1e6]

# If you have some points of a closed curve you can approximate it with the command
# splineC(x,y,ColBorder,ColInside), or splinC if you want a thin-stroke curve. If you
# choose NULL as internal color the figure is transparent.

BF=3; HF=3; PLANE(-2,8, -2,8)
u=c( -1, 4, 7,6,3,0)
v=c(-1.5,0,-1,6,7,5)
splineC(u,v,"red",NULL); POINT(u,v,"seagreen")
PLANE(-2,8, -2,8); splinC(u,v,"blue","yellow") # The points used to trace the curve are in splineCx(x,y) and splineCy(x,y)
length(splineCx(u,v))
# 104
c( splineCx(u,v),splineCy(u,v) )
# -1.0 -1.5
c( splineCx(u,v),splineCy(u,v) )
#  -0.8442124 -1.6377163
# If you want to have approximately the area of the closed curve you can use areaPol:
areaPol(splineCx(u,v),splineCy(u,v))
# 53.88609

# If we want to approximate points with a curve y = A·exp(B·x) we can use regression1
# after transforming y-coordinates by the logarithmic function. If we want approximate
# them with a polynomial curve we can transform x-coordinates too: see here  #
# See here to see other methods of curve fitting (with the commands F_RUN1, F_RUN2,
# F_RUN3, F_RUN4). #
# How to proceed for a regression of the second order when I have a fixed point?
# Without using specific methods, I can do the following:
x = c(-0.2, 1.3, 1.7,  3.2,  4.1,  4.6)
y = c(9.1, 17.5, 20.7, 25.4, 35.7, 51.2)
Plane(-1,6,-10,70);  POINT(x,y,"black")
# Without a fixed point:
regression2(x,y)
# 1.37 * x^2 + 1.38 * x + 11.2
h = function(x) 1.37*x^2 + 1.38*x + 11.2; graph2(h,-1,6, "magenta") # If I have the fixed point (3.2,25.4) I can force the passage for a large amount of
# points (for example 1e6) that are equal to (3.2,25.4)!
x1=c(rep(3.2,1e6),x); y1=c(rep(25.4,1e6),y)
Plane(-1,6,-10,70);  POINT(x,y,"black")
regression2(x1,y1)
#  2.19 * x^2 + -2.91 * x + 12.3
h1 = function(x) 2.19 * x^2 + -2.91 * x + 12.3
graph2(h1,-1,6, "blue"); POINT(3.2,25.4, "red")

#

# Another example of use of regression:
# A car (initially) goes at a steady speed. The driver sees an obstacle and (after 1.5
# seconds) starts braking. Here are the photos taken every half second from the instant
# in which he saw the obstacle. What was the initial speed of the car? # Times (in seconds) and spaces (in meters):
s0 = c(1.5,2.0,2.5,3.0,3.5,4.0,4.5,5.0,5.5,6.0,6.5,7.0,7.5,8.0,8.5,9.0,9.5)
m0 = c( 70, 92,114,133,151,168,183,198,209,221,230,238,244,250,253,255,257)

# Since he has begun to brake:

s = s0-1.5; m = m0-70; s; m
# 0.0 0.5 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0
#   0  22  44  63  81  98 113 128 139 151 160 168 174 180 183 185 187

# The graph (time,space):
Plane(-1/2,8.5, -1,190); POINT(s,m, "blue")
abovex("sec"); abovey("m") # I want to impose that s(0)=0. I proceed as in the last example:

s1 = c(rep(0,1e6), s); m1 = c(rep(0,1e6), m)
regression2(s1,m1)
#  -2.89 * x^2 + 46.5 * x + -2.89e-07    # I neglect -2.89e-07
F = function(x) -2.89 * x^2 + 46.5 * x
graph2(F,0,8, "brown")               # See the chart above
# I find the derivative  (I can find it "by hand" but ...)
deriv(F,"x")
# 46.5 - 2.89 * (2 * x)
# The initial speed is 46.5 m/s.
46.5/1000*60*60
# 167.4        167 km/h

# It is easy to calculate the moving average (or moving mean)  of a data sequence (to
# smooth out the fluctuations).  An example:
# I have the file of  "mm of monthly rainfall in Genoa in the period 1984-92", here.
# In Windows, I can load the file or I can copy the file to the clipboard and then with
readLines("clipboard")
# if I want, I can view the file:
#  "# mm of monthly rainfall in Genoa in the period 1984-92; make the moving averages"
#  "1,41"
#  ...
#  "108,72"
# In any case, I put the table in a variable (I use the same separator ","; I have no line to jump)
rain = read.table("clipboard",sep=",")
# In Mac I can't load a table with "clipboard". I can copy the file in a text-file and use this;
# here I copied it in "macosa.dima.unige.it/R/rain.txt"; in the same way I proceed if I copied
# it to my PC. Obviously I can proceed like this even in Windows.
readLines("http://macosa.dima.unige.it/R/rain.txt")       # if I want to see the file
rain = read.table("http://macosa.dima.unige.it/R/rain.txt",sep=",")
# If I want I can view (better) the file:
str(rain)
# 'data.frame':   108 obs. of  2 variables:
# \$ V1: int  1 2 3 4 5 6 7 8 9 10 ...
# \$ V2: int  41 44 72 79 197 62 0 200 52 95 ...
# I obtain the first graph with:
x = rain\$V1; y = rain\$V2; max(y)
# 423
BF=6; HF=2
Plane(0,110,0,450)
polylin(x,y, "red")
Point(x,y, "blue") # To put in D2 the moving averages (of order 3) of D1, ..., D2[nD] I can use:
# D2 = D1; for ( i in 2:(nD-1) ) D2[i] <- (D1[i-1]+D1[i]+D1[i+1])/3; D2 <- c(D2,D1[nD])
# How to obtain the second graph:
n = 108
D1 = y; nD = n
D2 = D1; for ( i in 2:(nD-1) ) D2[i] <- (D1[i-1]+D1[i]+D1[i+1])/3; D2 <- c(D2,D1[nD])
y1=D2
max(y1)
# 230
Plane(0,110,0,250)
polylin(x,y1,"red"); Point(x,y1,"blue")
#
# For the last graph I make the moving averages 3 times:
for(i in 1:3) {
D1 = D2; nD = n
D2 = D1; for ( i in 2:(nD-1) ) D2[i] <- (D1[i-1]+D1[i]+D1[i+1])/3; D2 <- c(D2,D1[nD])
}
y2=D2
max(y2)
# 170.2099
Plane(0,110,0,200)
polylin(x,y2,"red")
# I mark the division into years:
for(i in 0:10) segm(1+i*12,0, 1+i*12,200, 0)

Other examples of use

```