Flash Notes

Programming - Curve fitting : linear regression, exponential / logarithmic / power curve fit


• How to program curve fitting with AWK (linear regression, exponential / logarithmic / power curve fit) ?

Solution

See below ...

Program

#!/bin/sh
#
# Curve fitting
# ~~~~~~~~~~~~~
#
# lr : Linear Regression
# ecf : Exponential Curve Fit
# lcf : Logarithmic Curve Fit
# pcf : Power Curve Fit
#

# Usage display function
# ~~~~~~~~~~~~~~~~~~~~~~
usage()
{
echo "Usage: $pgm_name data_file"
echo " cat data_file | $pgm_name"
echo " $pgm_name [-h|-help|--help]"
echo ""
echo "lr : Linear Regression"
echo "ecf : Exponential Curve Fit"
echo "lcf : Logarithmic Curve Fit"
echo "pcf : Power Cuve Fit"
}

# Check script name
# ~~~~~~~~~~~~~~~~~
pgm_name=`basename $0`
case "$pgm_name" in
lr) FX=id ; FY=id ; FB=id ; FCT="y = a.x + b"
;;

ecf) FX=id ; FY=log ; FB=exp ; FCT="y = b.e^(a.x)"
;;

lcf) FX=log ; FY=id ; FB=id ; FCT="y = a.Log(x) + b"
;;

pcf) FX=log ; FY=log ; FB=exp ; FCT="y = b.x^a"
;;

*) echo "*** lr,ecf,lcf,pcf : unknown name ($pgm_name)"
usage
exit 1
;;
esac

# Check arguments
# ~~~~~~~~~~~~~~~
case "$1" in
-h|-help|--help) usage
exit 1
;;

*) ;;
esac

if [ "$1" != "" ]
then
DATA_FILE="$1"
fi

awk 'BEGIN {
# Initialize variables
# ~~~~~~~~~~~~~~~~~~~~
sum_x = 0;
sum_x2 = 0;
sum_y = 0;
sum_y2 = 0;
sum_xy = 0;
N = 0;
}
# Identity function
# ~~~~~~~~~~~~~~~~~
function id(x)
{
return x;
}
NF == 2 && $1 !~ /^[ ]*#/ {
x = '"$FX"'($1);
y = '"$FY"'($2);

sum_x += x;
sum_x2 += x*x;

sum_y += y;
sum_y2 += y*y;

sum_xy += x*y;

N++;
}
END {
a = (sum_xy - ((sum_x * sum_y)/N))/(sum_x2 -((sum_x^2)/N));
b = '"$FB"'((sum_y/N) - a*(sum_x/N));
r2 = ((sum_xy - ((sum_x*sum_y)/N))^2)/((sum_x2 - ((sum_x^2)/N))*(sum_y2 - ((sum_y^2)/N)));

printf("%s\n", "'"$FCT"'");
printf("a = %+f\n", a);
printf("b = %+f\n", b);
printf("r2 = %f\n", r2);
}' "$DATA_FILE"

Example 1

$ lr <<- EOF
 > 40.5 104.5
 > 38.6 102
 > 37.9 100
 > 36.2  97.5
 > 35.1  95.5
 > 34.6  94
 > EOF
 y  = a.x + b
 a  = +1.760149
 b  = +33.527130
 r2 =  0.990946

Example 2

$ ecf <<- EOF
0.72  2.16
1.31  1.61
1.95  1.16
2.58  0.85
3.14  0.5
EOF
y  = b.e^(a.x)
a  = -0.582025
b  = +3.445083
r2 =  0.980327

Example 3

$ lcf <<- EOF
> 3     1.5
> 4     9.3
> 6    23.4
> 10   45.8
> 12   60.1
> EOF
y  = a.Log(x) + b
a  = +41.394468
b  = -47.021198
r2 =  0.979834

Example 4

$ pcf <<- EOF
 > 10  0.95
 > 12  1.05
 > 15  1.25
 > 17  1.41
 > 20  1.73
 > 22  2.00
 > 25  2.53
 > 27  2.98
 > 30  3.85
 > 32  4.59
 > 35  6.02
 > EOF
 y  = b.x^a
 a  = +1.455587
 b  = +0.026217
 r2 =  0.935538