vector of struct in linear regression

68 Views Asked by At

I'm new to C++ programming and I'm trying to code a simple linear regression program that returns the parameters of the affine function y=ax+b.

My concern is that I can't "use" the vector containing the x and y coordinates of my p points representing my point cloud in my "fit" and "mse" functions. Thank you in advance for your help.

Greetings

Frederick

#include <iostream>
#include <iomanip>
#include <cmath>
#include <vector>

using namespace std;

struct Point {
    double x;
    double y;
};

struct line {
    double a;
    double b;
};

const vector<Point> points({ { 1.2, 2.3 },
                             { 2.4, 2.9 },
                             { 1.9, 2.5 },
                             { 2.5, 3.8 },
                                          });

void affiche(const Point& p);
void fit(const Point& p, line& d);
void line_value(const line& d, double x);
void affiche_line(const line& d);
double mse(const line& d, const Point& p);

int main() {

    line l;
    Point p;
    
    fit(p, l);
    cout << endl;
    double x(2.0);
    line_value(l, x);
    cout << endl;
    affiche_line(l);
    cout << endl;
    cout << "MSE = " << mse(l, p) << endl;

    return 0;
}

void affiche(const Point& p) {
    for(auto p : points) {
        cout << "x= " << p.x << " , y= " << p.y << endl;
    }
}

void fit(const Point& p, line& d) {
    double sum_x(0.0);
    double sum_y(0.0);
    
    cout << setw(10) << "x_i"
         << setw(10) << "y_i"
         << setw(10) << "sum x"
         << setw(10) << "sum y"
         << setw(10) << "x^2"
         << setw(10) << "xy"
         << endl;

    for(size_t i(0); i < points.size(); ++i) {
        sum_x += p.x;
        sum_y += p.y;
        
        d.a += (p.x * p.y - 1.0 / points.size() * sum_x * sum_y) /
               (p.x * p.x - 1.0 / points.size() * sum_x * sum_x);
        
        d.b += 1.0 / points.size() * (p.y - d.a * p.x);
        
        cout << setw(10) << p.x << setw(10) << p.y
             << setw(10) << sum_x << setw(10) << sum_y
             << setw(10) << p.x * p.x << setw(10) << p.x * p.y
             << endl;
    }
}

void line_value(const line& d, double x) {
    cout << "x= " << x << endl;
    cout << "y= " << d.a * x + d.b << endl;
}

void affiche_line(const line& d) {
    cout << "Parameter a= " << d.a << endl;
    cout << "Parameter b= " << d.b << endl;
}

double mse(const line& d, const Point& p) {
    double MSE(0.0);
    
    for(size_t i(0); i < points.size(); ++i){
        MSE += 1.0 / points.size() * pow(p.y - (d.a * p.x + d.b), 2.0);
    }
    return MSE;
}

I tried to initialize my vector of points in the main but it didn't work at all.

As I said im a beginner and i don't have any solution.

It is impossible to use the numbers in my vector to generate the values of a and b

1

There are 1 best solutions below

0
NoName On

Certainly, Frederick! The issue here is that you are not actually utilizing the points vector inside your fit and mse functions. You are only using the parameter p, which is a single Point structure, and not the vector of points.

Here's a concise fix for your code:

  1. Modify the function signatures for fit and mse to accept the points vector.
  2. Update the loop inside fit and mse to iterate over the points vector.
void fit(const vector<Point>& points, line& d); // Function signature change
double mse(const line& d, const vector<Point>& points); // Function signature change

int main() {
    //...
    fit(points, l); // Pass the points vector
    //...
    cout << "MSE = " << mse(l, points) << endl; // Pass the points vector
    //...
}

void fit(const vector<Point>& points, line& d) { // Function definition change
    //...
    for(size_t i(0); i < points.size(); ++i) {
        double x = points[i].x;
        double y = points[i].y;
        sum_x += x;
        sum_y += y;
        // Modify d.a and d.b calculations accordingly
        //...
    }
    //...
}

double mse(const line& d, const vector<Point>& points) { // Function definition change
    //...
    for(size_t i(0); i < points.size(); ++i){
        double x = points[i].x;
        double y = points[i].y;
        MSE += 1.0 / points.size() * pow(y - (d.a * x + d.b), 2.0);
    }
    return MSE;
}

These changes will make the code use the actual points in the vector to fit the line and calculate the mean squared error. Make sure to modify the formulas for a and b according to the correct linear regression calculations.