Trained Naive Bayes classifier only ever predicts the first complaint type

17 Views Asked by At

<?php

require '../vendor/autoload.php';

use Phpml\Classification\NaiveBayes;

// Starts out as an empty string; presumably filled with an error message elsewhere —
// it is not read or written by any of the code visible in this file (TODO confirm).
$errormsg = '';


When I submit a complaint, it is always classified as the first complaint type (discrimination), even when none of its keywords match the discrimination examples.


// Labelled training set for the complaint classifier.
// Each row carries three keys:
//   'complaint' - the raw complaint text
//   'type'      - the class label the text belongs to
//   'language'  - 'english' or 'filipino', forwarded to preprocess()
// Every class label has one English and one Filipino example.
$trainingData = [
    // discrimination
    ['complaint' => 'I experienced discrimination because of my race', 'type' => 'discrimination', 'language' => 'english'],
    ['complaint' => 'Naranasan ko ang diskriminasyon dahil sa aking lahi', 'type' => 'discrimination', 'language' => 'filipino'],

    // academic issues
    ['complaint' => 'I am struggling with my academics', 'type' => 'academic issues', 'language' => 'english'],
    ['complaint' => 'Nahihirapan ako sa aking pag-aaral', 'type' => 'academic issues', 'language' => 'filipino'],

    // teacher misconduct
    ['complaint' => 'I witnessed teacher misconduct in the classroom', 'type' => 'teacher misconduct', 'language' => 'english'],
    ['complaint' => 'Nakakita ako ng di-maayos na pag-uugali ng guro sa silid-aralan', 'type' => 'teacher misconduct', 'language' => 'filipino'],

    // facilities and infrastructure problems
    ['complaint' => 'The school facilities are in poor condition', 'type' => 'facilities and infrastructure problems', 'language' => 'english'],
    ['complaint' => 'Ang mga pasilidad ng paaralan ay nasa masamang kalagayan', 'type' => 'facilities and infrastructure problems', 'language' => 'filipino'],

    // safety and security concerns
    ['complaint' => 'I feel unsafe and insecure in the school premises', 'type' => 'safety and security concerns', 'language' => 'english'],
    ['complaint' => 'Nararamdaman ko ang kawalan ng kaligtasan at katiyakan sa loob ng paaralan', 'type' => 'safety and security concerns', 'language' => 'filipino'],
];
           

/**
 * Normalises a complaint so that training text and user input tokenise the same way.
 *
 * Steps, for both languages:
 *  - every run of whitespace (spaces, tabs, line breaks) becomes a single space,
 *    so words on separate lines are not glued together when punctuation is stripped;
 *  - punctuation and digits are removed;
 *  - the text is lower-cased;
 *  - leading/trailing spaces and double spaces left behind by stripping are removed.
 *
 * English keeps ASCII letters only. Filipino keeps any Unicode letter plus the hyphen,
 * because hyphenated words such as "pag-aaral" are single tokens.
 *
 * @param string $complaint Raw complaint text.
 * @param string $language  'filipino' selects the Filipino rules; any other value is treated as English.
 *
 * @return string Lower-case words separated by single spaces ('' when nothing is left).
 */
function preprocess($complaint, $language)
{
    // Explicit ASCII whitespace list: a bare \s or \v can match single bytes that are
    // part of a multi-byte UTF-8 character under some locales.
    $text = trim(preg_replace('/[ \t\r\n\f\x0B]+/', ' ', (string) $complaint));

    if ($language === 'filipino') {
        // The /u pattern returns null on invalid UTF-8; keep the unstripped text in that case.
        $lettersOnly = preg_replace('/[^\p{L}\- ]+/u', '', $text);
        if ($lettersOnly !== null) {
            $text = $lettersOnly;
        }
        // mbstring is optional; fall back to the byte-wise variant when it is not loaded.
        $text = function_exists('mb_strtolower') ? mb_strtolower($text, 'UTF-8') : strtolower($text);
    } else {
        $text = strtolower(preg_replace('/[^a-zA-Z ]+/', '', $text));
    }

    // Removing a standalone symbol ("a & b") leaves two spaces; collapse them so that
    // splitting on a single space never yields empty tokens.
    return trim(preg_replace('/ {2,}/', ' ', $text));
}


I think the problem is in this function: it only ever classifies a complaint as the first complaint type.



/**
 * Trains a multinomial Naive Bayes model from labelled complaints.
 *
 * Every record is normalised with preprocess(), split into word tokens and counted
 * per class. Priors are the class frequencies; word likelihoods use add-one (Laplace)
 * smoothing: (count of word in class + 1) / (tokens in class + vocabulary size).
 * Every class receives a probability for every vocabulary word, including classes
 * whose examples produced no tokens at all.
 *
 * @param array $trainingData List of rows shaped
 *                            ['complaint' => string, 'type' => string, 'language' => string].
 *
 * @return array ['priorProbabilities'       => [type => float],
 *                'conditionalProbabilities' => [type => [word => float]]]
 */
function trainNaiveBayes($trainingData)
{
    $classCounts = [];
    $classFeatureCounts = [];
    // Used as a set (word => true) so membership is a hash lookup instead of a list scan.
    $vocabulary = [];

    foreach ($trainingData as $data) {
        $complaint = preprocess($data['complaint'], $data['language']);
        $type = $data['type'];

        if (!isset($classCounts[$type])) {
            $classCounts[$type] = 0;
            // Register the class here so it still gets a likelihood table
            // even if none of its examples yields a token.
            $classFeatureCounts[$type] = [];
        }
        $classCounts[$type]++;

        // Split on whitespace runs and drop empty pieces: explode(" ") would turn an
        // empty complaint or a double space into a bogus "" feature.
        $features = preg_split('/[ \t\r\n\f\x0B]+/', $complaint, -1, PREG_SPLIT_NO_EMPTY);
        foreach ($features as $feature) {
            if (!isset($classFeatureCounts[$type][$feature])) {
                $classFeatureCounts[$type][$feature] = 0;
            }
            $classFeatureCounts[$type][$feature]++;
            $vocabulary[$feature] = true;
        }
    }

    // P(class) = complaints of that class / all complaints.
    $priorProbabilities = [];
    $totalComplaints = count($trainingData);
    foreach ($classCounts as $type => $count) {
        $priorProbabilities[$type] = $count / $totalComplaints;
    }

    // P(word | class) with add-one smoothing, so unseen words never get probability 0.
    $conditionalProbabilities = [];
    $vocabularySize = count($vocabulary);
    foreach ($classFeatureCounts as $type => $features) {
        $denominator = array_sum($features) + $vocabularySize;
        $conditionalProbabilities[$type] = [];
        foreach (array_keys($vocabulary) as $word) {
            $count = isset($features[$word]) ? $features[$word] : 0;
            $conditionalProbabilities[$type][$word] = ($count + 1) / $denominator;
        }
    }

    return ['priorProbabilities' => $priorProbabilities, 'conditionalProbabilities' => $conditionalProbabilities];
}


When I submit a complaint, it is always classified as the first complaint type (discrimination), even when none of its keywords match the discrimination examples.



/**
 * Predicts the complaint type for a piece of text using a trained Naive Bayes model.
 *
 * Scoring is done in log space: log P(class) + sum of log P(word | class). Summing
 * logarithms avoids the floating-point underflow that multiplying many small
 * probabilities causes on long complaints, where every class would collapse to 0.0
 * and the first class would win by default.
 *
 * Words that are not in the model's vocabulary are skipped: they would scale every
 * class by the same factor and therefore cannot change the ranking.
 *
 * If several classes end up with the same score - in particular when none of the
 * words is known to the model - the class that comes first in the model wins.
 *
 * @param string $complaintDetails Raw complaint text.
 * @param array  $trainedModel     Model shaped like the return value of trainNaiveBayes():
 *                                 ['priorProbabilities' => [type => float],
 *                                  'conditionalProbabilities' => [type => [word => float]]].
 * @param string $language         Language flag forwarded to preprocess().
 *
 * @return string|int|null The best-scoring class label, or null when the model has no classes.
 */
function classifyComplaint($complaintDetails, $trainedModel, $language)
{
    $priorProbabilities = $trainedModel['priorProbabilities'];
    $conditionalProbabilities = $trainedModel['conditionalProbabilities'];

    // The vocabulary is the set of words in the per-class tables. The outer keys of
    // $conditionalProbabilities are class labels, not words.
    $vocabulary = [];
    foreach ($conditionalProbabilities as $wordProbabilities) {
        $vocabulary += $wordProbabilities;
    }
    $vocabularySize = count($vocabulary);

    // Split on whitespace runs and drop empty pieces so "" is never scored as a word.
    $words = preg_split(
        '/[ \t\r\n\f\x0B]+/',
        preprocess($complaintDetails, $language),
        -1,
        PREG_SPLIT_NO_EMPTY
    );

    $bestType = null;
    $bestScore = -INF;
    foreach ($priorProbabilities as $type => $priorProbability) {
        $score = $priorProbability > 0 ? log($priorProbability) : -INF;

        foreach ($words as $word) {
            if (!isset($vocabulary[$word])) {
                continue;
            }
            // A word known to the model but missing from this class's table (possible
            // with a hand-built model) falls back to a uniform 1 / vocabulary size.
            $probability = isset($conditionalProbabilities[$type][$word])
                ? $conditionalProbabilities[$type][$word]
                : 1 / $vocabularySize;
            $score += $probability > 0 ? log($probability) : -INF;
        }

        // Strict ">" keeps the earliest class on ties, independent of sort stability.
        if ($bestType === null || $score > $bestScore) {
            $bestType = $type;
            $bestScore = $score;
        }
    }

    return $bestType;
}

?>


Please help me fix this.


I am also using the Naive Bayes algorithm.

0

There are 0 best solutions below