Hadoop version: 3.3.6 Virtual Machine: UTM Virtual Machine OS: Ubuntu 23.10 My machine OS: macOS
Below is the source code of the program.
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package partition;
import java.io.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
import org.apache.hadoop.util.*;
public class partition
{
//Map class
public static class MapClass extends Mapper<LongWritable,Text,Text,Text>
{
public void map(LongWritable key, Text value, Context context)
{
try{
String[] str = value.toString().split("\t", -3);
String gender=str[3];
//key = gender, value = whole string
context.write(new Text(gender), new Text(value));
}
catch(Exception e)
{
System.out.println(e.getMessage());
}
}
}
//Reducer class
public static class ReduceClass extends Reducer<Text,Text,Text,IntWritable>
{
public int max = -1;
public void reduce(Text key, Iterable <Text> values, Context context) throws IOException, InterruptedException
{
//find maximum
max = -1;
for (Text val : values)
{
String [] str = val.toString().split("\t", -3);
if(Integer.parseInt(str[4])>max)
max=Integer.parseInt(str[4]);
}
context.write(new Text(key), new IntWritable(max));
}
}
//Partitioner class
public static class CaderPartitioner extends
Partitioner < Text, Text >
{
@Override
public int getPartition(Text key, Text value, int numReduceTasks)
{
//partitions
String[] str = value.toString().split("\t");
int age = Integer.parseInt(str[2]);
if(numReduceTasks == 0)
{
return 0;
}
if(age<=20)
{
return 0;
}
else if(age>20 && age<=30)
{
return 1 % numReduceTasks;
}
else
{
return 2 % numReduceTasks;
}
}
}
public static void main(String arg[]) throws Exception
{
Configuration conf = new Configuration();
Job job = new Job(conf, "topsal");
job.setJarByClass(partition.class); /*Change name*/
FileInputFormat.setInputPaths(job, new Path(arg[0]));
FileOutputFormat.setOutputPath(job,new Path(arg[1]));
job.setMapperClass(MapClass.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
//set partitioner statement
job.setPartitionerClass(CaderPartitioner.class);
job.setReducerClass(ReduceClass.class);
job.setNumReduceTasks(3);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(TextOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
System.exit(job.waitForCompletion(true)? 0 : 1);
System.exit(0);
}
}
This is last part of the console output. Showing only this due to characters constraint.
2024-01-11 11:46:00,566 INFO [pool-4-thread-1] mapred.LocalJobRunner (LocalJobRunner.java:run(353)) - Finishing task: attempt_local584038357_0001_r_000002_0
2024-01-11 11:46:00,566 INFO [Thread-23] mapred.LocalJobRunner (LocalJobRunner.java:runTasks(486)) - reduce task executor complete.
2024-01-11 11:46:01,224 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1748)) - Job job_local584038357_0001 running in uber mode : false
2024-01-11 11:46:01,225 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1755)) - map 100% reduce 100%
2024-01-11 11:46:01,226 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1766)) - Job job_local584038357_0001 completed successfully
2024-01-11 11:46:01,231 INFO [main] mapreduce.Job (Job.java:monitorAndPrintJob(1773)) - Counters: 30
File System Counters
FILE: Number of bytes read=3588
FILE: Number of bytes written=2460844
FILE: Number of read operations=0
FILE: Number of large read operations=0
FILE: Number of write operations=0
Map-Reduce Framework
Map input records=14
Map output records=0
Map output bytes=0
Map output materialized bytes=18
Input split bytes=145
Combine input records=0
Combine output records=0
Reduce input groups=0
Reduce shuffle bytes=18
Reduce input records=0
Reduce output records=0
Spilled Records=0
Shuffled Maps =3
Failed Shuffles=0
Merged Map outputs=3
GC time elapsed (ms)=8
Total committed heap usage (bytes)=866123776
Shuffle Errors
BAD_ID=0
CONNECTION=0
IO_ERROR=0
WRONG_LENGTH=0
WRONG_MAP=0
WRONG_REDUCE=0
File Input Format Counters
Bytes Read=546
File Output Format Counters
Bytes Written=24
I am generating the output by configuring the arguments from the properties. However, when I check the output file, it is empty. Arguments
Empty Output for example, part-r-00000
May I please ask what is the issue here? Is there something I need to update in the source code?