0

I am trying to count the frequency of each word in a text file using hadoop. I have attached the code.

package sub4;

import java.io.IOException;
import java.util.*; 
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.conf.*;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.Reducer.Context;
import org.apache.hadoop.mapreduce.lib.input.*;
import org.apache.hadoop.mapreduce.lib.output.*;
public class count {
    public static class Map extends Mapper<LongWritable, Text, Text, IntWritable>{
        //private final static IntWritable one = new IntWritable(1);
        IntWritable one = new IntWritable(1);
        //private Text word = new Text();


    public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] line = value.toString().split(",");
        for(String lines:line) {
            context.write(new Text(lines),one);
        }
    }
    } 

  

public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
        public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {          int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }
            context.write(key, new IntWritable(sum));
        }  
  }



  public static void main(String[] args) throws Exception {
       Configuration conf = new Configuration();
       Job job = new Job(conf, "count");
       job.setJarByClass(count.class);
       job.setOutputKeyClass(Text.class);
       job.setOutputValueClass(IntWritable.class);
       job.setMapperClass(Map.class);
       job.setReducerClass(Reduce.class);
       job.setInputFormatClass(TextInputFormat.class);      
       job.setOutputFormatClass(TextOutputFormat.class);    
       FileInputFormat.addInputPath(job, new Path(args[0]));
       FileOutputFormat.setOutputPath(job, new Path(args[1]));
       job.waitForCompletion(true);
    }        ``
}
 

I saved this code and tried to make it a jar file in Eclipse by right-clicking it and pressing Export. After that I went to the terminal and typed hadoop jar /home/user/wcount.jar sub4.count /abc/wc/output.

This error appeared:

Exception in thread "main" java.lang.ArrayIndexOutOfBoundsException: 1
    at sub4.count.main(count.java:51)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at org.apache.hadoop.util.RunJar.main(RunJar.java:156)
0

1 Answer 1

0

The code expects an input file name and an output file name on the command line.

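You didn’t provide both of them: your command passes only one path (/abc/wc/output), so args[0] is set but args[1] does not exist, which is what ArrayIndexOutOfBoundsException: 1 is reporting.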

Sign up to request clarification or add additional context in comments.

2 Comments

Hi, thanks for replying. This is my first time learning Hadoop, so I am not sure what you mean by that. I have mentioned wcount.jar. Isn't that the file name?
No. Look where args is being used.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.