I'm new to Hadoop programming and have started learning by setting up Hadoop 2.7.1 on a 3-node cluster. I tried running the example ("hello world") jars that come out of the box with Hadoop, and they ran fine. Then I wrote my own driver code on my local machine, bundled it into a jar, and executed it the same way — but it fails with no error messages.
Here is the code and what I did.
WordCountMapper.java
package mot.com.bin.test;

import java.io.IOException;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;

/**
 * Mapper for the word-count job (old {@code org.apache.hadoop.mapred} API).
 *
 * <p>Splits each input line on single spaces and emits one
 * {@code (word, 1)} pair per non-empty token.
 */
public class WordCountMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, IntWritable> {

    // Reused for every emitted pair; the value is always 1.
    private static final IntWritable ONE = new IntWritable(1);

    /**
     * @param key    byte offset of the line in the input split (unused)
     * @param value  the line of text
     * @param output collector receiving (word, 1) pairs
     * @param r      progress reporter (unused)
     */
    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, IntWritable> output, Reporter r)
            throws IOException {
        String line = value.toString();
        // NOTE: the original code was missing the "for" keyword here,
        // which is a compile error.
        for (String word : line.split(" ")) {
            if (word.length() > 0) {
                output.collect(new Text(word), ONE);
            }
        }
    }
}
WordCountReduce.java
package mot.com.bin.test;

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

/**
 * Reducer for the word-count job (old {@code org.apache.hadoop.mapred} API).
 *
 * <p>Sums the per-word counts produced by {@code WordCountMapper} and emits
 * one {@code (word, totalCount)} pair per key.
 */
public class WordCountReduce extends MapReduceBase
        implements Reducer<Text, IntWritable, Text, IntWritable> {

    /**
     * @param key    the word
     * @param values the counts emitted for this word (each typically 1)
     * @param output collector receiving the (word, total) pair
     * @param r      progress reporter (unused)
     */
    @Override
    public void reduce(Text key, Iterator<IntWritable> values,
            OutputCollector<Text, IntWritable> output, Reporter r)
            throws IOException {
        // NOTE: the original declared "int = 0;" (no variable name) — a
        // compile error — and the class was missing its closing brace.
        int sum = 0;
        while (values.hasNext()) {
            sum += values.next().get();
        }
        output.collect(key, new IntWritable(sum));
    }
}
WordCount.java
/** * **driver** */ package mot.com.bin.test; import org.apache.hadoop.conf.configured; import org.apache.hadoop.fs.path; import org.apache.hadoop.io.intwritable; import org.apache.hadoop.mapred.fileinputformat; import org.apache.hadoop.mapred.fileoutputformat; import org.apache.hadoop.mapred.jobclient; import org.apache.hadoop.mapred.jobconf; import org.apache.hadoop.util.tool; import org.apache.hadoop.io.text; //import com.sun.jersey.core.impl.provider.entity.xmljaxbelementprovider.text; /** * @author rgb764 * */ public class wordcount extends configured implements tool{ /** * @param args */ public static void main(string[] args) { // todo auto-generated method stub } public int run(string[] arg0) throws exception { if (arg0.length < 2) { system.out.println("need input file , output directory"); return -1; } jobconf conf = new jobconf(); fileinputformat.setinputpaths(conf, new path( arg0[0])); fileoutputformat.setoutputpath(conf, new path( arg0[1])); conf.setoutputkeyclass(text.class); conf.setoutputvalueclass(intwritable.class); conf.setmapperclass(wordcountmapper.class); conf.setreducerclass(wordcountreduce.class); conf.setoutputkeyclass(text.class); conf.setoutputvalueclass(intwritable.class); jobclient.runjob(conf); return 0; } }
First I tried exporting the jar from Eclipse and running it on the Hadoop cluster — no errors, yet no success either. Then I moved the individual Java files to the name node, compiled each one there, and created the jar on the node itself; the hadoop command still returns no results and no errors. Kindly help me with this.
hadoop jar wordcout.jar mot.com.bin.test.wordcount /karthik/mytext.txt /tempo
I fetched the dependent jar files using Maven and added them to the classpath on the name node. Help me figure out what is going wrong.
IMO, you are missing the code in your main method that instantiates your Tool implementation (WordCount in your case) and runs it.
/**
 * Entry point: delegates to ToolRunner, which parses the generic Hadoop
 * options and then calls WordCount.run() with the remaining arguments.
 * (The original snippet was lower-cased — String, ToolRunner,
 * Configuration — and would not compile as written.)
 */
public static void main(String[] args) throws Exception {
    int res = ToolRunner.run(new Configuration(), new WordCount(), args);
    System.exit(res);
}
Refer to the Hadoop ToolRunner documentation for details.