How to execute Map and Reduce Algorithm
MongoDB is a scalable, high-performance, open source NoSQL database.
It offers Ad hoc queries, Indexing, Replication, Load balancing, File storage (GridFS), Aggregation,
MapReduce and Server-side JavaScript execution.
This requires the library mongo-2.8.0.jar to be present in the classpath.
The following example shows how to execute a MapReduce job in MongoDB using the Java driver.
package com.bethecoder.tutorials.mongodb.tests;
import java.net.UnknownHostException;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import com.mongodb.DBObject;
import com.mongodb.MapReduceCommand;
import com.mongodb.MapReduceOutput;
import com.mongodb.Mongo;
import com.mongodb.MongoException;
/*
things - collection
Doc : { "_id" : 1.0 , "tags" : [ "dog" , "cat"]}
Doc : { "_id" : 2.0 , "tags" : [ "cat"]}
Doc : { "_id" : 3.0 , "tags" : [ "mouse" , "cat" , "dog"]}
Doc : { "_id" : 4.0 , "tags" : [ ]}
*/
/**
 * Prints every document of the {@code things} collection in the {@code mydb}
 * database and then runs an inline map-reduce job over it that counts how many
 * times each value appears in the documents' {@code tags} arrays.
 */
public class MapReduce {

    /** Host of the MongoDB server to connect to. */
    private static final String HOST = "localhost";

    /** Port of the MongoDB server to connect to. */
    private static final int PORT = 27017;

    /**
     * STEP 1 - JavaScript map function: for every element of the document's
     * {@code tags} array it emits the tag as key with the value {@code { count : 1 }}.
     */
    private static final String MAP_FUNCTION = "function(){" +
            "this.tags.forEach(" +
            "function(z){" +
            "emit( z , { count : 1 } );" +
            "}" +
            ");" +
            "};";

    /**
     * STEP 2 - JavaScript reduce function: sums the {@code count} fields of all
     * values received for a key and returns the sum as {@code { count : total }}.
     */
    private static final String REDUCE_FUNCTION = "function( key , values ){" +
            "var total = 0;" +
            "for ( var i=0; i<values.length; i++ )" +
            "total += values[i].count;" +
            "return { count : total };" +
            "};";

    /**
     * Connects to MongoDB, dumps the {@code things} collection to standard
     * output and prints the result of the tag-counting map-reduce job.
     *
     * @param args command line arguments (not used)
     * @throws MongoException       declared for errors raised by the MongoDB driver
     * @throws UnknownHostException declared by the {@code Mongo} constructor
     */
    public static void main(String[] args) throws UnknownHostException, MongoException {
        // Connect to mongoDB with given IP and PORT
        Mongo mongo = new Mongo(HOST, PORT);
        try {
            // Get the database object
            DB db = mongo.getDB("mydb");

            // Get a single collection
            DBCollection collection = db.getCollection("things");
            System.out.println(collection.toString());

            // Print all available documents; the cursor is closed even if
            // iteration fails part-way through.
            DBCursor cursor = collection.find();
            try {
                while (cursor.hasNext()) {
                    System.out.println("Doc : " + cursor.next());
                }
            } finally {
                cursor.close();
            }

            ////////////// MAP & REDUCE ALGORITHM ////////////////

            // INLINE output: results are returned in the command reply, so no
            // output collection name is given (null); the second null is the
            // query argument, i.e. no filter is supplied.
            MapReduceCommand mr = new MapReduceCommand(
                    collection, MAP_FUNCTION, REDUCE_FUNCTION, null,
                    MapReduceCommand.OutputType.INLINE, null);

            // Execute the map reduce and print one line per result document
            MapReduceOutput out = collection.mapReduce(mr);
            for (DBObject obj : out.results()) {
                System.out.println(obj);
            }
        } finally {
            // Always release the client's connections, including on failure.
            mongo.close();
        }
    }
}
It gives the following output:
things
Doc : { "_id" : 1.0 , "tags" : [ "dog" , "cat"]}
Doc : { "_id" : 2.0 , "tags" : [ "cat"]}
Doc : { "_id" : 3.0 , "tags" : [ "mouse" , "cat" , "dog"]}
Doc : { "_id" : 4.0 , "tags" : [ ]}
{ "_id" : "cat" , "value" : { "count" : 3.0}}
{ "_id" : "dog" , "value" : { "count" : 2.0}}
{ "_id" : "mouse" , "value" : { "count" : 1.0}}