tl  tr
  Home | Tutorials | Articles | Videos | Products | Tools | Search
Interviews | Open Source | Tag Cloud | Follow Us | Bookmark | Contact   
 Articles > Java > Coding Examples > Word Frequency

Word Frequency 

Counts how many times each word occurs in a given String.

File Name  :  
com/bethecoder/tutorials/coding/examples/WordFrequencyTest.java 
Author  :  Sudhakar KV
Email  :  [email protected]
   
package com.bethecoder.tutorials.coding.examples;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Prints how many times each word occurs in a sample paragraph.
 *
 * <p>Words are obtained by splitting the text on a fixed set of delimiter
 * characters; matching is case-sensitive, so {@code "If"} and {@code "if"}
 * are counted separately.
 */
public class WordFrequencyTest {

  /**
   * Runs the demo: counts the words of a hard-coded paragraph and prints
   * one {@code word=count} line per distinct word to standard output.
   *
   * @param args command-line arguments (ignored)
   */
  public static void main(String[] args) {

    String str = "Jobs related to the computer and communications " +
        "industry is becoming big hits recently. Since a lot of " +
        "things have turned digital, a lot of companies are " +
        "looking for professionals who are familiar with computer " +
        "and internet know-how. This was amplified further by the " +
        "emergence of web services which are part of the backbone " +
        "of the worlds business processing outsourcing (BPO) industry. " +
        "If you are applying for a position involved in web services, " +
        "it will not be an easy task because you will be competing with " +
        "only the real professional in the field.";
    showWordFrequency(str);
  }

  /**
   * Counts the occurrences of every word in {@code str} and prints each
   * result as {@code word=count} on its own line.
   *
   * <p>The counts live in a {@link HashMap}, so the order of the printed
   * lines is unspecified.
   *
   * @param str the text to analyse
   */
  private static void showWordFrequency(String str) {
    List<String> words = getWords(str);
    Map<String, Integer> frequency = new HashMap<String, Integer>();

    for (String word : words) {
      // A single lookup: null means this is the first time the word is seen.
      Integer count = frequency.get(word);
      frequency.put(word, count == null ? 1 : count + 1);
    }

    // Iterate over entries so each key does not need a second lookup.
    for (Map.Entry<String, Integer> entry : frequency.entrySet()) {
      System.out.println(entry.getKey() + "=" + entry.getValue());
    }
  }

  /**
   * Splits {@code str} into words using spaces and common punctuation
   * characters as delimiters. Hyphens and apostrophes are not delimiters,
   * so a token such as {@code know-how} stays intact.
   *
   * @param str the text to split; {@code null} yields an empty list
   * @return the words in their order of appearance, never {@code null}
   */
  private static List<String> getWords(String str) {
    List<String> words = new ArrayList<String>();
    if (str == null) {
      return words;
    }

    // Common delimiters: whitespace, punctuation and a few symbols.
    StringTokenizer st = new StringTokenizer(str, " ,.()[]!@#$%^&*/\\");
    while (st.hasMoreTokens()) {
      words.add(st.nextToken());
    }
    return words;
  }
}
   

It gives the following output (the order of the lines depends on HashMap iteration order and may vary):
to=1
professionals=1
for=2
by=1
Since=1
related=1
who=1
becoming=1
of=5
are=4
turned=1
know-how=1
recently=1
only=1
processing=1
industry=2
amplified=1
be=2
services=2
companies=1
professional=1
and=2
not=1
further=1
involved=1
hits=1
will=2
lot=2
big=1
If=1
internet=1
backbone=1
position=1
computer=2
Jobs=1
have=1
communications=1
worlds=1
digital=1
This=1
business=1
was=1
BPO=1
because=1
looking=1
real=1
competing=1
easy=1
is=1
with=2
it=1
applying=1
task=1
a=3
web=2
you=2
the=6
emergence=1
in=2
field=1
familiar=1
which=1
outsourcing=1
an=1
things=1
part=1



 
  


  
bl  br