Thursday 24 October 2013

Locale sensitive sorting using java collator

The example below demonstrates how to achieve locale-sensitive sorting using Java's Collator.

Locale-sensitive sorting: sorting strings in a language-specific order rather than just by ASCII code. It is based on assigning different weights to the differences between characters when they are compared.

The example code sorts a list of Person objects by their names.


Person.java


package com.test.sort;


import java.text.CollationKey;


/**
 * Holds information related to a person: a name, an age, and the
 * {@link CollationKey} that defines the person's natural ordering.
 *
 * <p>The natural ordering is based only on the collation key. This class does
 * not override {@code equals}, so two distinct persons may compare as equal
 * without being {@code equals}.
 *
 * @author Jagdev
 */
public class Person implements Comparable<Person> {

    /** Holds name of the person. */
    private String name;

    /** Holds age of the person. */
    private int age;

    /** Holds collation key used for sorting; {@code null} until one is supplied. */
    private CollationKey sortKey;

    /**
     * Default constructor. Leaves the name and the collation key unset.
     */
    public Person() {
    }

    /**
     * Parameterized constructor.
     *
     * @param sortKey
     *            collation key used to order this person
     */
    public Person(CollationKey sortKey) {
        // Assign the field directly instead of calling the overridable setter
        // from a constructor.
        this.sortKey = sortKey;
    }

    /**
     * @return name
     */
    public String getName() {
        return name;
    }

    /**
     * @param name
     *            of type String
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return age
     */
    public int getAge() {
        return age;
    }

    /**
     * @param age
     *            of type int
     */
    public void setAge(int age) {
        this.age = age;
    }

    /**
     * @param sortKey
     *            of type CollationKey
     */
    public void setSortKey(CollationKey sortKey) {
        this.sortKey = sortKey;
    }

    /**
     * @return sortKey of type CollationKey, or {@code null} if none was set
     */
    public CollationKey getSortKey() {
        return sortKey;
    }

    /**
     * Returns the name and age in the form {@code Name = <name> Age = <age>}.
     *
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        return "Name = " + name + " Age = " + age;
    }

    /**
     * Compares two persons by their collation keys.
     *
     * <p>A person without a collation key (for example one created with the
     * default constructor) sorts before any person that has one, and two
     * key-less persons compare as equal.
     *
     * @param person
     *            the person to compare with
     * @return a negative value, zero, or a positive value as this person sorts
     *         before, equal to, or after {@code person}
     * @throws NullPointerException
     *             if {@code person} is {@code null}
     */
    @Override
    public int compareTo(Person person) {
        // Dereferencing a null argument throws NullPointerException here.
        CollationKey otherKey = person.getSortKey();
        if (sortKey == null) {
            return otherKey == null ? 0 : -1;
        }
        if (otherKey == null) {
            return 1;
        }
        return sortKey.compareTo(otherKey);
    }

}

CollatorSortMain.java

package com.test.sort;

import java.text.CollationKey;
import java.text.Collator;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;

/**
 * Demo of sorting a collection of {@link Person} objects through the
 * collation keys produced by a {@link Collator}.
 *
 * @author Jagdev
 */
public class CollatorSortMain {

    /**
     * Runs the English demo followed by the Portuguese demo.
     *
     * @param args
     *            command-line arguments (not used)
     */
    public static void main(String[] args) {
        CollatorSortMain demo = new CollatorSortMain();
        demo.testEnglishSort();
        demo.testPortugueseSort();
    }

    /**
     * Builds a collator for the given language and country, with its strength
     * set to {@link Collator#PRIMARY}.
     *
     * @param language
     *            language code passed to the {@link Locale} constructor
     * @param country
     *            country code passed to the {@link Locale} constructor
     * @return the configured collator
     */
    private Collator createCollator(String language, String country) {
        Collator result = Collator.getInstance(new Locale(language, country));
        result.setStrength(Collator.PRIMARY);
        return result;
    }

    /**
     * Creates a person whose collation key is derived from the name.
     *
     * @param collator
     *            collator that produces the collation key
     * @param name
     *            name of the person, also the source of the key
     * @param age
     *            age of the person
     * @return the populated person
     */
    private Person newPerson(Collator collator, String name, int age) {
        CollationKey key = collator.getCollationKey(name);
        Person person = new Person(key);
        person.setName(name);
        person.setAge(age);
        return person;
    }

    /**
     * Prints the list, sorts it in place by natural ordering, and prints it
     * again.
     *
     * @param people
     *            list to sort
     */
    private void printSorted(List<Person> people) {
        System.out.println("Before Sort " + people);
        Collections.sort(people);
        System.out.println("After Sort " + people);
    }

    /**
     * Verifies the Portuguese sorting using a pt-PT collator.
     */
    private void testPortugueseSort() {
        Collator collator = createCollator("pt", "PT");
        List<Person> people = new LinkedList<Person>();
        people.add(newPerson(collator, "vermelho", 10));
        people.add(newPerson(collator, "lívido", 20));
        people.add(newPerson(collator, "céu", 30));
        printSorted(people);
    }

    /**
     * Verifies the English sorting using an en-US collator.
     */
    private void testEnglishSort() {
        Collator collator = createCollator("en", "US");
        List<Person> people = new LinkedList<Person>();
        people.add(newPerson(collator, "Red", 10));
        people.add(newPerson(collator, "White", 20));
        people.add(newPerson(collator, "Blue", 30));
        printSorted(people);
    }
}

Here is the Collator javadoc.

Below is the advantage of sorting with a Collator (via CollationKeys) rather than comparing strings lexicographically with compareTo:


For comparing Strings exactly once, the compare method provides the best performance. When sorting a list of Strings however, it is generally necessary to compare each String multiple times. In this case, CollationKeys provide better performance. The CollationKey class converts a String to a series of bits that can be compared bitwise against other CollationKeys. A CollationKey is created by a Collator object for a given String.