niedziela, 14 kwietnia 2013

More on K-D trees

Previous post shows how to properly and efficiently find closest point using k-d tree algorithm for latitude-logitude coordinates. Here is supplement to the topic. Additional features are:
  • finding not one, but arbitrary number of closest points,
  • finding all points within specified range from given location,
  • make code easy to use for metrics different than lat-lng.
To find arbitrary number of points, what we need to store is not one but list of closest points. Additionally what we need to know is distance to the most distant point - to know whether newly checked shall replace the worst of stored. To achieve this property list of point has to be sorted according to distance from the source point. Below is listing of the method.

private void findNearest(PointNode<P> root, P point, int depth, LimitedSortedArrayList<D> results) {
D distance = point.getDistance(root.point);
if (root.left == null && root.right == null) {
results.addSorted(distance);
return;
}

final int axis = depth % 2;
boolean left = false;

if ((root.left != null) && (point.compareAxisTo(axis, root.point) < 0)) {
findNearest(root.left, point, depth+1, results);
left = true;
}
else if (root.right != null) {
findNearest(root.right, point, depth+1, results);
}

results.addSorted(distance);

if (!results.isFilled() || results.getWorst().compareAxisTo(axis, distance) >= 0) {
if (left) {
if (root.right != null) {
findNearest(root.right, point, depth+1, results);
}
}
else {
if (root.left != null) {
findNearest(root.left, point, depth+1, results);
}
}
}
}

Key point for algorithm above is "addSorted" method. This method is convenient extension of ArrayList implemented in LimitedSortedArrayList class, that allow only for limited storage of sorted items:

private static class LimitedSortedArrayList<T extends Comparable<T>> extends SortedArrayList<T> {
private int limit;

public LimitedSortedArrayList(int max_number) {
limit = max_number;
}
   public void addSorted(T value) {
      if (size() < limit) {
         insertSorted(value);
      }
      else {
        overwriteSorted(value);
      }
   }
   public boolean isFilled() {
      return size() >= limit;
   }
}

LimitedSortedArrayList extends ArrayList, but not directly only via SortedArrayList class. Responsibility of SortedArrayList is to keep order according to "compareTo", allow insertion of new elements and overwriting existing (to keep constant size of the container).

private static class SortedArrayList<T extends Comparable<T>> extends ArrayList<T> {
   private void sort() {
      for (int i = size()-1; i > 0 && get(i).compareTo(get(i-1)) < 0; i--)
        Collections.swap(this, i, i-1);
   }
   public T getWorst() {
      if (size() > 0) {
        return get(size()-1);
     }
      return null;
   }
   public void overwriteSorted(T value) {
      if (value.compareTo(getWorst()) < 0) {
        set(size()-1, value);
        sort();
      }
   }
   public void insertSorted(T value) {
add(value);
sort();
   }
}

Methods presented above do not contains exact types of point and means of distance calculation. These are provided via template parameters. P stands for point which must extend Metric interface and D stands for Distance and extends Distance interface.

public interface Metric<P, D extends Distance<D, P>> extends ComparableAxis<P> {
D getDistance(P other);
}
public interface Distance<D, P> extends Comparable<D>, ComparableAxis<D> {
P getToPoint();
}

where ComparableAxis is interface that allows for comparisons along specified axis:

public interface ComparableAxis<T> {
int compareAxisTo(int axis, T o);
}

For latitude-longitude we have following implementations of the interfaces:

public class LatLngCommon implements Metric<LatLngCommon, CosDistance> {
public double lat;
public double lng;
public double sin_lat;
public double cos_lat;
public double sin_lng;
public double cos_lng;

public LatLngCommon(double latitude, double longitude) {
lat = latitude;
lng = longitude;
double lat_rad = Math.toRadians(lat);
double lng_rad = Math.toRadians(lng);
sin_lat = Math.sin(lat_rad);
cos_lat = Math.cos(lat_rad);
sin_lng = Math.sin(lng_rad);
cos_lng = Math.cos(lng_rad);
}

@Override
public int compareAxisTo(int axis, LatLngCommon o) {
return (0==axis)
?(Double.compare(lat, o.lat))
:(Double.compare(lng, o.lng));
}
@Override
public CosDistance getDistance(LatLngCommon other) {
return new CosDistance(this, other);
}
}


public class CosDistance implements Distance<CosDistance, LatLngCommon> {

private LatLngCommon to_point;
private double sin_from_lat__sin_to_lat;
private double cos_from_lat__cos_to_lat;
private double sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng;
private double cos_dist;
private static final double RADIUS = 6371000.0;

public CosDistance(LatLngCommon from, LatLngCommon to) {
to_point = to;
sin_from_lat__sin_to_lat = from.sin_lat*to.sin_lat;
cos_from_lat__cos_to_lat = from.cos_lat*to.cos_lat;
sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng = from.sin_lng*to.sin_lng + from.cos_lng*to.cos_lng;
cos_dist = sin_from_lat__sin_to_lat + cos_from_lat__cos_to_lat*(sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng);
}
public CosDistance(double distance_meters) {
to_point = null;
sin_from_lat__sin_to_lat = 0;
cos_from_lat__cos_to_lat = 0;
sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng = 0;
cos_dist = Math.cos(distance_meters/RADIUS);
}
@Override
public int compareTo(CosDistance o) {
return  - Double.compare(cos_dist, o.cos_dist);
}
@Override
public int compareAxisTo(int axis, CosDistance o) {
return (0==axis)
?( - Double.compare(cos_dist, o.sin_from_lat__sin_to_lat + o.cos_from_lat__cos_to_lat)) // same longitude
:( - Double.compare(cos_dist, 1.0 + o.to_point.cos_lat*o.to_point.cos_lat*(o.sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng - 1.0))); // same latitude
}
@Override
public LatLngCommon getToPoint() {
return to_point;
}
}


Providing your own implementations of Metric and Distance interfaces you can easily calculate closest point in different coordinates (e.g. Cartesian).

Finding N-closest point example is available here. Number of points to be searched can be specified as get method parameter "number".


Last point is to solve problem stated a little differently - find all points within specified range. You can find in "CosDistance" definition additional constructor (with one double argument - distance) and Earth radius used in Google maps (see green code above).
Such distance calculated from radius in meters is required for finding points in range method:

private void findInRange(PointNode<P> root, P point, D range, int depth, CheckedArrayList<D> results) {

D distance = point.getDistance(root.point);

if (root.left == null && root.right == null) {
results.addChecked(distance);
return;
}

final int axis = depth % 2;
boolean left = false;

if ((root.left != null) && (point.compareAxisTo(axis, root.point) < 0)) {
findInRange(root.left, point, range, depth+1, results);
left = true;
}
else if (root.right != null) {
findInRange(root.right, point, range, depth+1, results);
}

results.addChecked(distance);

if (range.compareAxisTo(axis, distance) >= 0) {
if (left) {
if (root.right != null) {
findInRange(root.right, point, range, depth+1, results);
}
}
else {
if (root.left != null) {
findInRange(root.left, point, range, depth+1, results);
}
}
}
}

As you can see this method is almost the same as finding nearest points. There are two important differences:
  • first - container used for results storage - now we use CheckedArrayList - number of elements stored is not limited, but point is only added if its distance is lower than llimit,
  • second - searching different branch of a tree is performed only when actual distance along actual axis is smaller than given distance (range).
CheckedArrayList is thus implemented as simple extension of ArrayList:

private static class CheckedArrayList<T extends Comparable<T>> extends ArrayList<T> {
T reference;
public CheckedArrayList(T ref) {
reference = ref;
}
   public void addChecked(T value) {
      if (value.compareTo(reference) < 0) {
       add(value);
      }
   }
}

Finding points in range example is available here. Radius for search can be specified as get method parameter "radius".

All the code presented above you can find here.

poniedziałek, 1 kwietnia 2013

Weekend with GWT, Google maps and K-D trees - working example

Working example of algorithm presented in last post can be found here.
It shows map with some of my home town's bus stops. User can drag marker with and then second marker shows nearest bus stop location. Also number of closest busstops to be found can be changed with value of "number" get parameter. Additional parameters are "treedepth" and "showchecked". The former is int value which determines depth for separation lines presentation. The latter is bool value indicating whether to mark points checked during search or not.

Weekend with GWT, Google maps and K-D trees

It started from desire to remind Java language programming. How to program in Java with nice web output - obvious answer is GWT. One may say that it is obsolescent programming environment, but on the other hand it is quite mature and easy to fast-start examples. Of course client side Java is rather limited but you are always free to do normal Java programming on server side.
So, Eclipse + GWT + Chrome makes convenient programming environment for Java development with basic user web interface included (plus friendly debugging). But the biggest advantage is lack of Javascript - oh, how I hate this language, its horrible syntax, dynamic typing leading to "who knows what it is and where it is from".

Next, as I've entered Google world it is tempting and easy to use Google technologies. What I needed was access to maps.
The task is following - efficient (and quite simple - as for one weekend) K-D tree implementation  with presentation on Google maps. But how to easily access GMaps from GWT. One might expect easy integration (as for one company solutions). Indeed there is library for GWT that allows to access GMap, the project is hosted hosted at gwt-google-apis. But GMaps library presented there is for ver.2 and deprecated. If we dig dipper we can find library gwt-maps-3.8.0-pre1.zip for GMapa ver.3 there, but it is in prerelease stage. It is there since Mar 30, 2012 - it shows best how Google carries about GWT technology and why I called it obsolescent.

Lets go back to the task - K-D tree. This old and quite simple algorithm allows for efficient lookup of closest point in n-dimensional space (see wikipedia). In case of map, space is 2-dimensional, but the problem/inconvenience appears - it is not planar (Cartesian) space, but latitude-longitude coordinates on sphere (or rather GPS/WGS 84 ellipsoid). Of course, to calculate distance between coordinate points simple spherical Earth model is considered in presented example (the error is negligible even for Google as they use spherical Mercator projection - read wikipedia).
Digression: interesting page on map projections is at kartoweb.itc.nl, there is also copy of book on the topic.

First step is to create K-D tree for subsequent efficient queries. Algorithm is described in details in wikipedia, therefore only simple Java-pseudo-but-working-code implementation is presented here:

private PointNode createKDTree(LatLngCommon[] points, int from, int to, int depth) {
if (from >= to) {
return null;
}

final int axis = depth % 2;
Arrays.sort(points, from, to, new Comparator<LatLngCommon>(){
@Override
public int compare(LatLngCommon ll1, LatLngCommon ll2) {
return (0==axis)
?(Double.compare(ll1.lat, ll2.lat))
:(Double.compare(ll1.lng, ll2.lng));
}
});
int med = (from + to) / 2;
PointNode res = new PointNode();
res.point = points[med];
res.left  = createKDTree(points, from, med, depth+1);
res.right = createKDTree(points, med+1, to, depth+1);
return res;
}

In code above partition is along parallels for even and meridians for odd tree levels. Thanks to that partition is achieved by just comparing coordinates.

More difficult is second method - finding nearest point using lat-lng coords. The problem is calculation of distance between two points on sphere. One cannot use Euclidean distance as coords are not planar. Distance has to be calculated as great circle distance (shortest distance on sphere between two points).
Standard way of such distance calculation is using spherical law of cosines. But there are two issues here:
  • first - derivation of equation for lat-lng coords. From law of cosines we have:
    cos(c) = cos(a) cos(b) + sin(a) sin(b) cos(C),
    where 'a','b','c' are great circle distances and 'C' is angle between edges of length 'a' and 'b' (opposite to edge with length 'c').
    It might be tempting to think of 'C' as angle between meridian and parallel, which is right angle and cos(C) would be equal to 0. In such case 'a' would be difference between latitudes and 'b' difference between longitudes, and equation would be as simple as
    cos(c) = cos(a) cos(b),
    of course everybody knows that equation is different - why? Because parallels are not great circle distances (except for the Equator).
    Correct equation for law of cosines in lat-lng coords is following:
    cos(c) = sin(lat1) sin(lat2) + cos(lat1) cos(lat2) cos(lng1 - lng2)
    where 'c' is interesting distance between points of coords (lat1, lng1) and (lat2, lng2).
    One can find graphical representation of equation in fig.3-15 in book 'Mapping Hacks' by  Erle S.,  Gibson R.,  Walsh J..
  • second - numerical problems with cos/arccos calculation for small angles - standard way out of the problem is analytic transformation of equation to more numerically suitable called Haversine formula.
But do we really need to calculate distance? I mean distance value between points? Lets take a look at algorithm:

private LatLngCommon findNearest(PointNode root, LatLngCommon point, int depth) {

// end of recursion? if (root.left == null && root.right == null) { return root.point; }
// which partition in tree - lat or lng? final int axis = depth % 2; int cmp = (0==axis) ?(Double.compare(point.lat, root.point.lat)) :(Double.compare(point.lng, root.point.lng)); // deep with recursion to go to the bottom - find approx. best

LatLngCommon best; boolean left = false; if ((root.left != null) && (cmp < 0)) { best = findNearest(root.left, point, depth+1); left = true; } else if (root.right != null) {
best = findNearest(root.right, point, depth+1); }

// check if bottom best is better than actual point?
double best_dist = distance(point, best);

double new_dist = distance(point, root.point);

if (best_dist > new_dist) {

best = root.point;

best_dist = new_dist;

}


// there might be better on the other side of the fence

int otherside = (0==axis) ?(Double.compare(best_dist, distance_lat(point, root.point))) :(Double.compare(best_dist, distance_lng(point, root.point))); LatLngCommon best_otherside = null; if (otherside > 0) {

// get best from the other side - go to the bottom if (left) { if (root.right != null) { best_otherside = findNearest(root.right, point, depth+1); } } else { if (root.left != null) { best_otherside = findNearest(root.left, point, depth+1); } }

// if other side found and better - we have new best if (best_otherside != null) { if (best_dist > distance(point, best_otherside)) { best = best_otherside; } } } 


return best;
}

Please take a look how distance is used. What we need to know is comparison result and not a distance value. Valuable is information if distance between one pair of points is larger/smaller than distance between other pair of points. Now, please note that when:
dist1 > dist2, then
cos(dist1) < cos(dist2),
assuming that dist1 and dist2 are reasonable, i.e. from -180 deg to 180 deg. As cos is even, we do not have to carry about absolute value of distance.
Therefore it is not needed for the algorithm to calculate arccos (or arcsin - for Haversine formula).

Can we do better?
Additional benefits can come from pre-calculating trigonometric functions values, to avoid calculation during nearest point search, i.e.:
cos(dist) = sin(lat1) sin(lat2) + cos(lat1) cos(lat2) cos(lng1 - lng2), then
cos(dist) = sin(lat1) sin(lat2) + cos(lat1) cos(lat2) [sin(lng1) sin(lng2) + cos(lng1) cos(lng2)].

Last optimization is passing as result best point together with distance (cos of distance). This avoids calculating distance (cos of distance) multiple times as result travels back from recursion.

Assuming, final code can look like:

private CosDistance findNearest(PointNode root, LatLngCommon point, int depth) { if (root.left == null && root.right == null) { return new CosDistance(point, root.point); } final int axis = depth % 2; int cmp = (0==axis) ?(Double.compare(point.lat, root.point.lat)) :(Double.compare(point.lng, root.point.lng)); CosDistance best; boolean left = false; if ((root.left != null) && (cmp < 0)) { best = findNearest(root.left, point, depth+1); left = true; } else if (root.right != null) {
best = findNearest(root.right, point, depth+1); }
CosDistance cos_new_dist = new CosDistance(point, root.point); if (best.cos_dist < cos_new_dist.cos_dist) { best = cos_new_dist; } int otherside = (0==axis) ?(Double.compare(best.cos_dist, cos_new_dist.sin_from_lat__sin_to_lat + cos_new_dist.cos_from_lat__cos_to_lat)) :(Double.compare(best.cos_dist, 1.0 + point.cos_lat*point.cos_lat*(cos_new_dist.sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng - 1.0))); if (otherside <= 0) { CosDistance best_otherside = null; if (left) { if (root.right != null) { best_otherside = findNearest(root.right, point, depth+1); } } else { if (root.left != null) { best_otherside = findNearest(root.left, point, depth+1); } } if (best_otherside != null) {
if (best.cos_dist < best_otherside.cos_dist) { best = best_otherside; } } }
return best; }

With following helper class used for result:

private class CosDistance {
public LatLngCommon to_point; public double sin_from_lat__sin_to_lat; public double cos_from_lat__cos_to_lat; public double sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng; public double cos_dist;
public CosDistance(LatLngCommon from, LatLngCommon to) { to_point = to; sin_from_lat__sin_to_lat = from.sin_lat*to.sin_lat; cos_from_lat__cos_to_lat = from.cos_lat*to.cos_lat; sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng = from.sin_lng*to.sin_lng + from.cos_lng*to.cos_lng; cos_dist = sin_from_lat__sin_to_lat + cos_from_lat__cos_to_lat*(sin_from_lng__sin_to_lng__cos_from_lng__cos_to_lng); } }