package org.lsst.ccs.utilities.misc;

import java.util.*;

/**
 * Tests strings for similarity based on Levenshtein distance.
 *
 * @author onoprien
 */
public class Similarity {

    /**
     * Computes similarity score for two strings.
     * The score is the Levenshtein (edit) distance: the minimum number of single-character
     * insertions, deletions and substitutions needed to turn one string into the other.
     * The lower the score, the more similar strings are.
     * Identical strings yield score of 0.
     * <p>
     * Characters are compared as UTF-16 code units ({@code char} values).
     * Only one row of the distance table is kept, so memory use is proportional
     * to the length of {@code s2}.
     *
     * @param s1 First string to compare.
     * @param s2 Second string to compare.
     * @return Similarity score.
     * @throws NullPointerException if either argument is {@code null}.
     */
    public static int compute(String s1, String s2) {
        final int len1 = s1.length();
        final int len2 = s2.length();

        // cost[j] holds the distance between the first i characters of s1 and the first j of s2.
        // Row 0: turning an empty prefix into j characters takes j insertions.
        int[] cost = new int[len2 + 1];
        for (int j = 0; j <= len2; j++) {
            cost[j] = j;
        }

        for (int i = 1; i <= len1; i++) {
            char c1 = s1.charAt(i - 1);
            int diagonal = cost[0];     // value of cell (i-1, j-1), saved before it is overwritten
            cost[0] = i;                // i deletions turn i characters into an empty prefix
            for (int j = 1; j <= len2; j++) {
                int above = cost[j];    // value of cell (i-1, j)
                if (c1 == s2.charAt(j - 1)) {
                    // Matching characters cost nothing extra.
                    cost[j] = diagonal;
                } else {
                    // Cheapest of substitution (diagonal), insertion (left) and deletion (above).
                    cost[j] = Math.min(Math.min(diagonal, cost[j - 1]), above) + 1;
                }
                diagonal = above;
            }
        }
        return cost[len2];
    }

    /**
     * Returns a list of strings from {@code candidates} that are most similar to {@code probe}.
     * <p>
     * The result is ordered by ascending score; strings with equal score keep the order
     * in which they appear in {@code candidates}. When more than {@code max} strings qualify,
     * the ones with the highest scores are dropped, and among equal scores the earlier
     * candidates are kept. Each distinct string appears in the result at most once.
     *
     * @param probe Test string.
     * @param candidates List of strings from which those most similar to {@code probe} should be selected.
     * @param max Maximum number of strings to select. If {@code 0}, an empty list is returned.
     *            A negative value places no limit on the number of selected strings.
     * @param threshold Only strings with similarity score below {@code threshold} can be selected.
     * @return List of selected strings.
     */
    public static ArrayList<String> head(String probe, List<String> candidates, int max, int threshold) {
        // Nothing may be selected; the eviction logic below needs at least one slot to work with.
        if (max == 0) {
            return new ArrayList<>();
        }

        // Selected strings grouped by score; the last entry always holds the worst score kept.
        TreeMap<Integer, Deque<String>> byScore = new TreeMap<>();
        // Strings currently held in byScore, used to skip duplicates.
        Set<String> selected = new HashSet<>();
        int size = 0;
        // Exclusive upper bound on admissible scores; tightened once the result is full.
        int limit = threshold;

        for (String candidate : candidates) {
            if (selected.contains(candidate)) {
                continue;
            }
            int score = compute(probe, candidate);
            if (score >= limit) {
                continue;
            }
            byScore.computeIfAbsent(score, key -> new ArrayDeque<>()).add(candidate);
            selected.add(candidate);

            if (size == max) {
                // Already full: evict the most recently added string with the worst score.
                // The new string scored strictly below the worst score, so it is never the victim.
                Deque<String> worst = byScore.lastEntry().getValue();
                selected.remove(worst.pollLast());
                if (worst.isEmpty()) {
                    byScore.pollLastEntry();
                    limit = byScore.lastKey();
                }
            } else if (++size == max) {
                // Just became full: from now on only strictly better strings are admitted.
                limit = byScore.lastKey();
            }
        }

        ArrayList<String> out = new ArrayList<>(size);
        for (Deque<String> group : byScore.values()) {
            out.addAll(group);
        }
        return out;
    }

}
