module QuickSearch
    ( buildTokenPartitions
    , buildQuickSearch
    , rawBuildQuickSearch
    , matchesWithThreshold
    , topNMatches
    , batch
    , batchTopNMatches
    , batchMatchesWithThreshold
    , Token
    , Score
    , Scorer
    , Entry(..)
    , entryName
    , entryUID
    , Match(..)
    , QuickSearch(..)
    , damerauLevenshteinNorm
    , jaro
    , jaroWinkler
    ) where

import           Data.Hashable                  ( Hashable )
import qualified Data.Text                     as T
import           Data.Text.Metrics              ( damerauLevenshteinNorm
                                                , jaro
                                                , jaroWinkler
                                                )

import           QuickSearch.Internal.Filter    ( Entry(..)
                                                , Token
                                                , buildTokenPartitions
                                                , entryName
                                                , entryUID
                                                )
import           QuickSearch.Internal.Matcher   ( Match(..)
                                                , QuickSearch(..)
                                                , Score
                                                , Scorer
                                                , matchScore
                                                , scoreMatches
                                                )

-- | Given a list of entries to be searched, create a QuickSearch object.
rawBuildQuickSearch
    :: (Hashable uid, Eq uid)
    => [Entry T.Text uid]  -- ^ List of entries to be searched
    -> QuickSearch uid  -- ^ QuickSearch object holding token partitions
rawBuildQuickSearch :: [Entry Text uid] -> QuickSearch uid
rawBuildQuickSearch [Entry Text uid]
entries = (([Entry Text uid], HashMap Text (HashSet uid)) -> QuickSearch uid)
-> [Entry Text uid]
-> HashMap Text (HashSet uid)
-> QuickSearch uid
forall a b c. ((a, b) -> c) -> a -> b -> c
curry ([Entry Text uid], HashMap Text (HashSet uid)) -> QuickSearch uid
forall uid.
([Entry Text uid], HashMap Text (HashSet uid)) -> QuickSearch uid
QuickSearch [Entry Text uid]
entries (HashMap Text (HashSet uid) -> QuickSearch uid)
-> HashMap Text (HashSet uid) -> QuickSearch uid
forall a b. (a -> b) -> a -> b
$ [Entry Text uid] -> HashMap Text (HashSet uid)
forall uid.
(Hashable uid, Eq uid) =>
[Entry Text uid] -> HashMap Text (HashSet uid)
buildTokenPartitions [Entry Text uid]
entries

-- | Given a list of pairs of (T.Text, uid) to be searched,
-- create a QuickSearch object.
buildQuickSearch
    :: (Hashable uid, Eq uid)
    => [(T.Text, uid)]  -- ^ List of entries to be searched
    -> QuickSearch uid  -- ^ QuickSearch object holding token partitions
buildQuickSearch :: [(Text, uid)] -> QuickSearch uid
buildQuickSearch = [Entry Text uid] -> QuickSearch uid
forall uid.
(Hashable uid, Eq uid) =>
[Entry Text uid] -> QuickSearch uid
rawBuildQuickSearch ([Entry Text uid] -> QuickSearch uid)
-> ([(Text, uid)] -> [Entry Text uid])
-> [(Text, uid)]
-> QuickSearch uid
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Text, uid) -> Entry Text uid)
-> [(Text, uid)] -> [Entry Text uid]
forall a b. (a -> b) -> [a] -> [b]
map (Text, uid) -> Entry Text uid
forall name uid. (name, uid) -> Entry name uid
Entry

-- | Given a QuickSearch object, scorer, and string, return the top N matches.
topNMatches
    :: (Hashable uid, Eq uid)
    => QuickSearch uid  -- ^ QuickSearch object holding token partitions
    -> Int  -- ^ N: Number of results to return
    -> Scorer  -- ^ String similarity function of type (Text -> Text -> Ratio Int)
    -> T.Text  -- ^ String to be searched
    -> [Match Score (Entry T.Text uid)]  -- ^ Top N most similar entries
topNMatches :: QuickSearch uid
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid)]
topNMatches QuickSearch uid
qs Int
n Scorer
scorer Text
entry = Int -> [Match Int (Entry Text uid)] -> [Match Int (Entry Text uid)]
forall a. Int -> [a] -> [a]
take Int
n (Text -> QuickSearch uid -> Scorer -> [Match Int (Entry Text uid)]
forall uid.
(Hashable uid, Eq uid) =>
Text -> QuickSearch uid -> Scorer -> [Match Int (Entry Text uid)]
scoreMatches Text
entry QuickSearch uid
qs Scorer
scorer)

-- | Given a QuickSearch object, scorer, and string, return all matches with a
-- score greater than the given threshold.
matchesWithThreshold
    :: (Hashable uid, Eq uid)
    => QuickSearch uid  -- ^ QuickSearch object holding token partitions
    -> Int  -- ^ Threshold score above which to return results
    -> Scorer  -- ^ String similarity function of type (Text -> Text -> Ratio Int)
    -> T.Text  -- ^ String to be searched
    -> [Match Score (Entry T.Text uid)]  -- ^ Top N most similar entries
matchesWithThreshold :: QuickSearch uid
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid)]
matchesWithThreshold QuickSearch uid
qs Int
cutoff Scorer
scorer Text
entry = (Match Int (Entry Text uid) -> Bool)
-> [Match Int (Entry Text uid)] -> [Match Int (Entry Text uid)]
forall a. (a -> Bool) -> [a] -> [a]
takeWhile ((Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
cutoff) (Int -> Bool)
-> (Match Int (Entry Text uid) -> Int)
-> Match Int (Entry Text uid)
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Match Int (Entry Text uid) -> Int
forall name uid. Match Int (Entry name uid) -> Int
matchScore)
                                                        [Match Int (Entry Text uid)]
results
    where results :: [Match Int (Entry Text uid)]
results = Text -> QuickSearch uid -> Scorer -> [Match Int (Entry Text uid)]
forall uid.
(Hashable uid, Eq uid) =>
Text -> QuickSearch uid -> Scorer -> [Match Int (Entry Text uid)]
scoreMatches Text
entry QuickSearch uid
qs Scorer
scorer

-- | Turn a match retrieval function into one that works on lists of entries.
batch
    :: (Hashable uid1, Eq uid1, Hashable uid2, Eq uid2)
    => (  QuickSearch uid2
       -> Int
       -> Scorer
       -> T.Text
       -> [Match Score (Entry T.Text uid2)]
       )
  -- ^ A match retrieval function, such as topNMatches
    -> QuickSearch uid2  -- ^ QuickSearch object holding token partitions
    -> Int  -- ^ The reference number for the match retrieval function.
          -- N for topNMatches, threshold for matchesWithThreshold
    -> Scorer  -- ^ String similarity function of type (Text -> Text -> Ratio Int)
    -> [(T.Text, uid1)]  -- ^ List of entries to be processed
    -> [(Entry T.Text uid1, [Match Score (Entry T.Text uid2)])]
  -- ^ List of entries and the results returned for each.
batch :: (QuickSearch uid2
 -> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)])
-> QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
batch QuickSearch uid2
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)]
f QuickSearch uid2
qs Int
n Scorer
scorer [(Text, uid1)]
entries =
    let entries' :: [Entry Text uid1]
entries' = ((Text, uid1) -> Entry Text uid1)
-> [(Text, uid1)] -> [Entry Text uid1]
forall a b. (a -> b) -> [a] -> [b]
map (Text, uid1) -> Entry Text uid1
forall name uid. (name, uid) -> Entry name uid
Entry [(Text, uid1)]
entries
        results :: [[Match Int (Entry Text uid2)]]
results  = (Entry Text uid1 -> [Match Int (Entry Text uid2)])
-> [Entry Text uid1] -> [[Match Int (Entry Text uid2)]]
forall a b. (a -> b) -> [a] -> [b]
map (QuickSearch uid2
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)]
f QuickSearch uid2
qs Int
n Scorer
scorer (Text -> [Match Int (Entry Text uid2)])
-> (Entry Text uid1 -> Text)
-> Entry Text uid1
-> [Match Int (Entry Text uid2)]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Entry Text uid1 -> Text
forall name uid. Entry name uid -> name
entryName) [Entry Text uid1]
entries'
    in  [Entry Text uid1]
-> [[Match Int (Entry Text uid2)]]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
forall a b. [a] -> [b] -> [(a, b)]
zip [Entry Text uid1]
entries' [[Match Int (Entry Text uid2)]]
results


-- | Version of topNMatches that processes lists of entries instead of strings.
batchTopNMatches
    :: (Hashable uid1, Eq uid1, Hashable uid2, Eq uid2)
    => QuickSearch uid2  -- ^ QuickSearch object holding token partitions
    -> Int  -- ^ N: Number of results to return
    -> Scorer  -- ^ String similarity function of type (Text -> Text -> Ratio Int)
    -> [(T.Text, uid1)]  -- ^ List of entries to be processed
    -> [(Entry T.Text uid1, [Match Score (Entry T.Text uid2)])]
  -- ^ List of entries and up to the top N matches for each.
batchTopNMatches :: QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
batchTopNMatches = (QuickSearch uid2
 -> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)])
-> QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
forall uid1 uid2.
(Hashable uid1, Eq uid1, Hashable uid2, Eq uid2) =>
(QuickSearch uid2
 -> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)])
-> QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
batch QuickSearch uid2
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)]
forall uid.
(Hashable uid, Eq uid) =>
QuickSearch uid
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid)]
topNMatches

-- | Version of matchesWithThreshold that processes lists of entries instead of strings.
batchMatchesWithThreshold
    :: (Hashable uid1, Eq uid1, Hashable uid2, Eq uid2)
    => QuickSearch uid2  -- ^ QuickSearch object holding token partitions
    -> Int  -- ^ Score threshold above which to keep results
    -> Scorer  -- ^ String similarity function of type (Text -> Text -> Ratio Int)
    -> [(T.Text, uid1)]  -- ^ List of entries to be processed
    -> [(Entry T.Text uid1, [Match Score (Entry T.Text uid2)])]
  -- ^ List of entries and their matches above the score threshold.
batchMatchesWithThreshold :: QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
batchMatchesWithThreshold = (QuickSearch uid2
 -> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)])
-> QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
forall uid1 uid2.
(Hashable uid1, Eq uid1, Hashable uid2, Eq uid2) =>
(QuickSearch uid2
 -> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)])
-> QuickSearch uid2
-> Int
-> Scorer
-> [(Text, uid1)]
-> [(Entry Text uid1, [Match Int (Entry Text uid2)])]
batch QuickSearch uid2
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid2)]
forall uid.
(Hashable uid, Eq uid) =>
QuickSearch uid
-> Int -> Scorer -> Text -> [Match Int (Entry Text uid)]
matchesWithThreshold